0
# Expression Analysis and Debugging
1
2
Tools for analyzing, validating, and debugging expressions including disassembly of compiled expressions. These utilities help developers understand expression behavior, optimize performance, and troubleshoot issues during development.
3
4
## Capabilities
5
6
### Expression Validation
7
8
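Validate expressions without executing them: parsing and type checking run against the supplied variables, and any failure is handed back as an Exception object (None means the expression is valid), which makes validation useful in debugging and development workflows.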
9
10
```python { .api }
11
def validate(ex, local_dict=None, global_dict=None, out=None, order='K', casting='safe', **kwargs):
    """
    Validate a mathematical expression without evaluating it.

    Validates an expression by performing parsing, type checking, and
    compatibility analysis without executing the computation. Failures are
    reported through the return value instead of being raised: the result
    is None on successful validation, or the Exception object describing
    the problem if validation fails.

    Parameters:
    - ex (str): Mathematical expression string to validate
    - local_dict (dict, optional): Local variable bindings for type checking
    - global_dict (dict, optional): Global variable bindings
    - out (ndarray, optional): Output array for compatibility checking
    - order (str): Memory layout order ('K', 'A', 'C', 'F')
    - casting (str): Casting safety level ('no', 'equiv', 'safe', 'same_kind', 'unsafe')
    - **kwargs: Compiler context options such as `optimization` and
      `truediv`. These are not expression variables; supply variables
      through local_dict / global_dict.

    Returns:
    None or Exception: None if expression is valid, Exception object if invalid

    Note:
    After successful validation, you can proceed directly to re_evaluate()
    """
34
```
35
36
**Usage Examples:**
37
38
```python
39
import numpy as np
40
import numexpr as ne
41
42
# Check the expression up front so the expensive evaluation only runs if it is sound
a = np.random.random((1000, 1000))
b = np.random.random((1000, 1000))

expression = "a * sin(b) + sqrt(a**2 + b**2)"
operands = {'a': a, 'b': b}

validation_result = ne.validate(expression, local_dict=operands)
if validation_result is not None:
    print(f"Expression validation failed: {validation_result}")
else:
    print("Expression is valid")
    # Validation passed, so run the real computation
    result = ne.evaluate(expression, local_dict=operands)

# Validate again, this time passing an output array and a casting rule
output = np.empty((1000, 1000), dtype=np.float32)
validation_result = ne.validate("a + b", local_dict=operands, out=output, casting='safe')
if validation_result is not None:
    print(f"Output array incompatible: {validation_result}")
else:
    print("Output array compatible")
62
```
63
64
### Expression Disassembly
65
66
Examine the internal representation of compiled expressions to understand optimization and execution paths.
67
68
```python { .api }
69
def disassemble(nex):
    """
    Disassemble a NumExpr object to show internal opcodes.

    Provides a readable representation of the compiled expression's
    internal virtual machine opcodes, useful for understanding optimization
    decisions and debugging performance issues.

    Parameters:
    - nex (NumExpr): Compiled expression object to disassemble

    Returns:
    list: One tuple per instruction, holding the opcode name followed by
    its operands (registers, inputs, constants). The result is a list, not
    a single string: print() it directly, iterate over it, or use len()
    to count instructions.

    Note:
    NOTE(review): the exception raised when the argument is not a compiled
    NumExpr object is not confirmed here; verify before relying on a
    specific exception type.
    """
86
```
87
88
**Usage Examples:**
89
90
```python
91
# Compile an expression, then look at the program it produced
expr = ne.NumExpr("a * b + sin(c) * exp(d)")
disassembly = ne.disassemble(expr)
print("Expression disassembly:")
print(disassembly)

# Put a short program next to a longer one
simple_expr = ne.NumExpr("a + b")
complex_expr = ne.NumExpr("sin(a) * cos(b) + exp(c/10) * sqrt(d)")

for heading, compiled in (
    ("Simple expression:", simple_expr),
    ("\nComplex expression:", complex_expr),
):
    print(heading)
    print(ne.disassemble(compiled))

# Analyze optimization decisions
memory_intensive = ne.NumExpr("a * b * c * d * e")  # Many temporaries
print("\nMemory-intensive expression:")
print(ne.disassemble(memory_intensive))
110
```
111
112
113
## Advanced Analysis Techniques
114
115
### Performance Profiling
116
117
```python
118
import time
119
import numpy as np
120
import numexpr as ne
121
122
def profile_expression(expression, variables, iterations=100):
    """Profile an expression's performance characteristics.

    Validates the expression, prints its disassembly, then times three ways
    of running it: ne.evaluate(), run() on a pre-compiled NumExpr object,
    and ne.re_evaluate().

    Parameters:
    - expression (str): Expression string to profile
    - variables (dict): Maps each variable name used in the expression to
      its array
    - iterations (int): Number of timed calls per method; must be >= 1

    Returns:
    dict or None: Average seconds per call keyed by method label, or None
    if validation failed

    Raises:
    ValueError: If iterations is less than 1
    """
    if iterations < 1:
        # Guard the elapsed / iterations division below.
        raise ValueError(f"iterations must be at least 1, got {iterations}")

    # validate() signals failure by *returning* the exception object (None
    # means success); it carries no type/shape tuple to unpack.
    error = ne.validate(expression, local_dict=variables)
    if error is not None:
        print(f"Validation failed: {error}")
        return None
    print("Expression valid")

    # Create compiled version
    compiled_expr = ne.NumExpr(expression)
    print("Disassembly:")
    print(ne.disassemble(compiled_expr))

    # run() takes its operands positionally, in the order of input_names;
    # passing them as keywords does not bind them to the expression.
    run_args = [variables[name] for name in compiled_expr.input_names]

    # Time evaluation methods
    methods = [
        ("evaluate()", lambda: ne.evaluate(expression, local_dict=variables)),
        ("compiled.run()", lambda: compiled_expr.run(*run_args)),
        ("re_evaluate()", lambda: ne.re_evaluate(local_dict=variables)),
    ]

    results = {}
    for method_name, method_func in methods:
        # re_evaluate() replays the most recent expression, so make sure
        # that expression is the one being profiled.
        if method_name == "re_evaluate()":
            ne.evaluate(expression, local_dict=variables)  # Prime the cache

        # perf_counter() is the monotonic, high-resolution clock meant for
        # measuring short durations.
        start = time.perf_counter()
        for _ in range(iterations):
            method_func()
        elapsed = time.perf_counter() - start

        per_call = elapsed / iterations
        results[method_name] = per_call
        print(f"{method_name}: {per_call:.6f}s per call")

    return results
162
163
# Example usage: three equally sized random operands named a, b and c
data = {name: np.random.random(100000) for name in ('a', 'b', 'c')}

profile_expression("a * sin(b) + exp(c/10)", data)
171
```
172
173
### Type and Shape Analysis
174
175
```python
176
def analyze_expression_compatibility(expressions, data_sets):
    """Analyze multiple expressions against multiple data sets.

    Every expression is validated against every data set and the outcome
    is printed; nothing is evaluated.

    Parameters:
    - expressions (dict): Maps a display name to an expression string
    - data_sets (dict): Maps a display name to a dict of variable bindings
      (variable name -> array)

    Returns:
    None
    """
    for expr_name, expression in expressions.items():
        print(f"\nAnalyzing: {expr_name}")
        print(f"Expression: {expression}")

        for data_name, data_dict in data_sets.items():
            print(f"\n Testing with {data_name}:")

            # Show which names this data set can supply
            print(f" Available variables: {sorted(data_dict)}")

            # validate() returns None when the expression is usable with
            # these bindings, otherwise the exception describing why not.
            # It does not return type/shape details, so there is nothing
            # to unpack.
            error = ne.validate(expression, local_dict=data_dict)
            if error is None:
                print(" Valid")
            else:
                print(f" Invalid: {type(error).__name__}: {error}")
200
201
# Example usage
expressions = {
    "arithmetic": "a + b * c",
    "trigonometric": "sin(a) + cos(b)",
    "mixed": "a * sin(b) + sqrt(c**2 + d**2)",
}

data_sets = {
    "2D arrays": {name: np.random.random((100, 50)) for name in 'abcd'},
    # Note: 'd' missing from this set - will be detected
    "1D arrays": {name: np.random.random(1000) for name in 'abc'},
}

analyze_expression_compatibility(expressions, data_sets)
224
```
225
226
### Memory Usage Analysis
227
228
```python
229
import sys
230
import gc
231
import numpy as np
232
import numexpr as ne
233
234
def analyze_memory_usage(expression, array_sizes):
    """Analyze memory usage patterns for different array sizes.

    For each size, builds random 1-D operands named a, b and c, reports
    how many bytes they occupy, validates the expression, evaluates it once
    to learn the output's dtype and size, and prints the chunk size NumExpr
    reports.

    Parameters:
    - expression (str): Expression string using the variables a, b, c
    - array_sizes (iterable of int): Element counts to test

    Returns:
    None
    """
    print(f"Analyzing memory usage for: {expression}")

    for size in array_sizes:
        print(f"\nArray size: {size:,} elements")

        # Create test data
        data = {name: np.random.random(size) for name in ('a', 'b', 'c')}

        # nbytes is the size of each array's data buffer
        input_bytes = sum(arr.nbytes for arr in data.values())
        print(f" Input data memory: {input_bytes / 1024 / 1024:.2f} MB")

        # validate() returns None on success or the exception on failure;
        # it does not hand back dtype/shape information.
        error = ne.validate(expression, local_dict=data)
        if error is not None:
            print(f" Validation failed: {error}")
            continue

        # Evaluate once and read the output's size and dtype off the result
        result = ne.evaluate(expression, local_dict=data)
        print(f" Expected output: {result.nbytes / 1024 / 1024:.2f} MB")

        # NumExpr should use minimal additional memory due to chunking
        block_size = ne.__BLOCK_SIZE1__
        print(f" Chunk size: {block_size} elements")
        chunk_memory = block_size * result.dtype.itemsize
        print(f" Estimated chunk memory: {chunk_memory / 1024:.2f} KB")
265
266
# Example usage: element counts from one thousand up to one million
analyze_memory_usage("a * sin(b) + exp(c/10)", [10**power for power in range(3, 7)])
268
```
269
270
## Debugging Common Issues
271
272
### Expression Syntax Problems
273
274
```python
275
def debug_expression_syntax(expressions):
    """Report, for each expression string, whether validation accepts it.

    Each expression is passed to ne.validate() with one-element float
    arrays bound to the names a through j, and the outcome is printed.
    """
    for candidate in expressions:
        print(f"\nTesting: '{candidate}'")
        try:
            # Placeholder operands for the variable names a..j
            placeholders = {letter: np.array([1.0]) for letter in "abcdefghij"}
            outcome = ne.validate(candidate, local_dict=placeholders)
            if outcome is not None:
                print(f" Validation issue: {outcome}")
            else:
                print(" Syntax appears valid")
        except Exception as exc:
            print(f" Syntax error: {type(exc).__name__}: {exc}")
291
292
# Expressions to run through the syntax checker, good and bad
problematic_expressions = [
    "a + b * c",           # Valid
    "a +* b",              # Invalid operator sequence
    "sin(a + b",           # Missing closing parenthesis
    "a ** b ** c",         # Valid but potentially confusing precedence
    "a and b",             # Invalid - should use &
    "a = b + c",           # Invalid - assignment not allowed
    "sin(a) + cos(b",      # Missing closing parenthesis
    "where(a > 0, b, c)",  # Valid conditional expression
]

debug_expression_syntax(problematic_expressions)
305
```
306
307
### Performance Debugging
308
309
```python
310
def debug_performance_issues(expression, data_dict):
    """Debug common performance issues.

    Prints the total operand size, the number of instructions in the
    compiled expression, the current threading configuration, and a
    thread-count suggestion.

    Parameters:
    - expression (str): Expression string to inspect
    - data_dict (dict): Maps variable names to the arrays the expression
      would run on

    Returns:
    None
    """
    import re  # local import: only needed for the function-name scan below

    print(f"Performance debugging for: {expression}")

    # Check array sizes; np.size() yields an integer element count and also
    # accepts scalars, so the total stays an int for the ',' format below.
    total_elements = sum(int(np.size(arr)) for arr in data_dict.values())
    print(f"Total array elements: {total_elements:,}")

    if total_elements < 10000:
        print(" Warning: Small arrays may not benefit from NumExpr")

    # Check expression complexity. disassemble() returns a list with one
    # entry per instruction, so its length is the instruction count.
    compiled = ne.NumExpr(expression)
    opcode_count = len(ne.disassemble(compiled))
    print(f" Opcodes in compiled expression: {opcode_count}")

    # Check threading configuration
    current_threads = ne.get_num_threads()
    print(f" NumExpr threads: {current_threads}")
    if ne.use_vml:
        print(f" VML threads: {ne.get_vml_num_threads()}")
        # Match whole function names followed by "(" so that e.g. 'sin'
        # is not reported for 'arcsin(' or 'sinh('.
        detected = [
            func
            for func in ['sin', 'cos', 'exp', 'log', 'sqrt']
            if re.search(rf"\b{func}\s*\(", expression)
        ]
        print(f" VML functions detected: {', '.join(detected)}")

    # Threading recommendation: suggest the core count, capped at 4
    optimal_threads = min(4, ne.detect_number_of_cores())
    if current_threads != optimal_threads:
        print(f" Suggestion: Try ne.set_num_threads({optimal_threads})")
338
339
# Example usage: two one-million-element random operands
large_data = {name: np.random.random(1000000) for name in ('a', 'b')}

debug_performance_issues("sin(a) * exp(b) + sqrt(a * b)", large_data)
346
```
347
348
Expression analysis and debugging tools are essential for understanding NumExpr behavior, optimizing performance, and troubleshooting issues during development. They provide insights into compilation decisions, memory usage patterns, and execution characteristics that help developers make informed optimization choices.