Fast numerical expression evaluator for NumPy that accelerates array operations through optimized implementations and multi-threading
—
Tools for analyzing, validating, and debugging expressions including disassembly of compiled expressions. These utilities help developers understand expression behavior, optimize performance, and troubleshoot issues during development.
Validate expressions without executing them, providing type and shape analysis for debugging and development workflows.
def validate(ex, local_dict=None, global_dict=None, out=None, order='K', casting='safe', **kwargs):
"""
Validate a mathematical expression without evaluating it.
Validates an expression by performing parsing, type checking, and
compatibility analysis without executing the computation. Returns None
on successful validation or an Exception object if validation fails.
Parameters:
- ex (str): Mathematical expression string to validate
- local_dict (dict, optional): Local variable bindings for type checking
- global_dict (dict, optional): Global variable bindings
- out (ndarray, optional): Output array for compatibility checking
- order (str): Memory layout order ('K', 'A', 'C', 'F')
- casting (str): Casting safety level ('no', 'equiv', 'safe', 'same_kind', 'unsafe')
- **kwargs: Additional variables for validation
Returns:
None or Exception: None if expression is valid, Exception object if invalid
Note:
After successful validation, you can proceed directly to re_evaluate()
"""

Usage Examples:
import numpy as np
import numexpr as ne
# Validate expression before expensive computation
a = np.random.random((1000, 1000))
b = np.random.random((1000, 1000))
validation_result = ne.validate("a * sin(b) + sqrt(a**2 + b**2)",
local_dict={'a': a, 'b': b})
if validation_result is None:
print("Expression is valid")
# Now safe to evaluate
result = ne.evaluate("a * sin(b) + sqrt(a**2 + b**2)", local_dict={'a': a, 'b': b})
else:
print(f"Expression validation failed: {validation_result}")
# Validate output array compatibility
output = np.empty((1000, 1000), dtype=np.float32)
validation_result = ne.validate("a + b", local_dict={'a': a, 'b': b}, out=output, casting='safe')
if validation_result is None:
print("Output array compatible")
else:
print(f"Output array incompatible: {validation_result}")

Examine the internal representation of compiled expressions to understand optimization and execution paths.
def disassemble(nex):
"""
Disassemble a NumExpr object to show internal opcodes.
Provides a human-readable representation of the compiled expression's
internal virtual machine opcodes, useful for understanding optimization
decisions and debugging performance issues.
Parameters:
- nex (NumExpr): Compiled expression object to disassemble
Returns:
list: One entry per opcode of the compiled program, each holding the opcode name followed by its register/constant operands
Raises:
TypeError: If input is not a compiled NumExpr object
"""

Usage Examples:
# Create and disassemble a compiled expression
expr = ne.NumExpr("a * b + sin(c) * exp(d)")
disassembly = ne.disassemble(expr)
print("Expression disassembly:")
print(disassembly)
# Compare simple vs complex expressions
simple_expr = ne.NumExpr("a + b")
complex_expr = ne.NumExpr("sin(a) * cos(b) + exp(c/10) * sqrt(d)")
print("Simple expression:")
print(ne.disassemble(simple_expr))
print("\nComplex expression:")
print(ne.disassemble(complex_expr))
# Analyze optimization decisions
memory_intensive = ne.NumExpr("a * b * c * d * e") # Many temporaries
print("\nMemory-intensive expression:")
print(ne.disassemble(memory_intensive))

import time
import numpy as np
import numexpr as ne
def profile_expression(expression, variables, iterations=100):
    """Profile an expression's performance characteristics.

    Validates the expression, prints its disassembly, then times three ways
    of running it: ``ne.evaluate()``, a pre-compiled ``NumExpr`` object, and
    ``ne.re_evaluate()``.

    Parameters:
    - expression (str): Expression string to profile
    - variables (dict): Mapping of variable name to operand array
    - iterations (int): Number of timed calls per method (must be >= 1)

    Returns:
    dict or None: Mapping of method label to mean seconds per call, or None
    if the expression fails validation

    Raises:
    ValueError: If iterations is smaller than 1
    """
    if iterations < 1:
        # Guard the per-call average below against division by zero.
        raise ValueError("iterations must be at least 1")

    # validate() reports a problem by *returning* the exception object and
    # returns None on success; it carries no type/shape/VML information.
    validation_error = ne.validate(expression, local_dict=variables)
    if validation_error is not None:
        print(f"Validation failed: {validation_error}")
        return None
    print("Expression valid")

    # Create compiled version
    # NOTE(review): compiled without an explicit signature, so operands are
    # presumably treated as the default (double) type - confirm before
    # profiling non-float64 data.
    compiled_expr = ne.NumExpr(expression)
    print("Disassembly:")
    print(ne.disassemble(compiled_expr))

    # NumExpr.run() takes its operands positionally, in input_names order;
    # passing them as keyword arguments does not bind them to the inputs.
    ordered_inputs = [variables[name] for name in compiled_expr.input_names]

    # Time evaluation methods
    methods = [
        ("evaluate()", lambda: ne.evaluate(expression, local_dict=variables)),
        ("compiled.run()", lambda: compiled_expr.run(*ordered_inputs)),
        ("re_evaluate()", lambda: ne.re_evaluate(local_dict=variables)),
    ]

    results = {}
    for method_name, method_func in methods:
        # Prepare for re_evaluate: it re-runs the most recently evaluated
        # expression, so make sure that is the one being profiled.
        if method_name == "re_evaluate()":
            ne.evaluate(expression, local_dict=variables)  # Prime the cache

        # Time the method with a monotonic, high-resolution clock.
        start = time.perf_counter()
        for _ in range(iterations):
            method_func()
        elapsed = time.perf_counter() - start

        per_call = elapsed / iterations
        results[method_name] = per_call
        print(f"{method_name}: {per_call:.6f}s per call")

    return results
# Example usage
data = {
'a': np.random.random(100000),
'b': np.random.random(100000),
'c': np.random.random(100000)
}
profile_expression("a * sin(b) + exp(c/10)", data)


def analyze_expression_compatibility(expressions, data_sets):
    """Analyze multiple expressions against multiple data sets.

    For every (expression, data set) pair, prints the variables the data set
    provides and whether the expression validates against it.

    Parameters:
    - expressions (dict): Mapping of display name to expression string
    - data_sets (dict): Mapping of display name to a variable dictionary
      (variable name -> array) used as local_dict for validation

    Returns:
    None: Results are printed
    """
    for expr_name, expression in expressions.items():
        print(f"\nAnalyzing: {expr_name}")
        print(f"Expression: {expression}")

        # Note: Variable dependency analysis requires inspection of the expression
        # This would typically be done through expression validation
        for data_name, data_dict in data_sets.items():
            print(f"\n Testing with {data_name}:")

            # Check variable availability through validation
            available = set(data_dict.keys())
            print(f" Available variables: {sorted(available)}")

            # Validate compatibility. validate() returns None when the
            # expression is valid and returns (does not raise) the exception
            # otherwise, so the outcome is decided by an identity check.
            validation_error = ne.validate(expression, local_dict=data_dict)
            if validation_error is None:
                print(" Valid")
            else:
                print(f" Invalid: {validation_error}")
# Example usage
expressions = {
"arithmetic": "a + b * c",
"trigonometric": "sin(a) + cos(b)",
"mixed": "a * sin(b) + sqrt(c**2 + d**2)"
}
data_sets = {
"2D arrays": {
'a': np.random.random((100, 50)),
'b': np.random.random((100, 50)),
'c': np.random.random((100, 50)),
'd': np.random.random((100, 50))
},
"1D arrays": {
'a': np.random.random(1000),
'b': np.random.random(1000),
'c': np.random.random(1000)
# Note: 'd' missing - will be detected
}
}
analyze_expression_compatibility(expressions, data_sets)

import sys
import gc
import numpy as np
import numexpr as ne
def analyze_memory_usage(expression, array_sizes):
    """Analyze memory usage patterns for different array sizes.

    For each size, builds three random float arrays named a, b and c,
    reports their memory footprint, validates the expression against them,
    and reports the size of the result plus a per-chunk estimate.

    Parameters:
    - expression (str): Expression string using the variables a, b and c
    - array_sizes (iterable of int): Element counts to test

    Returns:
    None: Results are printed
    """
    print(f"Analyzing memory usage for: {expression}")

    for size in array_sizes:
        print(f"\nArray size: {size:,} elements")

        # Create test data
        data = {
            'a': np.random.random(size),
            'b': np.random.random(size),
            'c': np.random.random(size),
        }

        # Measure baseline memory. nbytes counts the array buffers
        # themselves; sys.getsizeof is shallow and unreliable for views.
        gc.collect()
        baseline = sum(arr.nbytes for arr in data.values())
        print(f" Input data memory: {baseline / 1024 / 1024:.2f} MB")

        # validate() returns None on success or the exception on failure;
        # it does not return result type/shape information.
        validation_error = ne.validate(expression, local_dict=data)
        if validation_error is not None:
            print(f" Validation failed: {validation_error}")
            continue

        # After a successful validate(), re_evaluate() can run the same
        # expression directly; the result supplies the real dtype and size.
        result = ne.re_evaluate(local_dict=data)
        print(f" Expected output: {result.nbytes / 1024 / 1024:.2f} MB")

        # NumExpr should use minimal additional memory due to chunking
        print(f" Chunk size: {ne.__BLOCK_SIZE1__} elements")
        chunk_memory = ne.__BLOCK_SIZE1__ * result.dtype.itemsize
        print(f" Estimated chunk memory: {chunk_memory / 1024:.2f} KB")
# Example usage
analyze_memory_usage("a * sin(b) + exp(c/10)", [1000, 10000, 100000, 1000000])


def debug_expression_syntax(expressions):
    """Debug common expression syntax issues.

    Each expression is validated against one-element float arrays bound to
    the names a through j, and the outcome is printed.

    Parameters:
    - expressions (iterable of str): Expression strings to check

    Returns:
    None: Results are printed
    """
    for candidate in expressions:
        print(f"\nTesting: '{candidate}'")
        try:
            # Placeholder operands for the common variable names a-j.
            placeholders = {}
            for letter in "abcdefghij":
                placeholders[letter] = np.array([1.0])

            outcome = ne.validate(candidate, local_dict=placeholders)
            if outcome is not None:
                print(f" Validation issue: {outcome}")
                continue
            print(" Syntax appears valid")
        except Exception as exc:
            print(f" Syntax error: {type(exc).__name__}: {exc}")
# Test problematic expressions
problematic_expressions = [
"a + b * c", # Valid
"a +* b", # Invalid operator sequence
"sin(a + b", # Missing closing parenthesis
"a ** b ** c", # Valid but potentially confusing precedence
"a and b", # Invalid - should use &
"a = b + c", # Invalid - assignment not allowed
"sin(a) + cos(b", # Missing closing parenthesis
"where(a > 0, b, c)" # Valid conditional expression
]
debug_expression_syntax(problematic_expressions)


def debug_performance_issues(expression, data_dict):
    """Debug common performance issues.

    Prints the total operand size, the number of opcodes in the compiled
    expression, the current threading configuration, and a thread-count
    suggestion.

    Parameters:
    - expression (str): Expression string to inspect
    - data_dict (dict): Mapping of variable name to operand array

    Returns:
    None: Results are printed
    """
    print(f"Performance debugging for: {expression}")

    # Check array sizes
    total_elements = sum(arr.size for arr in data_dict.values())
    print(f"Total array elements: {total_elements:,}")
    if total_elements < 10000:
        print(" Warning: Small arrays may not benefit from NumExpr")

    # Check expression complexity. disassemble() returns a list with one
    # entry per opcode (not a newline-separated string), so its length is
    # the opcode count.
    compiled = ne.NumExpr(expression)
    disasm = ne.disassemble(compiled)
    opcode_count = len(disasm)
    print(f" Opcodes in compiled expression: {opcode_count}")

    # Check threading configuration
    print(f" NumExpr threads: {ne.get_num_threads()}")
    if ne.use_vml:
        print(f" VML threads: {ne.get_vml_num_threads()}")
        # Plain substring match on the expression text, so this is only a
        # rough indication of which functions appear.
        print(f" VML functions detected: {', '.join(func for func in ['sin', 'cos', 'exp', 'log', 'sqrt'] if func in expression)}")

    # Threading recommendation
    optimal_threads = min(4, ne.detect_number_of_cores())
    if ne.get_num_threads() != optimal_threads:
        print(f" Suggestion: Try ne.set_num_threads({optimal_threads})")
# Example usage
large_data = {
'a': np.random.random(1000000),
'b': np.random.random(1000000)
}
debug_performance_issues("sin(a) * exp(b) + sqrt(a * b)", large_data)

Expression analysis and debugging tools are essential for understanding NumExpr behavior, optimizing performance, and troubleshooting issues during development. They provide insights into compilation decisions, memory usage patterns, and execution characteristics that help developers make informed optimization choices.
Install with Tessl CLI
npx tessl i tessl/pypi-numexpr