tessl/pypi-numexpr

Fast numerical expression evaluator for NumPy that accelerates array operations through optimized implementations and multi-threading

—

Pending

Overview

Eval results

Files

Expression Analysis and Debugging

Name: tessl/pypi-numexpr
Author: tessl

Tools for analyzing, validating, and debugging expressions including disassembly of compiled expressions. These utilities help developers understand expression behavior, optimize performance, and troubleshoot issues during development.

Capabilities

Expression Validation

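Validate expressions without executing them. `validate()` returns `None` when the expression is valid, or the exception object describing the problem when it is not, which makes it convenient for debugging and development workflows.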

def validate(ex, local_dict=None, global_dict=None, out=None, order='K', casting='safe', **kwargs):
    """
    Check that an expression is usable without computing its result.

    The expression is parsed, type checked, and analysed for compatibility,
    but the computation itself is not executed. A failure is reported by
    returning the exception object rather than by raising it.

    Parameters:
    - ex (str): Mathematical expression string to check
    - local_dict (dict, optional): Local variable bindings used for type checking
    - global_dict (dict, optional): Global variable bindings
    - out (ndarray, optional): Output array for compatibility checking
    - order (str): Memory layout order, one of 'K', 'A', 'C', 'F'
    - casting (str): Casting safety level, one of 'no', 'equiv', 'safe',
      'same_kind', 'unsafe'
    - **kwargs: Additional variables for validation
      (NOTE(review): upstream NumExpr appears to treat extra keywords as
      compiler options such as `optimization` / `truediv` - confirm)

    Returns:
    None or Exception: None when the expression is valid, otherwise the
    Exception object describing why validation failed

    Note:
    Once validation has succeeded, re_evaluate() can be called directly.
    """

Usage Examples:

import numpy as np
import numexpr as ne

# Validate expression before expensive computation
a = np.random.random((1000, 1000))
b = np.random.random((1000, 1000))
operands = {'a': a, 'b': b}
expression = "a * sin(b) + sqrt(a**2 + b**2)"

validation_result = ne.validate(expression, local_dict=operands)
if validation_result is not None:
    # validate() hands the exception back instead of raising it
    print(f"Expression validation failed: {validation_result}")
else:
    print("Expression is valid")
    # Now safe to evaluate
    result = ne.evaluate(expression, local_dict=operands)

# Validate output array compatibility
output = np.empty((1000, 1000), dtype=np.float32)
validation_result = ne.validate("a + b", local_dict=operands, out=output, casting='safe')
if validation_result is not None:
    print(f"Output array incompatible: {validation_result}")
else:
    print("Output array compatible")

Expression Disassembly

Examine the internal representation of compiled expressions to understand optimization and execution paths.

def disassemble(nex):
    """
    Show the internal opcodes of a compiled NumExpr object.

    Produces a human-readable view of the virtual machine program behind
    a compiled expression, which helps when studying optimization
    decisions or tracking down performance problems.

    Parameters:
    - nex (NumExpr): Compiled expression object to disassemble

    Returns:
    str: Human-readable disassembly showing opcodes, registers, and operations
    (NOTE(review): upstream NumExpr appears to return a list with one
    entry per instruction rather than a single string - confirm)

    Raises:
    TypeError: If input is not a compiled NumExpr object
    """

Usage Examples:

# Create and disassemble a compiled expression
expr = ne.NumExpr("a * b + sin(c) * exp(d)")
disassembly = ne.disassemble(expr)
print("Expression disassembly:", disassembly, sep="\n")

# Compare simple vs complex expressions
simple_expr = ne.NumExpr("a + b")
complex_expr = ne.NumExpr("sin(a) * cos(b) + exp(c/10) * sqrt(d)")

for heading, compiled in (
    ("Simple expression:", simple_expr),
    ("\nComplex expression:", complex_expr),
):
    print(heading)
    print(ne.disassemble(compiled))

# Analyze optimization decisions
memory_intensive = ne.NumExpr("a * b * c * d * e")  # Many temporaries
print("\nMemory-intensive expression:")
print(ne.disassemble(memory_intensive))

Advanced Analysis Techniques

Performance Profiling

import time
import numpy as np
import numexpr as ne

def profile_expression(expression, variables, iterations=100):
    """Profile an expression's performance characteristics.

    Validates the expression, prints the disassembly of its compiled form,
    then times three ways of evaluating it and prints the mean time per
    call for each.

    Parameters:
    - expression (str): Expression string to profile
    - variables (dict): Mapping of variable name to array; passed to
      NumExpr as local_dict
    - iterations (int): Number of timed calls per evaluation method;
      must be at least 1

    Returns:
    dict or None: Mapping of method label to mean seconds per call, or
    None when validation fails

    Raises:
    ValueError: If iterations is smaller than 1
    """
    if iterations < 1:
        raise ValueError("iterations must be at least 1")

    # validate() signals failure by *returning* the exception (None means
    # success), so inspect the return value instead of unpacking it.
    try:
        validation_error = ne.validate(expression, local_dict=variables)
    except Exception as e:
        validation_error = e
    if validation_error is not None:
        print(f"Validation failed: {validation_error}")
        return None
    print("Expression valid")

    # Create compiled version
    compiled_expr = ne.NumExpr(expression)
    print("Disassembly:")
    print(ne.disassemble(compiled_expr))

    # A compiled NumExpr takes its operands positionally, in the order
    # given by input_names; keyword arguments are reserved for options.
    run_args = [variables[name] for name in compiled_expr.input_names]

    # Time evaluation methods
    methods = [
        ("evaluate()", lambda: ne.evaluate(expression, local_dict=variables)),
        ("compiled.run()", lambda: compiled_expr.run(*run_args)),
        ("re_evaluate()", lambda: ne.re_evaluate(local_dict=variables)),
    ]

    results = {}
    for method_name, method_func in methods:
        # re_evaluate() replays the most recently evaluated expression,
        # so prime it outside the timed region.
        if method_name == "re_evaluate()":
            ne.evaluate(expression, local_dict=variables)  # Prime the cache

        # perf_counter() is monotonic and high resolution, unlike time()
        start = time.perf_counter()
        for _ in range(iterations):
            method_func()
        elapsed = time.perf_counter() - start

        per_call = elapsed / iterations
        results[method_name] = per_call
        print(f"{method_name}: {per_call:.6f}s per call")

    return results

# Example usage
data = {name: np.random.random(100000) for name in ('a', 'b', 'c')}

profile_expression("a * sin(b) + exp(c/10)", data)

Type and Shape Analysis

def analyze_expression_compatibility(expressions, data_sets):
    """Analyze multiple expressions against multiple data sets.

    For every expression / data set pair, prints the variables the data
    set provides and whether the expression validates against them.

    Parameters:
    - expressions (dict): Mapping of display name to expression string
    - data_sets (dict): Mapping of display name to a dict of variables
      (variable name to array) used as local_dict for validation

    Returns:
    None: Results are printed
    """
    for expr_name, expression in expressions.items():
        print(f"\nAnalyzing: {expr_name}")
        print(f"Expression: {expression}")

        for data_name, data_dict in data_sets.items():
            print(f"\n  Testing with {data_name}:")

            # Show which variables this data set can supply
            print(f"    Available variables: {sorted(data_dict)}")

            # validate() returns None on success and the exception object
            # on failure; it does not return type/shape information, so
            # the result must be inspected rather than unpacked.
            try:
                validation_error = ne.validate(expression, local_dict=data_dict)
            except Exception as e:
                validation_error = e

            if validation_error is None:
                print("    Valid")
            else:
                # Include the exception type: some messages (e.g. a
                # KeyError for a missing variable) are only the bare name.
                print(f"    Invalid: {type(validation_error).__name__}: {validation_error}")

# Example usage
expressions = dict(
    arithmetic="a + b * c",
    trigonometric="sin(a) + cos(b)",
    mixed="a * sin(b) + sqrt(c**2 + d**2)",
)

data_sets = {
    "2D arrays": {name: np.random.random((100, 50)) for name in "abcd"},
    # Note: 'd' missing - will be detected
    "1D arrays": {name: np.random.random(1000) for name in "abc"},
}

analyze_expression_compatibility(expressions, data_sets)

Memory Usage Analysis

import sys
import gc
import numpy as np
import numexpr as ne

def analyze_memory_usage(expression, array_sizes):
    """Analyze memory usage patterns for different array sizes.

    For each size, builds three random 1-D arrays named 'a', 'b' and 'c',
    then prints the input memory, the output memory, and an estimate of
    the per-chunk working memory.

    Parameters:
    - expression (str): Expression string using the variables a, b and c
    - array_sizes (iterable of int): Element counts to test

    Returns:
    None: Results are printed
    """
    print(f"Analyzing memory usage for: {expression}")

    for size in array_sizes:
        print(f"\nArray size: {size:,} elements")

        # Create test data
        data = {name: np.random.random(size) for name in ('a', 'b', 'c')}

        # nbytes is the size of the array data itself; sys.getsizeof also
        # counts the Python object header
        baseline = sum(arr.nbytes for arr in data.values())
        print(f"  Input data memory: {baseline / 1024 / 1024:.2f} MB")

        # validate() returns None on success or the exception on failure;
        # it carries no dtype/shape information.
        validation_error = ne.validate(expression, local_dict=data)
        if validation_error is not None:
            print(f"  Validation failed: {validation_error}")
            continue

        # After a successful validate(), re_evaluate() runs the validated
        # expression; measure the real result instead of guessing its
        # dtype and shape.
        result = np.asarray(ne.re_evaluate(local_dict=data))
        print(f"  Expected output: {result.nbytes / 1024 / 1024:.2f} MB")

        # NumExpr should use minimal additional memory due to chunking
        print(f"  Chunk size: {ne.__BLOCK_SIZE1__} elements")
        chunk_memory = ne.__BLOCK_SIZE1__ * result.dtype.itemsize
        print(f"  Estimated chunk memory: {chunk_memory / 1024:.2f} KB")

# Example usage
sizes_to_probe = [10 ** exponent for exponent in range(3, 7)]  # 1,000 .. 1,000,000
analyze_memory_usage("a * sin(b) + exp(c/10)", sizes_to_probe)

Debugging Common Issues

Expression Syntax Problems

def debug_expression_syntax(expressions):
    """Print, for each expression string, whether validation accepts it.

    Parameters:
    - expressions (iterable of str): Expression strings to check

    Returns:
    None: One report line is printed per expression
    """
    for candidate in expressions:
        print(f"\nTesting: '{candidate}'")
        try:
            # Bind one-element arrays to the names a..j so that common
            # variable references resolve during validation
            placeholders = {letter: np.array([1.0]) for letter in "abcdefghij"}
            outcome = ne.validate(candidate, local_dict=placeholders)
            if outcome is not None:
                report = f"  Validation issue: {outcome}"
            else:
                report = "  Syntax appears valid"
            print(report)
        except Exception as e:
            print(f"  Syntax error: {type(e).__name__}: {e}")

# Test problematic expressions
annotated_cases = [
    ("a + b * c", "Valid"),
    ("a +* b", "Invalid operator sequence"),
    ("sin(a + b", "Missing closing parenthesis"),
    ("a ** b ** c", "Valid but potentially confusing precedence"),
    ("a and b", "Invalid - should use &"),
    ("a = b + c", "Invalid - assignment not allowed"),
    ("sin(a) + cos(b", "Missing closing parenthesis"),
    ("where(a > 0, b, c)", "Valid conditional expression"),
]
problematic_expressions = [expression for expression, _note in annotated_cases]

debug_expression_syntax(problematic_expressions)

Performance Debugging

def debug_performance_issues(expression, data_dict):
    """Debug common performance issues.

    Prints the total input size, the number of instructions in the
    compiled expression, the threading configuration, and a thread-count
    suggestion.

    Parameters:
    - expression (str): Expression string to inspect
    - data_dict (dict): Mapping of variable name to array; only the
      element counts are used

    Returns:
    None: Findings are printed
    """
    print(f"Performance debugging for: {expression}")

    # Check array sizes
    total_elements = sum(int(np.size(arr)) for arr in data_dict.values())
    print(f"Total array elements: {total_elements:,}")

    if total_elements < 10000:
        print("  Warning: Small arrays may not benefit from NumExpr")

    # Check expression complexity
    compiled = ne.NumExpr(expression)
    disasm = ne.disassemble(compiled)
    # disassemble() yields one entry per instruction as a sequence; calling
    # str.split on it would fail. A textual listing is still accepted, in
    # which case its non-blank lines are counted.
    if isinstance(disasm, str):
        opcode_count = sum(1 for line in disasm.splitlines() if line.strip())
    else:
        opcode_count = len(disasm)
    print(f"  Opcodes in compiled expression: {opcode_count}")

    # Check threading configuration
    current_threads = ne.get_num_threads()
    print(f"  NumExpr threads: {current_threads}")
    if ne.use_vml:
        print(f"  VML threads: {ne.get_vml_num_threads()}")
        # Plain substring match on the expression text - a rough hint only
        print(f"  VML functions detected: {', '.join(func for func in ['sin', 'cos', 'exp', 'log', 'sqrt'] if func in expression)}")

    # Threading recommendation: a simple heuristic capped at four threads
    optimal_threads = min(4, ne.detect_number_of_cores())
    if current_threads != optimal_threads:
        print(f"  Suggestion: Try ne.set_num_threads({optimal_threads})")

# Example usage
large_data = {name: np.random.random(1000000) for name in ('a', 'b')}

debug_performance_issues("sin(a) * exp(b) + sqrt(a * b)", large_data)

Expression analysis and debugging tools are essential for understanding NumExpr behavior, optimizing performance, and troubleshooting issues during development. They provide insights into compilation decisions, memory usage patterns, and execution characteristics that help developers make informed optimization choices.

Install with Tessl CLI