Manipulate JSON-like data with NumPy-like idioms for scientific computing and high-energy physics.
—
Quality: Pending — does it follow best practices?
Impact: Pending — no eval scenarios have been run.
Seamless integration with high-performance computing frameworks including Numba JIT compilation, JAX automatic differentiation, and specialized backends for GPU computing and scientific workflows. These integrations enable awkward arrays to participate in high-performance computing pipelines while maintaining their flexible data model.
Just-in-time compilation support for high-performance computing with awkward arrays, enabling compiled functions that work directly with nested data structures.
import awkward.numba
def enable_numba():
    """Turn on the Numba integration for awkward arrays.

    Registers awkward array types with Numba's type system so that
    awkward arrays may appear inside @numba.jit decorated functions.
    """
# Numba-compilable operations
@numba.jit
def compute_with_awkward(array):
"""
Example of Numba-compiled function working with awkward arrays.
Parameters:
- array: Awkward array that will be compiled
Returns:
Computed result with full JIT performance
"""The Numba integration provides:
Automatic differentiation and GPU computing support through JAX integration, enabling machine learning and scientific computing workflows.
import awkward.jax
def register_jax():
    """Hook awkward arrays into the JAX transformation system.

    After registration, awkward arrays can participate in JAX
    transformations such as jit, grad, vmap, and pmap for automatic
    differentiation and parallelization.
    """
# JAX transformation support
def jax_compatible_function(array):
"""
Function that can be transformed by JAX (jit, grad, etc.).
Parameters:
- array: Awkward array compatible with JAX transformations
Returns:
Result that supports automatic differentiation
"""JAX integration features:
Unified interface for managing computational backends and moving arrays between different execution environments.
def backend(array):
    """Report which computational backend *array* currently uses.

    Parameters:
    - array: Array whose backend is queried

    Returns:
    str naming the current backend ("cpu", "cuda", "jax", etc.)
    """
def to_backend(array, backend, highlevel=True, behavior=None):
    """Move *array* onto the requested computational backend.

    Parameters:
    - array: Array to move
    - backend: str, target backend name:
        * "cpu": standard CPU backend using NumPy
        * "cuda": CUDA backend using CuPy
        * "jax": JAX backend for automatic differentiation
        * "typetracer": type-inference backend carrying no data
    - highlevel: bool, return an Array when True, a Content layout when False
    - behavior: dict, custom behavior attached to the result

    Returns:
    Array moved to the target backend
    """
def copy_to(array, backend):
"""
Copy array data to different backend.
Parameters:
- array: Array to copy
- backend: str, destination backend
Returns:
Array copy on target backend
"""Lazy type inference system that analyzes array operations without materializing data, enabling static analysis and optimization.
import awkward.typetracer
class TypeTracer:
    """Lazy evaluation system for type inference without materializing data.

    A TypeTracer array records type information and the operations applied
    to it instead of holding real data, which enables:
    - static type checking
    - memory usage analysis
    - operation optimization
    - schema inference
    """

    def touch_data(self, recursive=True):
        """Mark the data as accessed for dependency tracking.

        Parameters:
        - recursive: bool, if True nested data is marked touched as well
        """

    def touch_shape(self, recursive=True):
        """Mark the shape information as accessed.

        Parameters:
        - recursive: bool, if True nested shapes are marked touched as well
        """
def typetracer_with_report(array):
    """Build a type tracer that also produces access reports.

    Parameters:
    - array: Array to derive the type tracer from

    Returns:
    tuple of (TypeTracer array, report function)
    """
def typetracer_from_form(form):
"""
Create type tracer directly from Form description.
Parameters:
- form: Form object describing array structure
Returns:
TypeTracer array matching the form
"""C++ interoperability through cppyy, enabling integration with C++ libraries and ROOT ecosystem common in high-energy physics.
import awkward.cppyy
def register_cppyy():
    """Register awkward types with cppyy for C++ interoperability.

    Enables:
    - passing awkward arrays to C++ functions
    - converting C++ containers to awkward arrays
    - integration with the ROOT data analysis framework
    - zero-copy data sharing where possible
    """
def cpp_interface(array):
"""
Create C++-compatible interface for array.
Parameters:
- array: Awkward array to create C++ interface for
Returns:
C++-compatible proxy object
"""Functions for GPU-accelerated computing using CUDA and related frameworks.
def to_cuda(array):
    """Transfer the array into CUDA GPU memory.

    Parameters:
    - array: Array to move onto the GPU

    Returns:
    Array whose data lives in GPU memory
    """
def from_cuda(array):
    """Transfer the array from GPU memory back to the CPU.

    Parameters:
    - array: GPU-resident array to bring back

    Returns:
    Array whose data lives in CPU memory
    """
def is_cuda(array):
"""
Test if array data resides in GPU memory.
Parameters:
- array: Array to test
Returns:
bool indicating if array is on GPU
"""Helper functions for specific integration scenarios and framework compatibility.
def numba_array_typer(array_type):
    """Produce the Numba type signature for an awkward array type.

    Parameters:
    - array_type: Awkward array type to translate

    Returns:
    Numba type signature usable for compilation
    """
def jax_pytree_flatten(array):
    """Flatten an awkward array for JAX pytree operations.

    Parameters:
    - array: Array to flatten

    Returns:
    tuple of (leaves, tree_def) for JAX pytree operations
    """
def jax_pytree_unflatten(tree_def, leaves):
    """Rebuild an awkward array from JAX pytree components.

    Parameters:
    - tree_def: Tree definition produced by the flatten operation
    - leaves: Leaf values produced by the flatten operation

    Returns:
    Reconstructed awkward array
    """
def dispatch_map():
"""
Get mapping of operations to backend-specific implementations.
Returns:
dict mapping operation names to backend implementations
"""Tools for analyzing and optimizing performance across different backends and integration scenarios.
def benchmark_backends(array, operation, backends=None):
    """Time one operation across several computational backends.

    Parameters:
    - array: Array used as the benchmark input
    - operation: Function to benchmark
    - backends: list of str, backends to test (None means all available)

    Returns:
    dict mapping backend names to timing results
    """
def memory_usage(array, backend=None):
    """Summarize the array's memory footprint on a backend.

    Parameters:
    - array: Array to analyze
    - backend: str, backend to inspect (None means the current one)

    Returns:
    dict with memory usage statistics
    """
def optimize_for_backend(array, backend, operation_hint=None):
    """Tune the array layout for a given backend and workload.

    Parameters:
    - array: Array to optimize
    - backend: str, target backend
    - operation_hint: str, hint about the operations to be performed

    Returns:
    Array optimized for the target backend
    """
import awkward as ak
import numba
import numpy as np

# Activate the awkward <-> Numba integration layer.
ak.numba.register()

@numba.jit
def fast_computation(events):
    """JIT-compiled function working with nested data."""
    acc = 0.0
    for event in events:
        for particle in event.particles:
            if particle.pt > 10.0:
                acc += particle.pt * particle.pt
    return acc

# Build a small nested dataset and run the compiled function.
events = ak.Array([
    {"particles": [{"pt": 15.0}, {"pt": 5.0}]},
    {"particles": [{"pt": 25.0}, {"pt": 12.0}]},
])
result = fast_computation(events)  # runs at compiled speed
import awkward as ak
import jax
import jax.numpy as jnp

# Expose awkward arrays to JAX as pytrees.
ak.jax.register()

def physics_calculation(events):
    """Function that can be JAX-transformed."""
    pt_values = events.particles.pt
    return ak.sum(pt_values * pt_values, axis=1)

# JAX transformations compose with the registered pytree type.
jit_calc = jax.jit(physics_calculation)
vectorized_calc = jax.vmap(physics_calculation)

def loss_function(events, weights):
    """Scalar loss suitable for automatic differentiation."""
    per_event = physics_calculation(events)
    return jnp.sum(per_event * weights)

# Differentiate with respect to the weights (argument index 1).
gradient_fn = jax.grad(loss_function, argnums=1)
import awkward as ak
import cupy as cp

# Build an array in host (CPU) memory.
cpu_array = ak.Array([[1, 2, 3], [4, 5]])
print(ak.backend(cpu_array))  # "cpu"

# Transfer the same data onto the GPU.
gpu_array = ak.to_backend(cpu_array, "cuda")
print(ak.backend(gpu_array))  # "cuda"

if cp.cuda.is_available():
    # Run the reduction on the GPU, then bring the result back.
    gpu_result = ak.sum(gpu_array * gpu_array)
    cpu_result = ak.to_backend(gpu_result, "cpu")
import awkward as ak
# Describe the layout with Forms instead of building real data.
form = ak.forms.RecordForm([
    ak.forms.ListForm("i64", "i64", ak.forms.NumpyForm("float64")),
    ak.forms.NumpyForm("int32"),
], ["particles", "event_id"])
tracer = ak.typetracer.typetracer_from_form(form)

def analyze_operation(data):
    """Function to analyze without data."""
    return ak.sum(data.particles, axis=1) + data.event_id

# Applying the function to the tracer records types, not values.
traced_result = analyze_operation(tracer)
print(f"Result type: {ak.type(traced_result)}")
import awkward as ak
import cppyy

# Make awkward arrays visible to cppyy.
ak.cppyy.register()

# Declare a C++ helper on the fly (example).
cppyy.cppdef("""
double compute_mass(const std::vector<double>& pt,
                    const std::vector<double>& eta) {
    double total = 0.0;
    for(size_t i = 0; i < pt.size(); ++i) {
        total += pt[i] * cosh(eta[i]);
    }
    return total;
}
""")

# Two events' worth of particle kinematics.
particles = ak.Array({
    "pt": [[10.0, 20.0], [15.0]],
    "eta": [[1.0, 0.5], [1.2]],
})

# Hand each event's columns to the C++ function.
for event in particles:
    mass = cppyy.gbl.compute_mass(event.pt, event.eta)
    print(f"Event mass: {mass}")
import awkward as ak
import time

# Build a moderately large rectangular dataset for timing.
large_array = ak.Array([
    [i + j for j in range(1000)]
    for i in range(1000)
])

def benchmark_operation(array, backend_name):
    """Time a squared-sum reduction of *array* on one backend.

    Parameters:
    - array: Array to benchmark with
    - backend_name: str, backend the data is moved to before timing

    Returns:
    float, elapsed wall-clock seconds for the reduction
    """
    backend_array = ak.to_backend(array, backend_name)
    # perf_counter is the recommended monotonic clock for benchmarking;
    # time.time() can jump and has coarser resolution.
    start = time.perf_counter()
    ak.sum(backend_array * backend_array, axis=1)
    return time.perf_counter() - start

# Determine which optional backends are importable. Note: ak.backend is a
# function (it reports an array's backend); there is no
# ak.backend.cuda_available()/jax_available() helper, so probe the optional
# dependencies directly instead.
backends = ["cpu"]
try:
    import cupy  # noqa: F401
    backends.append("cuda")
except ImportError:
    pass
try:
    import jax  # noqa: F401
    backends.append("jax")
except ImportError:
    pass

for backend in backends:
    duration = benchmark_operation(large_array, backend)
    print(f"{backend}: {duration:.4f} seconds")
import awkward as ak
def optimize_for_computation(array, target_backend="cpu", operation="reduction"):
"""Optimize array for specific computation pattern."""
# Pack array for better memory layout
packed = ak.to_packed(array)
# Move to target backend
backend_array = ak.to_backend(packed, target_backend)
# Apply operation-specific optimizations
if operation == "reduction" and target_backend == "cuda":
# Use specific CUDA optimizations
return ak.with_parameter(backend_array, "gpu_optimized", True)
return backend_array
# Example usage
data = ak.Array([[1, 2, 3], [4, 5, 6, 7], [8, 9]])
optimized = optimize_for_computation(data, "cuda", "reduction")
result = ak.sum(optimized, axis=1) # Runs with optimizationsInstall with Tessl CLI
npx tessl i tessl/pypi-awkward