Manipulate JSON-like data with NumPy-like idioms for scientific computing and high-energy physics.
npx @tessl/cli install tessl/pypi-awkward@2.8.0
A comprehensive Python library for manipulating JSON-like data with NumPy-like idioms. Awkward Array enables efficient processing of nested, variable-sized data structures commonly found in scientific computing, particularly high-energy physics applications. It provides the performance of NumPy with the flexibility to handle complex, heterogeneous data that doesn't fit into regular arrays.
pip install awkward
import awkward as ak
For behavior customization:
import awkward.behavior
For integration with specific frameworks:
import awkward.numba # Numba JIT compilation
import awkward.jax # JAX automatic differentiation
import awkward as ak
import numpy as np
# Create arrays from Python data
nested_list = [[1, 2, 3], [], [4, 5]]
array = ak.Array(nested_list)
print(array)
# [[1, 2, 3], [], [4, 5]]
# Mathematical operations work element-wise
squared = array ** 2
print(squared)
# [[1, 4, 9], [], [16, 25]]
# Reduction operations handle variable-length structure
sums = ak.sum(array, axis=1)
print(sums)
# [6, 0, 9]
# Complex nested structures
records = ak.Array([
{"x": [1, 2], "y": {"a": 10, "b": 20}},
{"x": [3], "y": {"a": 30, "b": 40}}
])
print(records.x)
# [[1, 2], [3]]
print(records.y.a)
# [10, 30]
Awkward Array's layered architecture provides both performance and flexibility:
High-level interface (Array, Record, ArrayBuilder): User-friendly containers that provide NumPy-like behavior for complex data structures.
This design enables Awkward Array to serve as a bridge between irregular scientific data and the NumPy ecosystem, providing the performance needed for large-scale scientific computing while maintaining the expressiveness required for complex data analysis workflows.
Comprehensive functions for creating arrays from various data sources including Python iterables, NumPy arrays, JSON data, and binary formats. Supports incremental building through ArrayBuilder for complex nested structures.
def from_iter(iterable, *, allow_record=True, highlevel=True, behavior=None, attrs=None, initial=1024, resize=8): ...
def from_numpy(array, highlevel=True, behavior=None): ...
def from_json(source, highlevel=True, behavior=None): ...
def from_arrow(array, highlevel=True, behavior=None): ...
def from_parquet(path, **kwargs): ...
class ArrayBuilder:
def null(self): ...
def boolean(self, x): ...
def integer(self, x): ...
def real(self, x): ...
def complex(self, real, imag=0): ...
def string(self, x): ...
def bytestring(self, x): ...
def datetime(self, x): ...
def timedelta(self, x): ...
def append(self, x): ...
def extend(self, iterable): ...
def begin_list(self): ...
def end_list(self): ...
def begin_tuple(self, numfields): ...
def end_tuple(self): ...
def begin_record(self, name=None): ...
def end_record(self): ...
def field(self, key): ...
def index(self, i): ...
Structural operations for reshaping, filtering, combining, and transforming arrays while preserving type information and handling variable-length data gracefully.
def concatenate(arrays, axis=0): ...
def zip(arrays, depth_limit=None): ...
def flatten(array, axis=1): ...
def unflatten(array, counts, axis=0): ...
def mask(array, selection): ...
def combinations(array, n, axis=1): ...
def cartesian(arrays, axis=1): ...
def with_field(array, what, where): ...
def without_field(array, where): ...
Full suite of mathematical operations including reductions, element-wise functions, linear algebra, and statistical analysis that handle missing data and nested structures appropriately.
def sum(array, axis=None, *, keepdims=False, mask_identity=False, highlevel=True, behavior=None, attrs=None): ...
def mean(array, axis=None, keepdims=False): ...
def var(array, axis=None, ddof=0, keepdims=False): ...
def std(array, axis=None, ddof=0, keepdims=False): ...
def min(array, axis=None, keepdims=False): ...
def max(array, axis=None, keepdims=False): ...
def argmin(array, axis=None, keepdims=False): ...
def argmax(array, axis=None, keepdims=False): ...
def linear_fit(x, y, axis=None): ...
def corr(x, y, axis=None): ...
Extensive support for reading from and writing to various data formats including Arrow, Parquet, JSON, NumPy, and integration with popular frameworks like PyTorch, TensorFlow, and JAX.
def to_arrow(array): ...
def to_parquet(array, destination, **kwargs): ...
def to_numpy(array): ...
def to_json(array, **kwargs): ...
def to_list(array): ...
def from_torch(array): ...
def to_torch(array): ...
def from_tensorflow(array): ...
def to_tensorflow(array): ...
def to_dataframe(array): ...
Comprehensive string processing capabilities modeled after Apache Arrow's compute functions, providing efficient operations on arrays of strings including pattern matching, transformations, and analysis.
def str.length(array): ...
def str.lower(array): ...
def str.upper(array): ...
def str.split_pattern(array, pattern): ...
def str.replace_substring(array, pattern, replacement): ...
def str.match_substring_regex(array, pattern): ...
def str.starts_with(array, pattern): ...
def str.extract_regex(array, pattern): ...
Rich type system providing precise descriptions of nested data structures, enabling static analysis, optimization, and cross-language interoperability. Includes schema management and metadata handling.
def type(array): ...
def typeof(array): ...
class ArrayType: ...
class ListType: ...
class RecordType: ...
class OptionType: ...
def with_parameter(array, key, value): ...
def parameters(array): ...
def validity_error(array): ...
Seamless integration with high-performance computing frameworks including Numba JIT compilation, JAX automatic differentiation, and specialized backends for GPU computing and scientific workflows.
import awkward.numba
import awkward.jax
import awkward.typetracer
def to_backend(array, backend): ...
def backend(array): ...
The primary user-facing class representing a multi-dimensional, possibly nested array with variable-length sublists and heterogeneous data types.
class Array:
def __init__(self, data, behavior=None): ...
def to_list(self): ...
def to_numpy(self): ...
@property
def type(self): ...
@property
def layout(self): ...
def show(self, limit_rows=20): ...
Incremental array construction with support for complex nested structures and mixed data types.
class ArrayBuilder:
def __init__(self, behavior=None): ...
def snapshot(self): ...
def list(self): ... # Context manager
def record(self): ... # Context manager
Single record (row) extracted from an Array, providing dict-like access to fields while maintaining type information.
class Record:
def __init__(self, array, at): ...
def to_list(self): ...
@property
def fields(self): ...