Manipulate JSON-like data with NumPy-like idioms for scientific computing and high-energy physics.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Rich type system providing precise descriptions of nested data structures, enabling static analysis, optimization, and cross-language interoperability. The type system includes schema management, metadata handling, and comprehensive validation capabilities for complex heterogeneous data.
Functions for examining and working with array type information, enabling introspection and type-driven programming patterns.
def type(array):
"""
Get complete type information for array.
Parameters:
- array: Array to get type information for
Returns:
Type object describing the array's structure and element types
"""
def typeof(array):
"""
Get type information as string representation.
Parameters:
- array: Array to get type string for
Returns:
str containing human-readable type description
"""
def typestr(array):
"""
Get concise type string representation.
Parameters:
- array: Array to get type string for
Returns:
str containing compact type description
"""

Core type classes that represent the structure and semantics of awkward arrays, providing a rich type system for nested, heterogeneous data.
class Type:
"""
Base class for all awkward array types.
"""
@property
def parameters(self):
"""Get type parameters dict."""
def __repr__(self):
"""String representation of type."""
def __eq__(self, other):
"""Test type equality."""
class ArrayType(Type):
"""
Type representing a complete array with known length.
"""
def __init__(self, content_type, length, parameters=None):
"""
Parameters:
- content_type: Type of array elements
- length: int, length of array
- parameters: dict, optional type parameters
"""
@property
def content(self):
"""Get content type."""
@property
def length(self):
"""Get array length."""
class ScalarType(Type):
"""
Type wrapper for scalar (single element) values.
"""
def __init__(self, content_type, parameters=None):
"""
Parameters:
- content_type: Type of the scalar value
- parameters: dict, optional type parameters
"""
@property
def content(self):
"""Get content type."""
class ListType(Type):
"""
Type for variable-length lists.
"""
def __init__(self, content_type, parameters=None):
"""
Parameters:
- content_type: Type of list elements
- parameters: dict, optional type parameters
"""
@property
def content(self):
"""Get element type."""
class RegularType(Type):
"""
Type for fixed-length arrays/lists.
"""
def __init__(self, content_type, size, parameters=None):
"""
Parameters:
- content_type: Type of array elements
- size: int, fixed size of arrays
- parameters: dict, optional type parameters
"""
@property
def content(self):
"""Get element type."""
@property
def size(self):
"""Get fixed size."""
class RecordType(Type):
"""
Type for record/struct data with named fields.
"""
def __init__(self, contents, fields=None, parameters=None):
"""
Parameters:
- contents: dict mapping field names to types, or list of types
- fields: list of str, field names (if contents is list)
- parameters: dict, optional type parameters
"""
@property
def contents(self):
"""Get field types."""
@property
def fields(self):
"""Get field names."""
def field(self, name):
"""Get type of specific field."""
class OptionType(Type):
"""
Type for data that may contain None/missing values.
"""
def __init__(self, content_type, parameters=None):
"""
Parameters:
- content_type: Type of non-None values
- parameters: dict, optional type parameters
"""
@property
def content(self):
"""Get content type (when not None)."""
class UnionType(Type):
"""
Type for data that can be one of several different types.
"""
def __init__(self, contents, parameters=None):
"""
Parameters:
- contents: list of Types that can appear in the union
- parameters: dict, optional type parameters
"""
@property
def contents(self):
"""Get possible types."""
def content(self, index):
"""Get type at specific index."""
class NumpyType(Type):
"""
Type for NumPy primitive data types.
"""
def __init__(self, primitive, parameters=None):
"""
Parameters:
- primitive: str, NumPy dtype name (e.g., 'int64', 'float32')
- parameters: dict, optional type parameters
"""
@property
def primitive(self):
"""Get primitive type name."""
class UnknownType(Type):
"""
Type for data with undetermined or unknown structure.
"""
def __init__(self, parameters=None):
"""
Parameters:
- parameters: dict, optional type parameters
"""

Helper functions for working with types, converting between type representations, and type checking operations.
def from_datashape(datashape_str):
"""
Create Type from datashape string representation.
Parameters:
- datashape_str: str, datashape type specification
Returns:
Type object representing the datashape
"""
def dtype_to_primitive(dtype):
"""
Convert NumPy dtype to primitive type name.
Parameters:
- dtype: numpy.dtype object
Returns:
str representing primitive type name
"""
def primitive_to_dtype(primitive):
"""
Convert primitive type name to NumPy dtype.
Parameters:
- primitive: str, primitive type name
Returns:
numpy.dtype object
"""
def is_primitive(type_obj):
"""
Test if type represents a primitive (non-composite) type.
Parameters:
- type_obj: Type object to test
Returns:
bool indicating if type is primitive
"""

Functions for managing type parameters that provide metadata and customization for array behavior and interpretation.
def parameters(array):
"""
Get parameters from array's type.
Parameters:
- array: Array to get parameters from
Returns:
dict containing type parameters
"""
def with_parameter(array, key, value, highlevel=True, behavior=None):
"""
Add or modify a parameter in array's type.
Parameters:
- array: Array to modify
- key: str, parameter name
- value: parameter value (any JSON-serializable type)
- highlevel: bool, if True return Array, if False return Content layout
- behavior: dict, custom behavior for the result
Returns:
Array with parameter added to type
"""
def without_parameters(array, highlevel=True, behavior=None):
"""
Remove all parameters from array's type.
Parameters:
- array: Array to modify
- highlevel: bool, if True return Array, if False return Content layout
- behavior: dict, custom behavior for the result
Returns:
Array with all parameters removed from type
"""
def with_name(array, name, highlevel=True, behavior=None):
"""
Add a name to the array's type for semantic identification.
Parameters:
- array: Array to name
- name: str, name to assign to type
- highlevel: bool, if True return Array, if False return Content layout
- behavior: dict, custom behavior for the result
Returns:
Array with named type
"""

Form classes provide schema descriptions that can be serialized and used to reconstruct arrays from buffers, enabling efficient serialization and cross-language interoperability.
class Form:
"""
Base class for describing array structure/schema.
"""
def to_dict(self):
"""Convert form to dictionary representation."""
def to_json(self):
"""Convert form to JSON string."""
@classmethod
def from_dict(cls, data):
"""Create form from dictionary."""
@classmethod
def from_json(cls, json_str):
"""Create form from JSON string."""
class NumpyForm(Form):
"""
Form for NumPy array structure.
"""
def __init__(self, primitive, shape=(), has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- primitive: str, NumPy dtype name
- shape: tuple, shape of inner dimensions
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class ListForm(Form):
"""
Form for variable-length list structure.
"""
def __init__(self, starts, stops, content, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- starts: str, index type for list starts
- stops: str, index type for list stops
- content: Form, form of list elements
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class ListOffsetForm(Form):
"""
Form for offset-based list structure.
"""
def __init__(self, offsets, content, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- offsets: str, index type for offsets
- content: Form, form of list elements
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class RegularForm(Form):
"""
Form for regular (fixed-length) array structure.
"""
def __init__(self, content, size, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- content: Form, form of array elements
- size: int, fixed size of arrays
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class RecordForm(Form):
"""
Form for record/struct structure.
"""
def __init__(self, contents, fields=None, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- contents: list of Forms for each field
- fields: list of str, field names (None for tuple-like records)
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class IndexedForm(Form):
"""
Form for indexed array structure.
"""
def __init__(self, index, content, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- index: str, index type
- content: Form, form of indexed content
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class IndexedOptionForm(Form):
"""
Form for indexed array with optional/missing values.
"""
def __init__(self, index, content, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- index: str, index type
- content: Form, form of non-None content
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class UnionForm(Form):
"""
Form for union type structure.
"""
def __init__(self, tags, index, contents, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- tags: str, tag index type
- index: str, content index type
- contents: list of Forms for union alternatives
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class UnmaskedForm(Form):
"""
Form for unmasked optional array structure.
"""
def __init__(self, content, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- content: Form, form of content that could be None
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class ByteMaskedForm(Form):
"""
Form for byte-masked array structure.
"""
def __init__(self, mask, content, valid_when, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- mask: str, mask array type
- content: Form, form of masked content
- valid_when: bool, mask value indicating valid data
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class BitMaskedForm(Form):
"""
Form for bit-masked array structure.
"""
def __init__(self, mask, content, valid_when, lsb_order, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- mask: str, mask array type
- content: Form, form of masked content
- valid_when: bool, mask bit value indicating valid data
- lsb_order: bool, bit order (LSB first if True)
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""
class EmptyForm(Form):
"""
Form for empty array structure.
"""
def __init__(self, has_identifier=False, parameters=None, form_key=None):
"""
Parameters:
- has_identifier: bool, whether form has identifier
- parameters: dict, form parameters
- form_key: str, unique form identifier
"""

Functions for creating forms from various sources and converting between form representations.
def from_type(type_obj):
"""
Create Form from Type object.
Parameters:
- type_obj: Type object to convert
Returns:
Form representing the type structure
"""
def from_dtype(dtype):
"""
Create Form from NumPy dtype.
Parameters:
- dtype: numpy.dtype to convert
Returns:
NumpyForm representing the dtype
"""

Functions for validating array structure and detecting inconsistencies or errors in data layout.
def validity_error(array, exception=False):
"""
Check array for validity errors.
Parameters:
- array: Array to validate
- exception: bool, if True raise exception on error
Returns:
str describing any validity errors (empty string if valid)
Raises:
Exception if exception=True and array is invalid
"""
def is_valid(array):
"""
Test if array has valid structure.
Parameters:
- array: Array to test
Returns:
bool, True if the whole array has a valid structure and False otherwise
"""
def is_none(array):
"""
Test which elements are None/missing.
Parameters:
- array: Array to test
Returns:
Array of booleans indicating which elements are None
"""
def is_categorical(array):
"""
Test if array uses categorical representation.
Parameters:
- array: Array to test
Returns:
bool indicating if array is categorical
"""
def is_tuple(array):
"""
Test if array represents tuple data (records without field names).
Parameters:
- array: Array to test
Returns:
bool indicating if array contains tuples
"""

Functions for enforcing specific types and converting between compatible type representations.
def enforce_type(array, type_obj, highlevel=True, behavior=None):
"""
Convert array to match specified type structure.
Parameters:
- array: Array to convert
- type_obj: Type or str specifying target type
- highlevel: bool, if True return Array, if False return Content layout
- behavior: dict, custom behavior for the result
Returns:
Array converted to match target type
Raises:
TypeError if conversion is not possible
"""
def merge_option_of_records(array, highlevel=True, behavior=None):
"""
Convert an option of records (records that may be None) into a record whose fields are optional, pushing the option-type down into each field.
Parameters:
- array: Array with optional records to merge
- highlevel: bool, if True return Array, if False return Content layout
- behavior: dict, custom behavior for the result
Returns:
Array with merged optional record structure
"""
def merge_union_of_records(array, highlevel=True, behavior=None):
"""
Merge records in a union type into a single record type.
Parameters:
- array: Array with union of records to merge
- highlevel: bool, if True return Array, if False return Content layout
- behavior: dict, custom behavior for the result
Returns:
Array with merged record structure
"""

import awkward as ak
# Create nested array with mixed types
data = ak.Array([
{"x": [1, 2, 3], "y": 3.14, "name": "alice"},
{"x": [4], "y": 2.71, "name": "bob"}
])
# Inspect type information
print(ak.type(data))
# 2 * {x: var * int64, y: float64, name: string}
print(ak.typeof(data))
# "2 * {x: var * int64, y: float64, name: string}"
# Check specific properties
print(ak.is_tuple(data)) # False (has field names)
print(ak.is_categorical(data)) # False
print(ak.fields(data)) # ["x", "y", "name"]

import awkward as ak
# Add semantic meaning via parameters
physics_data = ak.Array([[1.0, 2.0], [3.0, 4.0]])
momentum = ak.with_parameter(physics_data, "units", "GeV/c")
momentum = ak.with_parameter(momentum, "quantity", "momentum")
# Access parameters
print(ak.parameters(momentum))
# {"units": "GeV/c", "quantity": "momentum"}
# Name the type for clarity
named_momentum = ak.with_name(momentum, "Momentum")
print(ak.typeof(named_momentum))
# Contains type name "Momentum"

import awkward as ak
# Get form from array
data = ak.Array([{"a": [1, 2], "b": 3}, {"a": [4], "b": 5}])
form = data.layout.form
# Serialize to JSON
form_json = form.to_json()
print(form_json)
# Recreate form from JSON
restored_form = ak.forms.Form.from_json(form_json)
# Forms can be used with buffers to reconstruct arrays

import awkward as ak
import numpy as np
# Create potentially invalid data
data = ak.Array([[1, 2, 3], [4, np.nan], []])
# Check validity
validity = ak.is_valid(data) # True (a single bool for the whole array)
none_check = ak.is_none(data) # [False, False, False]
# Check for structural errors
error_msg = ak.validity_error(data)
if error_msg:
    print(f"Validation error: {error_msg}")
else:
    print("Array is valid")

import awkward as ak
# Create array that could be regularized
irregular = ak.Array([[1, 2], [3, 4], [5, 6]]) # All length 2
# Convert to regular array
regular = ak.to_regular(irregular)
print(ak.type(regular)) # 3 * 2 * int64
# Enforce specific type
target_type = "var * float64"
float_array = ak.enforce_type(ak.Array([[1, 2], [3]]), target_type)
print(ak.type(float_array)) # 2 * var * float64

import awkward as ak
# Union type (multiple possible types per element)
mixed = ak.Array([1, "hello", [1, 2, 3], {"x": 5}])
print(ak.type(mixed)) # Shows union of int64, string, list, record
# Optional records (may be None)
optional_records = ak.Array([{"a": 1}, None, {"a": 2}])
print(ak.type(optional_records)) # option type containing record
# Nested complex structures
nested = ak.Array([
[{"particles": [{"pt": 10.0, "eta": 1.0}]}, None],
[{"particles": []}]
])
print(ak.type(nested)) # Deep nesting with options

Install with Tessl CLI
npx tessl i tessl/pypi-awkward