A hierarchical data modeling framework for modern science data standards
—
HDMF provides a comprehensive set of utilities for parameter validation, argument handling, type checking, and data manipulation. These utilities form the backbone of HDMF's robust type system and are essential for developing extensions and working with HDMF data structures.
The core decorator system for parameter validation and documentation generation throughout HDMF.
def docval(*args, **kwargs):
"""
Decorator for documenting and enforcing method parameter types and constraints.
Args:
*args: Parameter specifications as dictionaries
**kwargs: Additional validation options
Parameter specification format:
{
'name': str, # Parameter name
'type': type/tuple, # Expected type(s)
'doc': str, # Documentation string
'default': any, # Default value (optional)
'shape': tuple, # Expected array shape (optional)
'allow_none': bool # Allow None values (optional)
}
Returns:
Decorated function with validation
"""
def getargs(arg_names, kwargs: dict):
"""
Retrieve specified arguments from dictionary with validation.
Args:
arg_names: String or list of argument names to retrieve
kwargs: Dictionary containing arguments
Returns:
Single value if arg_names is string, tuple if list
Raises:
TypeError: If required arguments are missing
"""
def popargs(arg_names, kwargs: dict):
"""
Retrieve and remove specified arguments from dictionary.
Args:
arg_names: String or list of argument names to retrieve
kwargs: Dictionary to retrieve and modify
Returns:
Single value if arg_names is string, tuple if list
"""
def popargs_to_dict(arg_names: list, kwargs: dict) -> dict:
"""
Extract multiple arguments to a new dictionary.
Args:
arg_names: List of argument names to extract
kwargs: Source dictionary
Returns:
Dictionary containing extracted arguments
"""
def get_docval(func) -> tuple:
"""
Get docval arguments for a function.
Args:
func: Function to inspect
Returns:
Tuple of docval argument specifications
"""Comprehensive type checking utilities for validating data types, shapes, and constraints.
def check_type(value, type_, name: str = None) -> bool:
"""
Check if value matches expected type with detailed error reporting.
Args:
value: Value to check
type_: Expected type or tuple of types
name: Name for error messages
Returns:
True if type matches
Raises:
TypeError: If type doesn't match
"""
class ExtenderMeta(type):
"""
Metaclass for extending base class initialization with additional functionality.
Enables automatic method extension and initialization customization.
"""
def __new__(cls, name, bases, namespace, **kwargs):
"""Create new class with extended functionality."""
class LabelledDict(dict):
"""
Dictionary wrapper with attribute-based querying and labeling capabilities.
Provides enhanced dictionary functionality with label-based access patterns.
"""
def __init__(self, label: str, key_class=None, **kwargs):
"""
Initialize labelled dictionary.
Args:
label: Label for the dictionary
key_class: Expected class for dictionary keys
"""
def __getattribute__(self, item):
"""Enable attribute-based access to dictionary values."""
class StrDataset:
"""
String dataset wrapper for HDF5 compatibility.
Handles string encoding/decoding for HDF5 storage backends.
"""
def __init__(self, data, **kwargs):
"""
Initialize string dataset.
Args:
data: String data to wrap
"""Utilities for working with array data, shapes, and data type conversion.
def get_data_shape(data) -> tuple:
"""
Determine shape of array-like data including ragged arrays.
Args:
data: Array-like data object
Returns:
Tuple representing data shape
"""
def pystr(s) -> str:
"""
Convert bytes to Python string with proper encoding handling.
Args:
s: String or bytes to convert
Returns:
Python string
"""
def to_uint_array(data) -> np.ndarray:
"""
Convert array to unsigned integers with validation.
Args:
data: Array-like data to convert
Returns:
NumPy array with unsigned integer dtype
Raises:
ValueError: If conversion would result in data loss
"""
def is_ragged(data) -> bool:
"""
Test if array-like data is ragged (has inconsistent dimensions).
Args:
data: Array-like data to test
Returns:
True if data is ragged, False otherwise
"""
def get_basic_array_info(data) -> dict:
"""
Get basic information about array-like data.
Args:
data: Array-like data to analyze
Returns:
Dictionary with keys: 'shape', 'dtype', 'size', 'is_ragged'
"""
def generate_array_html_repr(data, max_elements: int = 1000) -> str:
"""
Generate HTML representation of arrays for Jupyter notebooks.
Args:
data: Array data to represent
max_elements: Maximum elements to display
Returns:
HTML string representation
"""Utilities for version comparison and compatibility checking.
def is_newer_version(version1: str, version2: str) -> bool:
"""
Compare version strings to determine if first is newer than second.
Args:
version1: First version string (e.g., '1.2.3')
version2: Second version string (e.g., '1.2.0')
Returns:
True if version1 is newer than version2
Examples:
>>> is_newer_version('1.2.3', '1.2.0')
True
>>> is_newer_version('2.0.0', '1.9.9')
True
>>> is_newer_version('1.0.0', '1.0.0')
False
"""Important constants and enumerations used throughout HDMF for validation and configuration.
class AllowPositional(Enum):
    """
    Enumeration for controlling positional argument handling in docval.

    Values:
    - NONE: No positional arguments allowed
    - SOME: Some positional arguments allowed
    - ALL: All arguments can be positional
    """

    NONE = 'none'
    SOME = 'some'
    ALL = 'all'
# Type macros (plain strings) used in docval validation
array_data = 'array_data'  # stands for array-like data types
scalar_data = 'scalar_data'  # stands for scalar data types
data = 'data' # Generic data macro
from hdmf.utils import docval, getargs
class DataProcessor:
    """Example class whose ``process`` method has its arguments validated by ``docval``."""

    @docval(
        {'name': 'data', 'type': ('array_data', list), 'doc': 'Input data to process'},
        {'name': 'method', 'type': str, 'doc': 'Processing method', 'default': 'mean'},
        {'name': 'axis', 'type': int, 'doc': 'Axis for processing', 'default': 0, 'allow_none': True},
    )
    def process(self, **kwargs):
        """
        Reduce ``data`` along ``axis`` with the requested ``method``.

        Args:
            data: Input data to process
            method: Processing method, 'mean' (default) or 'sum'
            axis: Axis for processing (default 0)
        Returns:
            Result of ``np.mean`` or ``np.sum`` applied to ``data``
        Raises:
            ValueError: If ``method`` is neither 'mean' nor 'sum'
        """
        # Imported here so the example runs on its own; the snippet's import
        # line only brings in docval and getargs.
        import numpy as np

        data, method, axis = getargs('data', 'method', 'axis', kwargs)
        if method == 'mean':
            return np.mean(data, axis=axis)
        elif method == 'sum':
            return np.sum(data, axis=axis)
        else:
            raise ValueError(f"Unknown method: {method}")
# Usage
processor = DataProcessor()
result = processor.process(data=[[1, 2], [3, 4]], method='mean', axis=0)

# Import for the type-checking example that follows
from hdmf.utils import check_type, is_ragged, get_data_shape
import numpy as np

# Type checking
data = np.array([1, 2, 3])
check_type(data, np.ndarray, 'data')  # Passes

# Check for ragged arrays
regular_array = [[1, 2], [3, 4]]
ragged_array = [[1, 2, 3], [4, 5]]
print(is_ragged(regular_array))  # False
print(is_ragged(ragged_array))  # True

# Get shape information
shape = get_data_shape(ragged_array)
print(shape)  # (2, None) - indicates ragged structure

# Import for the LabelledDict example that follows
from hdmf.utils import LabelledDict
# Create labelled dictionary
config = LabelledDict(label='experiment_config', key_class=str)
config['sampling_rate'] = 30000
config['duration'] = 3600
config['channels'] = ['ch1', 'ch2', 'ch3']

# Access via attributes (if keys are valid identifiers)
print(config.sampling_rate)  # 30000
print(config.duration)  # 3600

# Import for the version-comparison example that follows
from hdmf.utils import is_newer_version
# Check software compatibility
current_version = '4.1.0'
required_version = '4.0.0'
if is_newer_version(current_version, required_version):
    print("Version is compatible")
else:
    print("Version upgrade required")

# Import for the array-analysis example that follows
from hdmf.utils import get_basic_array_info
import numpy as np

# Build one rectangular sample and one ragged sample
regular_data = np.random.randn(100, 50)
ragged_data = [[1, 2], [3, 4, 5], [6]]

# Summarize both samples before printing anything
regular_info = get_basic_array_info(regular_data)
ragged_info = get_basic_array_info(ragged_data)

print("Regular array info:", regular_info)
# {'shape': (100, 50), 'dtype': 'float64', 'size': 5000, 'is_ragged': False}
print("Ragged array info:", ragged_info)
# {'shape': (3, None), 'dtype': 'object', 'size': 3, 'is_ragged': True}
Install with Tessl CLI
npx tessl i tessl/pypi-hdmf