A hierarchical data modeling framework for modern science data standards
—
HDMF's build system converts container objects to storage representations and manages type mappings between specifications and Python classes. It provides the bridge between HDMF's object-oriented container system and the storage backend formats, enabling serialization and deserialization with validation.
Core builder classes for creating storage representations of different data elements.
class Builder:
    """Abstract base for all storage builders.

    A builder is an in-memory representation of a data element that a
    storage backend (HDF5, Zarr, etc.) can serialize, carrying both the
    element's metadata and its structural relationships.
    """

    def __init__(self, name: str, **kwargs):
        """Create a builder for the element called *name*.

        Args:
            name: Name of the data element.
            **kwargs: Additional builder properties:
                - attributes: dictionary of attributes
                - object_id: unique identifier
                - parent: parent builder
        """

    @property
    def name(self) -> str:
        """Name of the data element."""

    @property
    def attributes(self) -> dict:
        """Dictionary of attributes."""

    def set_attribute(self, name: str, value):
        """Store *value* under attribute *name*.

        Args:
            name: Attribute name.
            value: Attribute value.
        """
class GroupBuilder(Builder):
    """Builder for group (container) structures.

    A group is a hierarchical storage node that may hold datasets,
    nested groups, and links, together with its own metadata.
    """

    def __init__(self, name: str, **kwargs):
        """Create a builder for the group called *name*.

        Args:
            name: Name of the group.
            **kwargs: Group builder properties:
                - groups: dictionary of nested group builders
                - datasets: dictionary of dataset builders
                - links: dictionary of link builders
        """

    def set_group(self, builder: 'GroupBuilder'):
        """Attach *builder* as a nested group of this group.

        Args:
            builder: Group builder to add.
        """

    def set_dataset(self, builder: 'DatasetBuilder'):
        """Attach *builder* as a dataset of this group.

        Args:
            builder: Dataset builder to add.
        """

    def set_link(self, builder: 'LinkBuilder'):
        """Attach *builder* as a link of this group.

        Args:
            builder: Link builder to add.
        """

    @property
    def groups(self) -> dict:
        """Dictionary of nested group builders."""

    @property
    def datasets(self) -> dict:
        """Dictionary of dataset builders."""
### Build Error Classes
Exception classes for build system errors and warnings with detailed error reporting.
```python { .api }
class BuildError(Exception):
    """Root of the build-system exception hierarchy.

    Signals that a build operation failed, e.g. because of an invalid
    specification, a missing dependency, or an incompatible structure.
    """


class OrphanContainerBuildError(BuildError):
    """A container lacks the parent relationship required to build it."""


class ReferenceTargetNotBuiltError(BuildError):
    """A referenced object has not been built or is absent from the build context."""


class ContainerConfigurationError(BuildError):
    """A container configuration violates its specification or has incompatible settings."""


class ConstructError(BuildError):
    """A builder could not be converted back into a container object."""


# Warning hierarchy for non-fatal build issues.
class BuildWarning(UserWarning):
    """Root of the build-system warning hierarchy."""


class MissingRequiredBuildWarning(BuildWarning):
    """A required build component was missing."""


class DtypeConversionWarning(BuildWarning):
    """A value's data type was converted during the build."""


class IncorrectQuantityBuildWarning(BuildWarning):
    """A component appeared with an unexpected quantity during the build."""
class OrphanContainerWarning(BuildWarning):
"""Warning for orphaned containers."""
    pass

    @property
    def links(self) -> dict:
        """Dictionary of link builders."""


class DatasetBuilder(Builder):
    """
    Builder for dataset structures.
Creates storage representations for data arrays with metadata,
chunking, compression, and other storage-specific properties.
"""
def __init__(self, name: str, data, **kwargs):
    """Create a builder for the dataset called *name* holding *data*.

    Args:
        name: Name of the dataset.
        data: Dataset content.
        **kwargs: Dataset builder properties:
            - dtype: data type
            - shape: data shape
            - maxshape: maximum shape for resizable datasets
            - chunks: chunk configuration
            - compression: compression settings
    """
@property
def data(self):
"""Dataset content."""
@property
def shape(self) -> tuple:
"""Shape of the dataset."""
@property
def dtype(self):
"""Data type of the dataset."""class LinkBuilder(Builder): """ Builder for link structures.
Creates storage representations for links between data elements,
enabling references and relationships within the data hierarchy.
"""
def __init__(self, builder: Builder, name: str, **kwargs):
    """Create a link pointing at *builder*, exposed under *name*.

    Args:
        builder: Target builder for the link.
        name: Name of the link.
    """
@property
def builder(self) -> Builder:
"""Target builder for the link."""class ReferenceBuilder(Builder): """ Builder for object references.
Creates storage representations for references to other objects
within the data hierarchy, enabling complex relationships and cross-references.
"""
def __init__(self, builder: Builder, **kwargs):
    """Create a reference pointing at *builder*.

    Args:
        builder: Target builder for the reference.
    """
@property
def builder(self) -> Builder:
"""Target builder for the reference."""### Build Management
Core management classes for coordinating the build process and type mappings.
```python { .api }
class BuildManager:
    """Coordinates converting containers into storage representations.

    Drives the container-to-builder conversion through a type map,
    handling validation, type resolution, and error reporting.
    """

    def __init__(self, type_map: 'TypeMap'):
        """Create a manager that resolves types through *type_map*.

        Args:
            type_map: Type mapping for container-to-builder conversion.
        """

    def build(self, container, source: str = None, **kwargs) -> Builder:
        """Produce a storage representation for *container*.

        Args:
            container: Container object to build.
            source: Source identifier for tracking.
            **kwargs: Build options:
                - root: whether this is the root container
                - exhaust_dci: whether to exhaust data chunk iterators
        Returns:
            Builder object representing the container.
        """

    def construct(self, builder: Builder, **kwargs):
        """Recreate a container object from *builder*.

        Args:
            builder: Builder object to construct from.
            **kwargs: Construction options.
        Returns:
            Container object constructed from the builder.
        """

    def get_builder(self, container) -> Builder:
        """Return the already-built builder for *container*, if any.

        Args:
            container: Container to find the builder for.
        Returns:
            Builder object or None.
        """
class TypeMap:
    """Registry linking specifications, container classes, and mappers.

    Tracks which Python container class and which object mapper belong
    to each data-type specification across namespaces.
    """

    def __init__(self, namespaces: 'NamespaceCatalog', type_map=None):
        """Create a type map over *namespaces*.

        Args:
            namespaces: Catalog of available namespaces.
            type_map: Parent type map to inherit mappings from.
        """

    def register_container_type(self, namespace: str, data_type: str, container_cls):
        """Associate *container_cls* with *data_type* in *namespace*.

        Args:
            namespace: Namespace containing the data type.
            data_type: Name of the data type.
            container_cls: Python class for the container.
        """

    def register_map(self, container_cls, mapper_cls):
        """Associate *mapper_cls* as the object mapper for *container_cls*.

        Args:
            container_cls: Container class.
            mapper_cls: Mapper class for serialization.
        """

    def get_container_cls(self, namespace: str, data_type: str):
        """Return the container class registered for *data_type*.

        Args:
            namespace: Namespace containing the data type.
            data_type: Name of the data type.
        Returns:
            Container class.
        """

    def get_map(self, container):
        """Return the object mapper for *container*.

        Args:
            container: Container object.
        Returns:
            ObjectMapper instance.
        """
def copy(self) -> 'TypeMap':
"""
Create a copy of this type map.
Returns:
New TypeMap instance with same mappings
"""Classes for mapping between container objects and builder representations.
class ObjectMapper:
    """Base class that translates containers to and from builders.

    Concrete mappers convert a container object into its storage
    representation (build) and reconstruct a container from storage
    (construct), applying validation and error handling along the way.
    """

    def __init__(self, spec):
        """Create a mapper for the given data-type specification.

        Args:
            spec: Specification for the data type.
        """

    def build(self, container, manager: BuildManager, **kwargs) -> Builder:
        """Produce a storage representation for *container*.

        Args:
            container: Container object to build.
            manager: Build manager coordinating the process.
        Returns:
            Builder object.
        """

    def construct(self, builder: Builder, manager: BuildManager, **kwargs):
        """Recreate a container from *builder*.

        Args:
            builder: Builder object to construct from.
            manager: Build manager coordinating the process.
        Returns:
            Container object.
        """

    @property
    def spec(self):
        """Specification this mapper was created with."""
class CustomClassGenerator:
    """Creates container classes dynamically from specifications.

    Generated classes carry the validation, attributes, and methods
    that the specification requires.
    """

    def __init__(self, **kwargs):
        """Initialize custom class generator."""

    def generate_class(self, namespace: str, data_type: str, spec, parent_cls=None):
        """Generate a container class for *data_type* from *spec*.

        Args:
            namespace: Namespace containing the specification.
            data_type: Name of the data type.
            spec: Specification object.
            parent_cls: Parent class for inheritance.
        Returns:
            Generated container class.
        """
class MCIClassGenerator(CustomClassGenerator):
"""
Generator for Multi-Container Interface classes.
Creates classes that can hold multiple containers of the same type
with auto-generated methods for access and manipulation.
"""
def generate_class(self, namespace: str, data_type: str, spec, parent_cls=None):
"""
Generate MCI container class from specification.
Args:
namespace: Namespace containing the specification
data_type: Name of the data type
spec: Specification object
parent_cls: Parent class for inheritance
Returns:
Generated MCI container class
"""Exception and warning classes for build process error handling.
class BuildError(Exception):
    """Base exception for build process errors."""


class OrphanContainerBuildError(BuildError):
    """Raised when trying to build a container with no parent relationship."""

    def __init__(self, container):
        """Record the orphaned *container* that triggered the error.

        Args:
            container: Container that lacks proper parent relationships.
        """


class ReferenceTargetNotBuiltError(BuildError):
    """Raised when a reference points at a target that has not been built yet."""

    def __init__(self, reference, target):
        """Record the failing reference and its unbuilt target.

        Args:
            reference: Reference that failed.
            target: Target that hasn't been built.
        """


class ContainerConfigurationError(BuildError):
    """Raised for container configuration problems."""


class ConstructError(Exception):
    """Raised while constructing a container from a builder."""


# Warning classes
class BuildWarning(UserWarning):
    """Base warning for build process issues."""


class MissingRequiredBuildWarning(BuildWarning):
    """Warning for missing required fields during build."""


class DtypeConversionWarning(BuildWarning):
    """Warning for data type conversions during build."""
    pass

from hdmf.build import BuildManager, TypeMap
from hdmf.common import get_type_map
from hdmf import Container, Data
import numpy as np
# Get type map with HDMF common types
type_map = get_type_map()
build_manager = BuildManager(type_map)
# Create container structure
data_array = np.random.randn(100, 50)
data_container = Data(name='neural_data', data=data_array)
container = Container(name='experiment')
container.add_child(data_container)
# Build storage representation
builder = build_manager.build(container)
print(f"Built {builder.name} with {len(builder.datasets)} datasets")
# Construct container back from builder
constructed = build_manager.construct(builder)
print(f"Constructed: {constructed.name}")from hdmf.build import TypeMap, ObjectMapper
from hdmf.spec import GroupSpec, AttributeSpec
from hdmf import Container
from hdmf.common import get_manager
# Define custom container class
class ExperimentSession(Container):
@docval({'name': 'name', 'type': str, 'doc': 'Name of the session'},
{'name': 'session_id', 'type': str, 'doc': 'Unique session identifier'},
{'name': 'start_time', 'type': str, 'doc': 'Session start time'})
def __init__(self, **kwargs):
name, session_id, start_time = getargs('name', 'session_id', 'start_time', kwargs)
super().__init__(name=name)
self.session_id = session_id
self.start_time = start_time
# Create specification for the custom type
session_spec = GroupSpec(
doc='Container for experimental session data',
neurodata_type_def='ExperimentSession',
attributes=[
AttributeSpec('session_id', 'Unique session identifier', dtype='text'),
AttributeSpec('start_time', 'Session start time', dtype='text')
]
)
# Create custom mapper
class ExperimentSessionMapper(ObjectMapper):
def __init__(self, spec):
super().__init__(spec)
def construct(self, builder, manager, **kwargs):
return ExperimentSession(
name=builder.name,
session_id=builder.attributes['session_id'],
start_time=builder.attributes['start_time']
)
# Register the custom type
type_map = get_type_map()
type_map.register_container_type('custom', 'ExperimentSession', ExperimentSession)
type_map.register_map(ExperimentSession, ExperimentSessionMapper)

from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder
import numpy as np
# Create dataset builder with specific storage properties
data_builder = DatasetBuilder(
name='high_res_data',
data=np.random.randn(10000, 1000),
dtype='float64',
chunks=(1000, 100), # Optimized chunk size
compression='gzip',
compression_opts=9
)
# Create group builder with nested structure
analysis_group = GroupBuilder(name='analysis')
analysis_group.set_dataset(data_builder)
# Add metadata attributes
analysis_group.set_attribute('analysis_type', 'spike_detection')
analysis_group.set_attribute('algorithm_version', '2.1.0')
analysis_group.set_attribute('parameters', {
'threshold': -50.0,
'min_interval': 0.001,
'detection_method': 'threshold_crossing'
})
# Create main experiment group
experiment_group = GroupBuilder(name='experiment_001')
experiment_group.set_group(analysis_group)
# Create link to reference data from multiple locations
data_link = LinkBuilder(data_builder, name='reference_data')
experiment_group.set_link(data_link)
print(f"Built experiment with {len(experiment_group.groups)} groups")
print(f"Dataset shape: {data_builder.shape}")from hdmf.build import (BuildManager, BuildError, OrphanContainerBuildError,
ReferenceTargetNotBuiltError)
from hdmf import Container
def safe_build_container(container, build_manager):
    """Build *container*, recovering from common build failures.

    Returns the resulting builder, or None when the build fails for a
    reason that cannot be recovered from here.
    """
    try:
        return build_manager.build(container)
    except OrphanContainerBuildError as exc:
        # No parent relationship: retry treating the container as root.
        print(f"Container {exc.args[0].name} is orphaned - no parent relationship")
        return build_manager.build(container, root=True)
    except ReferenceTargetNotBuiltError as exc:
        # Build the missing reference target first, then retry.
        print(f"Reference target not built: {exc.args[1]}")
        build_manager.build(exc.args[1])
        return build_manager.build(container)
    except BuildError as exc:
        print(f"Build error: {exc}")
        return None
# Usage
container = Container(name='test_container')
type_map = get_type_map()
build_manager = BuildManager(type_map)
builder = safe_build_container(container, build_manager)
if builder:
print("Successfully built container")from hdmf.build import CustomClassGenerator
from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec
from hdmf.utils import docval, getargs
# Create specification for dynamic class
recording_spec = GroupSpec(
doc='Neural recording container',
neurodata_type_def='NeuralRecording',
datasets=[
DatasetSpec('data', 'Raw recording data', dtype='int16',
shape=(None, None), dims=['time', 'channels']),
DatasetSpec('timestamps', 'Sample timestamps', dtype='float64',
shape=(None,), dims=['time'])
],
attributes=[
AttributeSpec('sampling_rate', 'Sampling rate in Hz', dtype='float64'),
AttributeSpec('num_channels', 'Number of recording channels', dtype='int')
]
)
# Generate custom class from specification
generator = CustomClassGenerator()
NeuralRecording = generator.generate_class(
namespace='custom',
data_type='NeuralRecording',
spec=recording_spec,
parent_cls=Container
)
# Use the dynamically generated class
recording = NeuralRecording(
name='session_001',
data=np.random.randint(-1000, 1000, (30000, 64)),
timestamps=np.arange(30000) / 30000.0,
sampling_rate=30000.0,
num_channels=64
)
print(f"Created {recording.__class__.__name__} with {recording.num_channels} channels")import warnings
from hdmf.build import BuildWarning, MissingRequiredBuildWarning, DtypeConversionWarning
def build_with_monitoring(container, build_manager):
    """Build *container* while capturing and reporting build warnings."""
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        builder = build_manager.build(container)
        # Report each captured warning, most specific category first.
        for entry in caught:
            category = entry.category
            if issubclass(category, MissingRequiredBuildWarning):
                print(f"Missing required field: {entry.message}")
            elif issubclass(category, DtypeConversionWarning):
                print(f"Data type conversion: {entry.message}")
            elif issubclass(category, BuildWarning):
                print(f"Build warning: {entry.message}")
        return builder
# Usage
container = Container(name='monitored_build')
type_map = get_type_map()
build_manager = BuildManager(type_map)
builder = build_with_monitoring(container, build_manager)
print(f"Build completed for: {builder.name}")Install with Tessl CLI
npx tessl i tessl/pypi-hdmf