CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-hdmf

A hierarchical data modeling framework for modern science data standards

Pending
Overview
Eval results
Files

docs/build-system.md

Build System

HDMF's build system converts container objects to storage representations and manages type mappings between specifications and Python classes. It provides the bridge between HDMF's object-oriented container system and the storage backend formats, enabling serialization and deserialization with validation.

Capabilities

Builder Classes

Core builder classes for creating storage representations of different data elements.

class Builder:
    """
    Abstract parent class shared by every builder type.

    A builder is a storage representation of a data element. It keeps
    the element's metadata and structural information so the element can
    be written to different backends (HDF5, Zarr, etc.).
    """

    def __init__(self, name: str, **kwargs):
        """
        Create a builder.

        Args:
            name: Name of the data element
            **kwargs: Optional builder properties:
                - attributes: Dictionary of attributes
                - object_id: Unique identifier
                - parent: Parent builder
        """

    @property
    def name(self) -> str:
        """The data element's name."""

    @property
    def attributes(self) -> dict:
        """The attributes, as a dictionary."""

    def set_attribute(self, name: str, value):
        """
        Assign a value to an attribute.

        Args:
            name: Name of the attribute to set
            value: Value to store under that name
        """

class GroupBuilder(Builder):
    """
    Builder for group (container) structures.

    Creates hierarchical storage representations that can contain
    datasets, nested groups, and links with associated metadata.
    """

    def __init__(self, name: str, **kwargs):
        """
        Initialize group builder.

        Args:
            name: Name of the group
            **kwargs: Group builder properties:
                - groups: Dictionary of nested group builders
                - datasets: Dictionary of dataset builders
                - links: Dictionary of link builders
        """

    def set_group(self, builder: 'GroupBuilder'):
        """
        Add nested group builder.

        Args:
            builder: Group builder to add
        """

    def set_dataset(self, builder: 'DatasetBuilder'):
        """
        Add dataset builder.

        Args:
            builder: Dataset builder to add
        """

    def set_link(self, builder: 'LinkBuilder'):
        """
        Add link builder.

        Args:
            builder: Link builder to add
        """

    @property
    def groups(self) -> dict:
        """Dictionary of nested group builders."""

    @property
    def datasets(self) -> dict:
        """Dictionary of dataset builders."""

    # Read accessor for the third child kind, matching the ``links`` keyword
    # and ``set_link`` above so all three child collections can be read.
    @property
    def links(self) -> dict:
        """Dictionary of link builders."""

Build Error Classes

Exception classes for build system errors and warnings with detailed error reporting.
class BuildError(Exception):
    """
    Root of the build-system exception hierarchy.

    Signals a failed build operation: an invalid specification, a
    missing dependency, or an incompatible data structure.
    """


class OrphanContainerBuildError(BuildError):
    """
    A container without a parent was encountered while building.

    Building a storage representation requires proper parent
    relationships; this is raised when a container lacks them.
    """


class ReferenceTargetNotBuiltError(BuildError):
    """
    A reference points at something that is not built.

    The referenced object either has not been built yet or is absent
    from the build context.
    """


class ContainerConfigurationError(BuildError):
    """
    A container is configured incorrectly.

    The configuration violates specification requirements or combines
    incompatible settings.
    """


# NOTE(review): the "Build Errors and Warnings" listing later on this page
# declares ConstructError as a direct Exception subclass instead -- confirm
# the intended base class against hdmf.build.errors.
class ConstructError(BuildError):
    """
    Building a container object from a builder failed.

    Raised when a builder cannot be turned into a container because of
    a specification mismatch or invalid data.
    """

# Warning categories emitted by the build system
class BuildWarning(UserWarning):
    """Root warning category for build system issues."""


class MissingRequiredBuildWarning(BuildWarning):
    """A required build component is missing."""


class DtypeConversionWarning(BuildWarning):
    """A data type was converted during the build."""


class IncorrectQuantityBuildWarning(BuildWarning):
    """An incorrect quantity was found during the build."""


class OrphanContainerWarning(BuildWarning):
    """An orphaned container was found."""
# NOTE(review): this property takes ``self`` and parallels the ``groups`` /
# ``datasets`` properties of GroupBuilder, yet it appears here at top level
# after the error-class listing -- presumably displaced during extraction;
# confirm whether it belongs inside GroupBuilder.
@property
def links(self) -> dict:
    """Dictionary of link builders."""

class DatasetBuilder(Builder):
    """
    Builder for dataset structures.

    Creates storage representations for data arrays with metadata,
    chunking, compression, and other storage-specific properties.
    """

    def __init__(self, name: str, data, **kwargs):
        """
        Initialize dataset builder.

        Args:
            name: Name of the dataset
            data: Dataset content
            **kwargs: Dataset builder properties:
                - dtype: Data type
                - shape: Data shape
                - maxshape: Maximum shape for resizable datasets
                - chunks: Chunk configuration
                - compression: Compression settings
        """

    @property
    def data(self):
        """Dataset content."""

    @property
    def shape(self) -> tuple:
        """Shape of the dataset."""

    @property
    def dtype(self):
        """Data type of the dataset."""

class LinkBuilder(Builder):
    """
    Builder for link structures.

    Creates storage representations for links between data elements,
    enabling references and relationships within the data hierarchy.
    """

    def __init__(self, builder: Builder, name: str, **kwargs):
        """
        Initialize link builder.

        Args:
            builder: Target builder for the link
            name: Name of the link
        """

    @property
    def builder(self) -> Builder:
        """Target builder for the link."""

class ReferenceBuilder(Builder):
    """
    Builder for object references.

    Creates storage representations for references to other objects
    within the data hierarchy, enabling complex relationships and
    cross-references.
    """

    def __init__(self, builder: Builder, **kwargs):
        """
        Initialize reference builder.

        Args:
            builder: Target builder for the reference
        """

    @property
    def builder(self) -> Builder:
        """Target builder for the reference."""
Build Management

Core management classes for coordinating the build process and type mappings.
class BuildManager:
    """
    Coordinator that turns containers into storage representations.

    Drives the conversion from container objects to builder objects and
    takes care of validation, type mapping, and error reporting.
    """

    def __init__(self, type_map: 'TypeMap'):
        """
        Create a build manager.

        Args:
            type_map: Type mapping used for container-to-builder conversion
        """

    def build(self, container, source: str = None, **kwargs) -> Builder:
        """
        Produce the storage representation of a container.

        Args:
            container: The container object to build
            source: Identifier of the source, used for tracking
            **kwargs: Build options:
                - root: Whether this is the root container
                - exhaust_dci: Whether to exhaust data chunk iterators

        Returns:
            The builder object that represents the container
        """

    def construct(self, builder: Builder, **kwargs):
        """
        Produce a container from its storage representation.

        Args:
            builder: The builder object to construct from
            **kwargs: Construction options

        Returns:
            The container object constructed from the builder
        """

    def get_builder(self, container) -> Builder:
        """
        Look up the existing builder for a container, if there is one.

        Args:
            container: The container whose builder is wanted

        Returns:
            The builder object, or None
        """

class TypeMap:
    """
    Registry tying specifications, container classes, and object mappers together.

    Keeps track of how data type specifications relate to Python
    container classes and to the mappers that convert between them.
    """

    def __init__(self, namespaces: 'NamespaceCatalog', type_map=None):
        """
        Create a type map.

        Args:
            namespaces: Catalog of the available namespaces
            type_map: Parent type map to inherit from
        """

    def register_container_type(self, namespace: str, data_type: str, container_cls):
        """
        Associate a container class with a data type.

        Args:
            namespace: Namespace that contains the data type
            data_type: Name of the data type
            container_cls: Python class used for the container
        """

    def register_map(self, container_cls, mapper_cls):
        """
        Associate an object mapper with a container class.

        Args:
            container_cls: The container class
            mapper_cls: The mapper class used for serialization
        """

    def get_container_cls(self, namespace: str, data_type: str):
        """
        Look up the container class of a data type.

        Args:
            namespace: Namespace that contains the data type
            data_type: Name of the data type

        Returns:
            The container class
        """

    def get_map(self, container):
        """
        Look up the object mapper of a container.

        Args:
            container: The container object

        Returns:
            An ObjectMapper instance
        """

    def copy(self) -> 'TypeMap':
        """
        Copy this type map.

        Returns:
            A new TypeMap instance holding the same mappings
        """

Object Mapping

Classes for mapping between container objects and builder representations.

class ObjectMapper:
    """
    Base class that maps container objects to builders and back.

    Defines the interface for converting between container objects and
    their storage representations, with validation and error handling.
    """

    def __init__(self, spec):
        """
        Create an object mapper.

        Args:
            spec: The specification of the data type
        """

    def build(self, container, manager: BuildManager, **kwargs) -> Builder:
        """
        Produce the storage representation of a container.

        Args:
            container: The container object to build
            manager: The build manager coordinating the process

        Returns:
            The builder object
        """

    def construct(self, builder: Builder, manager: BuildManager, **kwargs):
        """
        Produce a container from its storage representation.

        Args:
            builder: The builder object to construct from
            manager: The build manager coordinating the process

        Returns:
            The container object
        """

    @property
    def spec(self):
        """The specification this mapper works with."""

class CustomClassGenerator:
    """
    Factory that derives custom container classes from specifications.

    Builds Python classes at runtime that match what a specification
    requires, including validation, attributes, and methods.
    """

    def __init__(self, **kwargs):
        """Create a custom class generator."""

    def generate_class(self, namespace: str, data_type: str, spec, parent_cls=None):
        """
        Derive a custom container class from a specification.

        Args:
            namespace: Namespace that contains the specification
            data_type: Name of the data type
            spec: The specification object
            parent_cls: Class the generated class inherits from

        Returns:
            The generated container class
        """

class MCIClassGenerator(CustomClassGenerator):
    """
    Factory for Multi-Container Interface (MCI) classes.

    The generated classes hold several containers of the same type and
    come with auto-generated methods for access and manipulation.
    """

    def generate_class(self, namespace: str, data_type: str, spec, parent_cls=None):
        """
        Derive an MCI container class from a specification.

        Args:
            namespace: Namespace that contains the specification
            data_type: Name of the data type
            spec: The specification object
            parent_cls: Class the generated class inherits from

        Returns:
            The generated MCI container class
        """

Build Errors and Warnings

Exception and warning classes for build process error handling.

class BuildError(Exception):
    """Root exception for errors in the build process."""


class OrphanContainerBuildError(BuildError):
    """Raised on an attempt to build an orphaned container."""

    def __init__(self, container):
        """
        Record the orphaned container.

        Args:
            container: The container that lacks proper parent relationships
        """


class ReferenceTargetNotBuiltError(BuildError):
    """Raised when the target of a reference has not been built yet."""

    def __init__(self, reference, target):
        """
        Record the details of the reference.

        Args:
            reference: The reference that failed
            target: The target that has not been built
        """


class ContainerConfigurationError(BuildError):
    """Raised for problems with a container's configuration."""


# NOTE(review): the "Build Error Classes" listing earlier on this page
# declares ConstructError as a BuildError subclass instead -- confirm the
# intended base class against hdmf.build.errors.
class ConstructError(Exception):
    """Raised while constructing a container from a builder."""

# Warning categories for the build process
class BuildWarning(UserWarning):
    """Root warning category for build process issues."""


class MissingRequiredBuildWarning(BuildWarning):
    """A required field was missing during the build."""


class DtypeConversionWarning(BuildWarning):
    """A data type was converted during the build."""

Usage Examples

Basic Building and Construction

from hdmf.build import BuildManager, TypeMap
from hdmf.common import get_type_map
from hdmf import Container, Data
import numpy as np

# Build manager backed by the type map of the HDMF common types
manager = BuildManager(get_type_map())

# Assemble the container hierarchy: one data child under one parent
samples = Data(name='neural_data', data=np.random.randn(100, 50))

container = Container(name='experiment')
container.add_child(samples)

# Container -> builder (storage representation)
builder = manager.build(container)
print(f"Built {builder.name} with {len(builder.datasets)} datasets")

# Builder -> container (round trip)
constructed = manager.construct(builder)
print(f"Constructed: {constructed.name}")

Custom Type Registration

from hdmf.build import TypeMap, ObjectMapper
from hdmf.spec import GroupSpec, AttributeSpec
from hdmf import Container
from hdmf.common import get_manager, get_type_map
from hdmf.utils import docval, getargs

# Define custom container class
class ExperimentSession(Container):
    """Container holding the identifying metadata of one experimental session."""

    @docval({'name': 'name', 'type': str, 'doc': 'Name of the session'},
            {'name': 'session_id', 'type': str, 'doc': 'Unique session identifier'},
            {'name': 'start_time', 'type': str, 'doc': 'Session start time'})
    def __init__(self, **kwargs):
        name, session_id, start_time = getargs('name', 'session_id', 'start_time', kwargs)
        super().__init__(name=name)
        self.session_id = session_id
        self.start_time = start_time

# Create specification for the custom type.
# HDMF specs name a new type with ``data_type_def``.
session_spec = GroupSpec(
    doc='Container for experimental session data',
    data_type_def='ExperimentSession',
    attributes=[
        AttributeSpec('session_id', 'Unique session identifier', dtype='text'),
        AttributeSpec('start_time', 'Session start time', dtype='text')
    ]
)

# Create custom mapper
class ExperimentSessionMapper(ObjectMapper):
    """Mapper that rebuilds an ExperimentSession from a builder's attributes."""

    def __init__(self, spec):
        super().__init__(spec)

    def construct(self, builder, manager, **kwargs):
        """Return an ExperimentSession built from the builder's name and attributes."""
        return ExperimentSession(
            name=builder.name,
            session_id=builder.attributes['session_id'],
            start_time=builder.attributes['start_time']
        )

# Register the custom type
type_map = get_type_map()
type_map.register_container_type('custom', 'ExperimentSession', ExperimentSession)
type_map.register_map(ExperimentSession, ExperimentSessionMapper)

Advanced Builder Manipulation

from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder
import numpy as np

# Dataset builder carrying explicit storage properties
storage_options = {
    'dtype': 'float64',
    'chunks': (1000, 100),  # Optimized chunk size
    'compression': 'gzip',
    'compression_opts': 9,
}
data_builder = DatasetBuilder(
    name='high_res_data',
    data=np.random.randn(10000, 1000),
    **storage_options
)

# Group builder that holds the dataset
analysis = GroupBuilder(name='analysis')
analysis.set_dataset(data_builder)

# Metadata attributes, applied in declaration order
analysis_metadata = {
    'analysis_type': 'spike_detection',
    'algorithm_version': '2.1.0',
    'parameters': {
        'threshold': -50.0,
        'min_interval': 0.001,
        'detection_method': 'threshold_crossing'
    },
}
for attr_name, attr_value in analysis_metadata.items():
    analysis.set_attribute(attr_name, attr_value)

# Top-level experiment group containing the analysis group
experiment_group = GroupBuilder(name='experiment_001')
experiment_group.set_group(analysis)

# Link so the same data can be referenced from multiple locations
experiment_group.set_link(LinkBuilder(data_builder, name='reference_data'))

print(f"Built experiment with {len(experiment_group.groups)} groups")
print(f"Dataset shape: {data_builder.shape}")

Error Handling and Validation

from hdmf.build import (BuildManager, BuildError, OrphanContainerBuildError,
                        ReferenceTargetNotBuiltError)
from hdmf import Container
from hdmf.common import get_type_map

def safe_build_container(container, build_manager):
    """
    Build a container, recovering from the known build errors.

    Args:
        container: Container object to build
        build_manager: Build manager used to perform the build

    Returns:
        The resulting builder, or None when a generic BuildError occurs
    """

    try:
        return build_manager.build(container)

    except OrphanContainerBuildError as e:
        print(f"Container {e.args[0].name} is orphaned - no parent relationship")
        # Fix by adding to a parent or marking as root
        return build_manager.build(container, root=True)

    except ReferenceTargetNotBuiltError as e:
        print(f"Reference target not built: {e.args[1]}")
        # Build target first, then retry; the target's builder itself is not needed here
        build_manager.build(e.args[1])
        return build_manager.build(container)

    except BuildError as e:
        # Must come last: the two handlers above catch BuildError subclasses
        print(f"Build error: {e}")
        return None

# Usage
container = Container(name='test_container')
type_map = get_type_map()
build_manager = BuildManager(type_map)

builder = safe_build_container(container, build_manager)
# Compare against None explicitly: failure is signalled by None, not by falsiness
if builder is not None:
    print("Successfully built container")

Dynamic Class Generation

import numpy as np

from hdmf import Container
from hdmf.build import CustomClassGenerator
from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec
from hdmf.utils import docval, getargs

# Create specification for dynamic class.
# HDMF specs name a new type with ``data_type_def``. DatasetSpec takes ``doc``
# first and ``dtype`` second, so its arguments are passed by keyword here to
# avoid binding the dataset name to ``doc``.
recording_spec = GroupSpec(
    doc='Neural recording container',
    data_type_def='NeuralRecording',
    datasets=[
        DatasetSpec(doc='Raw recording data', name='data', dtype='int16',
                    shape=(None, None), dims=['time', 'channels']),
        DatasetSpec(doc='Sample timestamps', name='timestamps', dtype='float64',
                    shape=(None,), dims=['time'])
    ],
    attributes=[
        AttributeSpec('sampling_rate', 'Sampling rate in Hz', dtype='float64'),
        AttributeSpec('num_channels', 'Number of recording channels', dtype='int')
    ]
)

# Generate custom class from specification
generator = CustomClassGenerator()
NeuralRecording = generator.generate_class(
    namespace='custom',
    data_type='NeuralRecording',
    spec=recording_spec,
    parent_cls=Container
)

# Use the dynamically generated class
recording = NeuralRecording(
    name='session_001',
    data=np.random.randint(-1000, 1000, (30000, 64)),
    timestamps=np.arange(30000) / 30000.0,
    sampling_rate=30000.0,
    num_channels=64
)

print(f"Created {recording.__class__.__name__} with {recording.num_channels} channels")

Build Process Monitoring

import warnings

from hdmf import Container
from hdmf.build import (BuildManager, BuildWarning, MissingRequiredBuildWarning,
                        DtypeConversionWarning)
from hdmf.common import get_type_map

def build_with_monitoring(container, build_manager):
    """
    Build a container and report every build warning raised on the way.

    Args:
        container: Container object to build
        build_manager: Build manager used to perform the build

    Returns:
        The builder produced by ``build_manager.build``
    """

    with warnings.catch_warnings(record=True) as warning_list:
        # Record every warning, including repeats that the default filter would hide
        warnings.simplefilter("always")

        builder = build_manager.build(container)

        # Process warnings; the specific categories are tested before the
        # BuildWarning base class, which would otherwise match them all
        for warning in warning_list:
            if issubclass(warning.category, MissingRequiredBuildWarning):
                print(f"Missing required field: {warning.message}")
            elif issubclass(warning.category, DtypeConversionWarning):
                print(f"Data type conversion: {warning.message}")
            elif issubclass(warning.category, BuildWarning):
                print(f"Build warning: {warning.message}")

        return builder

# Usage
container = Container(name='monitored_build')
type_map = get_type_map()
build_manager = BuildManager(type_map)

builder = build_with_monitoring(container, build_manager)
print(f"Build completed for: {builder.name}")

Install with Tessl CLI

npx tessl i tessl/pypi-hdmf

docs

build-system.md

common-data.md

containers.md

data-utils.md

index.md

io-backends.md

query.md

specification.md

term-sets.md

utils.md

validation.md

tile.json