A hierarchical data modeling framework for modern science data standards
---
HDMF provides a pluggable I/O system supporting multiple storage backends including HDF5 and Zarr. The I/O system handles reading and writing hierarchical data structures with support for compression, chunking, and efficient data access patterns.
Abstract base class defining the interface for all HDMF I/O backends.
class HDMFIO:
    """
    Abstract base class for HDMF I/O operations.

    Provides the interface contract for all storage backend
    implementations. Methods here are documentation-only stubs;
    concrete backends (e.g. HDF5IO) override them.
    """

    def __init__(self, path: str, mode: str = 'r', **kwargs):
        """
        Initialize the I/O backend.

        Args:
            path: Path to the file or storage location.
            mode: File access mode ('r', 'w', 'a', 'r+').
            **kwargs: Backend-specific options.
        """

    def write(self, container, **kwargs):
        """
        Write a container to the storage backend.

        Args:
            container: Container object to write.
        """

    def read(self, **kwargs):
        """
        Read data from the storage backend.

        Returns:
            Container object with loaded data.
        """

    def close(self):
        """Close the I/O backend and release resources."""

    def __enter__(self):
        """Context manager entry."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit with cleanup."""


# Primary I/O backend for reading and writing HDF5 files with full
# HDMF feature support.
class HDF5IO(HDMFIO):
    """
    HDF5 I/O backend for reading and writing HDMF data to HDF5 files.

    Supports all HDMF features including hierarchical containers,
    metadata, compression, chunking, and cross-platform compatibility.
    """

    def __init__(self, path: str, mode: str = 'r', manager=None, **kwargs):
        """
        Initialize HDF5 I/O.

        Args:
            path: Path to HDF5 file.
            mode: File access mode ('r', 'w', 'a', 'r+').
            manager: Build manager for container conversion.
            **kwargs: Additional HDF5 file options.
        """

    def write(self, container, **kwargs):
        """
        Write container to HDF5 file.

        Args:
            container: Container object to write.
            **kwargs: Write options including:
                - cache_spec: Whether to cache specification (default: True)
                - exhaust_dci: Whether to exhaust data chunk iterators
                - link_data: Whether to link external data
        """

    def read(self, **kwargs):
        """
        Read container from HDF5 file.

        Args:
            **kwargs: Read options.

        Returns:
            Container object loaded from file.
        """

    def export(self, src_io, container, **kwargs):
        """
        Export container from another I/O source to this HDF5 file.

        Args:
            src_io: Source I/O object.
            container: Container to export.
        """

    def close(self):
        """Close HDF5 file and release resources."""

    @property
    def file(self):
        """Access to underlying h5py File object."""


# Configuration wrapper for customizing how data is written to HDF5 files.
class H5DataIO:
    """
    HDF5 data I/O configuration wrapper for controlling storage options.

    Provides fine-grained control over compression, chunking, filtering,
    and other HDF5 dataset creation properties.
    """

    def __init__(self, data, **kwargs):
        """
        Initialize H5DataIO wrapper.

        Args:
            data: Data to be written.
            **kwargs: HDF5 dataset creation options:
                - compression: Compression filter ('gzip', 'lzf', 'szip')
                - compression_opts: Compression level (0-9 for gzip)
                - shuffle: Enable shuffle filter for better compression
                - fletcher32: Enable Fletcher32 checksum filter
                - chunks: Chunk shape for datasets
                - maxshape: Maximum shape for resizable datasets
                - fillvalue: Fill value for uninitialized data
                - track_times: Track dataset creation/modification times
        """

    @property
    def data(self):
        """Access to wrapped data."""

    @property
    def io_settings(self) -> dict:
        """Dictionary of I/O settings for this data."""


# Specialized classes for reading and writing HDMF specifications to
# HDF5 files.
class H5SpecWriter:
    """
    Writer for HDMF specifications in HDF5 format.

    Handles storage of namespace and specification information within
    HDF5 files.
    """

    # "HDF5IO" is a forward reference so the annotation does not require
    # the name to be resolvable at class-creation time.
    def __init__(self, io: "HDF5IO"):
        """
        Initialize specification writer.

        Args:
            io: HDF5IO object for file access.
        """

    def write_spec(self, spec_catalog, spec_namespace):
        """
        Write specification catalog and namespace to HDF5 file.

        Args:
            spec_catalog: Specification catalog to write.
            spec_namespace: Namespace information.
        """
class H5SpecReader:
    """
    Reader for HDMF specifications from HDF5 format.

    Loads namespace and specification information from HDF5 files.
    """

    # "HDF5IO" is a forward reference so the annotation does not require
    # the name to be resolvable at class-creation time.
    def __init__(self, io: "HDF5IO"):
        """
        Initialize specification reader.

        Args:
            io: HDF5IO object for file access.
        """

    def read_spec(self) -> tuple:
        """
        Read specification from HDF5 file.

        Returns:
            Tuple of (spec_catalog, spec_namespace).
        """


# Utility functions and tools for working with HDF5 files and datasets.
class H5Dataset:
    """
    Wrapper for HDF5 datasets providing enhanced functionality.

    Adds HDMF-specific features to h5py dataset objects including
    lazy loading, data transformation, and metadata handling.
    """

    # "HDF5IO" is a forward reference so the annotation does not require
    # the name to be resolvable at class-creation time.
    def __init__(self, dataset, io: "HDF5IO", **kwargs):
        """
        Initialize H5Dataset wrapper.

        Args:
            dataset: h5py dataset object.
            io: Parent HDF5IO object.
        """

    def __getitem__(self, key):
        """Get data slice from dataset."""

    def __setitem__(self, key, value):
        """Set data slice in dataset."""

    @property
    def shape(self) -> tuple:
        """Shape of the dataset."""

    @property
    def dtype(self):
        """Data type of the dataset."""

    @property
    def size(self) -> int:
        """Total number of elements in dataset."""
# HDF5 utility functions

def get_h5_version() -> str:
    """
    Get the HDF5 library version.

    Returns:
        HDF5 version string.
    """
def check_h5_version(min_version: str = None) -> bool:
    """
    Check whether the HDF5 library version meets minimum requirements.

    Args:
        min_version: Minimum required version string, or None for no
            specific requirement.

    Returns:
        True if the installed version is sufficient.
    """


# Example: basic round-trip write and read of a container.
from hdmf.backends.hdf5 import HDF5IO, H5DataIO
from hdmf import Container, Data
import numpy as np

# Create sample data
data_array = np.random.randn(1000, 100)
data_container = Data(name='neural_data', data=data_array)
container = Container(name='experiment')
container.add_child(data_container)

# Write to HDF5 file
with HDF5IO('experiment.h5', mode='w') as io:
    io.write(container)

# Read from HDF5 file
with HDF5IO('experiment.h5', mode='r') as io:
    read_container = io.read()
    print(f"Container: {read_container.name}")
    print(f"Data shape: {read_container.neural_data.shape}")

# Example: compression and chunking options.
from hdmf.backends.hdf5 import H5DataIO
import numpy as np

# Create large dataset with compression
large_data = np.random.randn(10000, 1000)

# Configure compression and chunking
compressed_data = H5DataIO(
    data=large_data,
    compression='gzip',
    compression_opts=9,     # Maximum compression
    shuffle=True,           # Better compression for numeric data
    fletcher32=True,        # Checksums for data integrity
    chunks=(1000, 100),     # Chunk size for efficient access
    maxshape=(None, 1000),  # Allow resizing along first dimension
)
data_container = Data(name='compressed_data', data=compressed_data)

# Write with advanced options
with HDF5IO('compressed_experiment.h5', mode='w') as io:
    io.write(container, cache_spec=True, exhaust_dci=False)

# Example: linking external data instead of copying it.
from hdmf.backends.hdf5 import HDF5IO
from hdmf import Data

# Create external data reference
external_data = H5DataIO(
    data='path/to/external/data.h5',
    link_data=True,  # Link instead of copying
)
data_container = Data(name='external_data', data=external_data)

# Write with external links
with HDF5IO('main_file.h5', mode='w') as io:
    io.write(container, link_data=True)

# Example: lazy slicing of large datasets.
from hdmf.backends.hdf5 import HDF5IO
# Open file in read mode
with HDF5IO('large_experiment.h5', mode='r') as io:
    container = io.read()
    # Access dataset without loading all data
    dataset = container.neural_data.data
    # Read specific slices
    first_100_samples = dataset[:100, :]
    specific_channels = dataset[:, [0, 5, 10]]
    time_window = dataset[1000:2000, :]
    print(f"Dataset shape: {dataset.shape}")
    print(f"Slice shape: {first_100_samples.shape}")

# Example: resizable datasets and appending data.
from hdmf.backends.hdf5 import HDF5IO, H5DataIO
import numpy as np

# Initial data with resizable configuration
initial_data = H5DataIO(
    data=np.random.randn(100, 50),
    maxshape=(None, 50),  # Allow growth along first dimension
    chunks=(10, 50),
)
data_container = Data(name='growing_data', data=initial_data)

# Write initial data
with HDF5IO('growing_experiment.h5', mode='w') as io:
    io.write(container)

# Append new data
with HDF5IO('growing_experiment.h5', mode='a') as io:
    container = io.read()
    new_data = np.random.randn(50, 50)
    # Append to existing dataset
    container.growing_data.append(new_data)
    # Write updated container
    io.write(container)

# Example: cross-platform file processing.
from hdmf.backends.hdf5 import HDF5IO
import os


def process_hdmf_file(input_path: str, output_path: str):
    """Process an HDMF file across different platforms.

    Args:
        input_path: Path of the file to read.
        output_path: Path where the processed file is written.
    """
    # Read from any platform
    with HDF5IO(input_path, mode='r') as src_io:
        container = src_io.read()
        # Process data
        for child in container.children:
            if hasattr(child, 'data'):
                # Apply processing to data
                processed_data = child.data * 1.5
                child.data = processed_data
    # Write to new location
    with HDF5IO(output_path, mode='w') as dst_io:
        dst_io.write(container, cache_spec=True)
    print(f"Processed file written to: {output_path}")


# Cross-platform usage
if os.name == 'nt':  # Windows
    input_file = r'C:\data\experiment.h5'
    output_file = r'C:\processed\experiment_processed.h5'
else:  # Unix-like systems
    input_file = '/data/experiment.h5'
    output_file = '/processed/experiment_processed.h5'

process_hdmf_file(input_file, output_file)

# Install with Tessl CLI:
npx tessl i tessl/pypi-hdmf