A hierarchical data modeling framework for modern science data standards
---
HDMF provides a pluggable I/O system supporting multiple storage backends including HDF5 and Zarr. The I/O system handles reading and writing hierarchical data structures with support for compression, chunking, and efficient data access patterns.
Abstract base class defining the interface for all HDMF I/O backends.
class HDMFIO:
    """
    Abstract base class for HDMF I/O operations.

    Provides the interface contract for all storage backend
    implementations. Methods here are documentation-only stubs;
    concrete backends (e.g. HDF5IO) override them.
    """

    def __init__(self, path: str, mode: str = 'r', **kwargs):
        """
        Initialize the I/O backend.

        Args:
            path: Path to the file or storage location.
            mode: File access mode ('r', 'w', 'a', 'r+').
            **kwargs: Backend-specific options.
        """

    def write(self, container, **kwargs):
        """
        Write a container to the storage backend.

        Args:
            container: Container object to write.
        """

    def read(self, **kwargs):
        """
        Read data from the storage backend.

        Returns:
            Container object with loaded data.
        """

    def close(self):
        """Close the I/O backend and release resources."""

    def __enter__(self):
        """Context manager entry."""

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit with cleanup."""


# Primary I/O backend for reading and writing HDF5 files with full
# HDMF feature support.
class HDF5IO(HDMFIO):
    """
    HDF5 I/O backend for reading and writing HDMF data to HDF5 files.

    Supports all HDMF features including hierarchical containers,
    metadata, compression, chunking, and cross-platform compatibility.
    """

    def __init__(self, path: str, mode: str = 'r', manager=None, **kwargs):
        """
        Initialize HDF5 I/O.

        Args:
            path: Path to HDF5 file.
            mode: File access mode ('r', 'w', 'a', 'r+').
            manager: Build manager for container conversion.
            **kwargs: Additional HDF5 file options.
        """

    def write(self, container, **kwargs):
        """
        Write container to HDF5 file.

        Args:
            container: Container object to write.
            **kwargs: Write options including:
                - cache_spec: Whether to cache specification (default: True)
                - exhaust_dci: Whether to exhaust data chunk iterators
                - link_data: Whether to link external data
        """

    def read(self, **kwargs):
        """
        Read container from HDF5 file.

        Args:
            **kwargs: Read options.

        Returns:
            Container object loaded from file.
        """

    def export(self, src_io, container, **kwargs):
        """
        Export container from another I/O source to this HDF5 file.

        Args:
            src_io: Source I/O object.
            container: Container to export.
        """

    def close(self):
        """Close HDF5 file and release resources."""

    @property
    def file(self):
        """Access to underlying h5py File object."""


# Configuration wrapper for customizing how data is written to HDF5 files.
class H5DataIO:
    """
    HDF5 data I/O configuration wrapper for controlling storage options.

    Provides fine-grained control over compression, chunking, filtering,
    and other HDF5 dataset creation properties.
    """

    def __init__(self, data, **kwargs):
        """
        Initialize H5DataIO wrapper.

        Args:
            data: Data to be written.
            **kwargs: HDF5 dataset creation options:
                - compression: Compression filter ('gzip', 'lzf', 'szip')
                - compression_opts: Compression level (0-9 for gzip)
                - shuffle: Enable shuffle filter for better compression
                - fletcher32: Enable Fletcher32 checksum filter
                - chunks: Chunk shape for datasets
                - maxshape: Maximum shape for resizable datasets
                - fillvalue: Fill value for uninitialized data
                - track_times: Track dataset creation/modification times
        """

    @property
    def data(self):
        """Access to wrapped data."""

    @property
    def io_settings(self) -> dict:
        """Dictionary of I/O settings for this data."""


# Specialized classes for reading and writing HDMF specifications to
# HDF5 files.
class H5SpecWriter:
    """
    Writer for HDMF specifications in HDF5 format.

    Handles storage of namespace and specification information within
    HDF5 files.
    """

    # "HDF5IO" is a forward reference so the annotation does not require
    # the name to be resolvable at class-creation time.
    def __init__(self, io: "HDF5IO"):
        """
        Initialize specification writer.

        Args:
            io: HDF5IO object for file access.
        """

    def write_spec(self, spec_catalog, spec_namespace):
        """
        Write specification catalog and namespace to HDF5 file.

        Args:
            spec_catalog: Specification catalog to write.
            spec_namespace: Namespace information.
        """
class H5SpecReader:
    """
    Reader for HDMF specifications from HDF5 format.

    Loads namespace and specification information from HDF5 files.
    """

    # "HDF5IO" is a forward reference so the annotation does not require
    # the name to be resolvable at class-creation time.
    def __init__(self, io: "HDF5IO"):
        """
        Initialize specification reader.

        Args:
            io: HDF5IO object for file access.
        """

    def read_spec(self) -> tuple:
        """
        Read specification from HDF5 file.

        Returns:
            Tuple of (spec_catalog, spec_namespace).
        """


# Utility functions and tools for working with HDF5 files and datasets.
class H5Dataset:
    """
    Wrapper for HDF5 datasets providing enhanced functionality.

    Adds HDMF-specific features to h5py dataset objects including
    lazy loading, data transformation, and metadata handling.
    """

    # "HDF5IO" is a forward reference so the annotation does not require
    # the name to be resolvable at class-creation time.
    def __init__(self, dataset, io: "HDF5IO", **kwargs):
        """
        Initialize H5Dataset wrapper.

        Args:
            dataset: h5py dataset object.
            io: Parent HDF5IO object.
        """

    def __getitem__(self, key):
        """Get data slice from dataset."""

    def __setitem__(self, key, value):
        """Set data slice in dataset."""

    @property
    def shape(self) -> tuple:
        """Shape of the dataset."""

    @property
    def dtype(self):
        """Data type of the dataset."""

    @property
    def size(self) -> int:
        """Total number of elements in dataset."""
# HDF5 utility functions

def get_h5_version() -> str:
    """
    Get the HDF5 library version.

    Returns:
        HDF5 version string.
    """
def check_h5_version(min_version: str = None) -> bool:
    """
    Check whether the HDF5 library version meets minimum requirements.

    Args:
        min_version: Minimum required version string, or None for no
            specific requirement.

    Returns:
        True if the installed version is sufficient.
    """


# Example: basic round-trip write and read of a container.
from hdmf.backends.hdf5 import HDF5IO, H5DataIO
from hdmf import Container, Data
import numpy as np

# Create sample data
data_array = np.random.randn(1000, 100)
data_container = Data(name='neural_data', data=data_array)
container = Container(name='experiment')
container.add_child(data_container)

# Write to HDF5 file
with HDF5IO('experiment.h5', mode='w') as io:
    io.write(container)

# Read from HDF5 file
with HDF5IO('experiment.h5', mode='r') as io:
    read_container = io.read()
    print(f"Container: {read_container.name}")
    print(f"Data shape: {read_container.neural_data.shape}")

# Example: compression and chunking options.
from hdmf.backends.hdf5 import H5DataIO
import numpy as np

# Create large dataset with compression
large_data = np.random.randn(10000, 1000)

# Configure compression and chunking
compressed_data = H5DataIO(
    data=large_data,
    compression='gzip',
    compression_opts=9,     # Maximum compression
    shuffle=True,           # Better compression for numeric data
    fletcher32=True,        # Checksums for data integrity
    chunks=(1000, 100),     # Chunk size for efficient access
    maxshape=(None, 1000),  # Allow resizing along first dimension
)
data_container = Data(name='compressed_data', data=compressed_data)

# Write with advanced options
with HDF5IO('compressed_experiment.h5', mode='w') as io:
    io.write(container, cache_spec=True, exhaust_dci=False)

# Example: linking external data instead of copying it.
from hdmf.backends.hdf5 import HDF5IO
from hdmf import Data

# Create external data reference
external_data = H5DataIO(
    data='path/to/external/data.h5',
    link_data=True,  # Link instead of copying
)
data_container = Data(name='external_data', data=external_data)

# Write with external links
with HDF5IO('main_file.h5', mode='w') as io:
    io.write(container, link_data=True)

# Example: lazy slicing of large datasets.
from hdmf.backends.hdf5 import HDF5IO
# Open file in read mode
with HDF5IO('large_experiment.h5', mode='r') as io:
    container = io.read()
    # Access dataset without loading all data
    dataset = container.neural_data.data
    # Read specific slices
    first_100_samples = dataset[:100, :]
    specific_channels = dataset[:, [0, 5, 10]]
    time_window = dataset[1000:2000, :]
    print(f"Dataset shape: {dataset.shape}")
    print(f"Slice shape: {first_100_samples.shape}")

# Example: resizable datasets and appending data.
from hdmf.backends.hdf5 import HDF5IO, H5DataIO
import numpy as np

# Initial data with resizable configuration
initial_data = H5DataIO(
    data=np.random.randn(100, 50),
    maxshape=(None, 50),  # Allow growth along first dimension
    chunks=(10, 50),
)
data_container = Data(name='growing_data', data=initial_data)

# Write initial data
with HDF5IO('growing_experiment.h5', mode='w') as io:
    io.write(container)

# Append new data
with HDF5IO('growing_experiment.h5', mode='a') as io:
    container = io.read()
    new_data = np.random.randn(50, 50)
    # Append to existing dataset
    container.growing_data.append(new_data)
    # Write updated container
    io.write(container)

# Example: cross-platform file processing.
from hdmf.backends.hdf5 import HDF5IO
import os


def process_hdmf_file(input_path: str, output_path: str):
    """Process an HDMF file across different platforms.

    Args:
        input_path: Path of the file to read.
        output_path: Path where the processed file is written.
    """
    # Read from any platform
    with HDF5IO(input_path, mode='r') as src_io:
        container = src_io.read()
        # Process data
        for child in container.children:
            if hasattr(child, 'data'):
                # Apply processing to data
                processed_data = child.data * 1.5
                child.data = processed_data
    # Write to new location
    with HDF5IO(output_path, mode='w') as dst_io:
        dst_io.write(container, cache_spec=True)
    print(f"Processed file written to: {output_path}")


# Cross-platform usage
if os.name == 'nt':  # Windows
    input_file = r'C:\data\experiment.h5'
    output_file = r'C:\processed\experiment_processed.h5'
else:  # Unix-like systems
    input_file = '/data/experiment.h5'
    output_file = '/processed/experiment_processed.h5'

process_hdmf_file(input_file, output_file)

# Install with Tessl CLI:
npx tessl i tessl/pypi-hdmf