A hierarchical data modeling framework for modern science data standards
—
HDMF provides pre-built data structures for scientific data including dynamic tables, vector data, sparse matrices, and multi-container systems. These structures are automatically generated from specifications and provide standardized patterns for organizing complex scientific datasets.
Flexible table structures that can accommodate varying column types and dynamic schema evolution.
class DynamicTable(Container):
"""
Dynamic table implementation with flexible columns and metadata support.
Provides a table structure where columns can be added dynamically
and rows can contain different data types with full metadata preservation.
"""
def __init__(self, name: str, description: str, **kwargs):
"""
Initialize dynamic table.
Args:
name: Name of the table
description: Description of the table's purpose
**kwargs: Additional table properties:
- id: VectorData for row identifiers
- columns: List of VectorData columns
- colnames: List of column names
"""
def add_column(self, name: str, description: str, data=None, **kwargs):
"""
Add a column to the table.
Args:
name: Column name
description: Column description
data: Initial data for the column
**kwargs: Additional column properties:
- dtype: Data type for the column
- index: Whether this column needs an index
"""
def add_row(self, **kwargs):
"""
Add a row to the table.
Args:
**kwargs: Column values for the new row
"""
def get_column(self, name: str) -> 'VectorData':
"""
Get column by name.
Args:
name: Column name
Returns:
VectorData object for the column
"""
def to_dataframe(self):
"""
Convert table to pandas DataFrame.
Returns:
pandas.DataFrame representation of the table
"""
@classmethod
def from_dataframe(cls, df, name: str, **kwargs):
"""
Create DynamicTable from pandas DataFrame.
Args:
df: Source pandas DataFrame
name: Name for the new table
**kwargs: Additional table properties
Returns:
DynamicTable instance created from DataFrame
"""
def __getitem__(self, key):
"""Get rows or columns by index/name."""
def __len__(self) -> int:
"""Number of rows in the table."""
@property
def columns(self) -> tuple:
"""Tuple of column objects."""
@property
def colnames(self) -> tuple:
"""Tuple of column names."""
class AlignedDynamicTable(DynamicTable):
"""
Dynamic table with synchronized columns for related data.
Ensures that related columns maintain alignment and provides
specialized access patterns for multi-dimensional scientific data.
"""
def __init__(self, name: str, description: str, **kwargs):
"""
Initialize aligned dynamic table.
Args:
name: Name of the table
description: Description of the table
**kwargs: Additional properties:
- category_tables: Dictionary of related sub-tables
"""
def add_category_table(self, name: str, description: str, **kwargs):
"""
Add a category table for grouped data.
Args:
name: Category table name
description: Description of the category
"""
def get_category_table(self, name: str) -> DynamicTable:
"""
Get category table by name.
Args:
name: Category table name
Returns:
DynamicTable for the category
"""

Core data structures for storing and indexing vector data with support for ragged arrays.
class VectorData(Data):
"""
Vector data implementation for table columns and array data.
Stores 1D array data with metadata and provides indexing capabilities
for both regular and ragged array structures.
"""
def __init__(self, name: str, description: str, data, **kwargs):
"""
Initialize vector data.
Args:
name: Name of the vector data
description: Description of the data
data: Array-like data content
**kwargs: Additional properties:
- unit: Unit of measurement
- resolution: Data resolution
- conversion: Conversion factor
"""
def append(self, data):
"""
Append data to the vector.
Args:
data: Data to append
"""
def extend(self, data):
"""
Extend vector with iterable data.
Args:
data: Iterable data to extend with
"""
@property
def unit(self) -> str:
"""Unit of measurement for the data."""
@property
def resolution(self) -> float:
"""Resolution of the data."""
class VectorIndex(VectorData):
"""
Vector index implementation for indexing into ragged arrays.
Provides indexing capabilities for VectorData that contains
variable-length elements, enabling efficient access to ragged data structures.
"""
def __init__(self, name: str, data, target: VectorData, **kwargs):
"""
Initialize vector index.
Args:
name: Name of the index
data: Index data (cumulative counts)
target: Target VectorData being indexed
**kwargs: Additional properties
"""
def __getitem__(self, key):
"""Get indexed data slice."""
def add_vector(self, data):
"""
Add a vector to the indexed data.
Args:
data: Vector data to add
"""
@property
def target(self) -> VectorData:
"""Target VectorData being indexed."""
class ElementIdentifiers(Data):
"""
Element identifier implementation for unique element tracking.
Stores unique identifiers for data elements, enabling
cross-referencing and relationship tracking within datasets.
"""
def __init__(self, name: str = 'element_id', data=None, **kwargs):
"""
Initialize element identifiers.
Args:
name: Name for the identifiers (default: 'element_id')
data: Initial identifier data
"""
def add_ref(self, container):
"""
Add reference to a container.
Args:
container: Container to reference
Returns:
Identifier for the reference
"""

Specialized structures for referencing and linking table data.
class DynamicTableRegion(VectorData):
"""
Dynamic table region for referencing rows in DynamicTable objects.
Enables creation of references to specific rows or ranges of rows
in DynamicTable instances, supporting complex data relationships.
"""
def __init__(self, name: str, data, description: str, table: DynamicTable, **kwargs):
"""
Initialize dynamic table region.
Args:
name: Name of the region
data: Row indices or boolean mask
description: Description of the region
table: Target DynamicTable being referenced
"""
@property
def table(self) -> DynamicTable:
"""Target table being referenced."""
def get_referenced_tables(self) -> list:
"""
Get list of tables referenced by this region.
Returns:
List of DynamicTable instances
"""
def __getitem__(self, key):
"""Get referenced rows."""

Efficient storage and manipulation of sparse data matrices.
class CSRMatrix(Container):
"""
Compressed Sparse Row matrix implementation.
Provides memory-efficient storage for sparse matrices using
the CSR (Compressed Sparse Row) format with full metadata support.
"""
def __init__(self, data, indices, indptr, shape: tuple, **kwargs):
"""
Initialize CSR matrix.
Args:
data: Non-zero values array
indices: Column indices for non-zero values
indptr: Index pointers for row starts
shape: Shape of the full matrix (rows, cols)
**kwargs: Additional properties:
- name: Name for the matrix
- description: Matrix description
"""
def to_scipy_sparse(self):
"""
Convert to scipy sparse matrix.
Returns:
scipy.sparse.csr_matrix instance
"""
def to_dense(self):
"""
Convert to dense numpy array.
Returns:
Dense numpy array representation
"""
@classmethod
def from_scipy_sparse(cls, sparse_matrix, **kwargs):
"""
Create CSRMatrix from scipy sparse matrix.
Args:
sparse_matrix: scipy sparse matrix
**kwargs: Additional properties
Returns:
CSRMatrix instance
"""
@property
def data(self):
"""Non-zero values array."""
@property
def indices(self):
"""Column indices array."""
@property
def indptr(self):
"""Index pointers array."""
@property
def shape(self) -> tuple:
"""Shape of the matrix."""
@property
def nnz(self) -> int:
"""Number of non-zero elements."""

Specialized containers for managing collections of related objects.
class SimpleMultiContainer(Container, MultiContainerInterface):
"""
Simple multi-container implementation for holding multiple objects.
Provides a straightforward container for managing collections
of related objects with dictionary-like access patterns.
"""
def __init__(self, name: str, **kwargs):
"""
Initialize simple multi-container.
Args:
name: Name of the container
**kwargs: Additional container properties
"""
def add_container(self, container: Container):
"""
Add a container to the collection.
Args:
container: Container to add
"""
def get_container(self, name: str) -> Container:
"""
Get container by name.
Args:
name: Container name
Returns:
Container object
"""
def __iter__(self):
"""Iterate over contained objects."""
def __len__(self) -> int:
"""Number of contained objects."""

Experimental and specialized data structures for advanced use cases.
class EnumData(VectorData):
"""
Enumeration data (experimental) for categorical data with controlled vocabularies.
Stores categorical data with predefined value sets and provides
validation and conversion capabilities for enumerated types.
"""
def __init__(self, name: str, description: str, data, elements: list, **kwargs):
"""
Initialize enumeration data.
Args:
name: Name of the enumeration data
description: Description of the data
data: Enumeration values (indices or strings)
elements: List of allowed enumeration elements
"""
@property
def elements(self) -> tuple:
"""Tuple of allowed enumeration elements."""
def add_element(self, element: str):
"""
Add allowed element to enumeration.
Args:
element: Element to add
"""
class HERD(Container):
"""
Hierarchical External Resource Descriptor (experimental).
Provides structured metadata for external resources and their
relationships within the data hierarchy.
"""
def __init__(self, **kwargs):
"""
Initialize HERD container.
Args:
**kwargs: HERD properties and metadata
"""
def add_resource(self, resource_spec: dict):
"""
Add external resource specification.
Args:
resource_spec: Dictionary describing the resource
"""

Functions for registering and managing common data types.
def register_class(neurodata_type: str, namespace: str, container_cls):
"""
Register container class for a data type.
Args:
neurodata_type: Name of the data type
namespace: Namespace containing the type
container_cls: Container class to register
"""
def register_map(container_cls, mapper_cls):
"""
Register object mapper for a container class.
Args:
container_cls: Container class
mapper_cls: Mapper class for serialization
"""
def get_class(neurodata_type: str, namespace: str = 'hdmf-common'):
"""
Get container class for a data type.
Args:
neurodata_type: Name of the data type
namespace: Namespace (default: 'hdmf-common')
Returns:
Container class for the data type
"""
def get_type_map():
"""
Get type map with HDMF-common extensions.
Returns:
TypeMap instance with common data types registered
"""
def get_manager():
"""
Get build manager with common data types.
Returns:
BuildManager instance configured for common types
"""
# Constants
CORE_NAMESPACE = 'hdmf-common' # Core namespace identifier
EXP_NAMESPACE = 'hdmf-experimental' # Experimental namespace identifier

from hdmf.common import DynamicTable, VectorData
import numpy as np
# Create dynamic table
subjects_table = DynamicTable(
name='subjects',
description='Information about experimental subjects'
)
# Add columns
subjects_table.add_column('subject_id', 'Unique subject identifier')
subjects_table.add_column('age', 'Age in months', dtype='int')
subjects_table.add_column('weight', 'Weight in grams', dtype='float')
subjects_table.add_column('genotype', 'Genetic background')
# Add rows
subjects_table.add_row(subject_id='mouse_001', age=8, weight=25.3, genotype='WT')
subjects_table.add_row(subject_id='mouse_002', age=10, weight=27.1, genotype='KO')
subjects_table.add_row(subject_id='mouse_003', age=9, weight=24.8, genotype='WT')
# Access data
print(f"Table has {len(subjects_table)} rows")
print(f"Columns: {subjects_table.colnames}")
# Convert to DataFrame
df = subjects_table.to_dataframe()
print(df.head())
# Access specific columns
ages = subjects_table.get_column('age').data
print(f"Ages: {ages}")

from hdmf.common import VectorData, VectorIndex
# Create ragged data (variable-length spike trains)
spike_data = [
[0.1, 0.3, 0.7, 1.2], # Trial 1: 4 spikes
[0.2, 0.8], # Trial 2: 2 spikes
[0.05, 0.4, 0.6, 0.9, 1.1], # Trial 3: 5 spikes
]
# Flatten data and create cumulative indices
flattened_spikes = []
indices = []
for trial_spikes in spike_data:
flattened_spikes.extend(trial_spikes)
indices.append(len(flattened_spikes))
# Create VectorData and VectorIndex
spike_times = VectorData(
name='spike_times',
description='Spike timestamps in seconds',
data=flattened_spikes
)
spike_index = VectorIndex(
name='spike_times_index',
data=indices,
target=spike_times
)
# Access ragged data by trial
trial_0_spikes = spike_index[0] # [0.1, 0.3, 0.7, 1.2]
trial_1_spikes = spike_index[1] # [0.2, 0.8]
trial_2_spikes = spike_index[2] # [0.05, 0.4, 0.6, 0.9, 1.1]
print(f"Trial 0 spikes: {trial_0_spikes}")
print(f"Trial 1 spikes: {trial_1_spikes}")

from hdmf.common import CSRMatrix
import numpy as np
from scipy import sparse
# Create sparse data
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 1, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
# Create scipy sparse matrix
scipy_matrix = sparse.csr_matrix((data, (row, col)), shape=(3, 3))
# Convert to HDMF CSRMatrix
hdmf_matrix = CSRMatrix.from_scipy_sparse(
scipy_matrix,
name='connectivity_matrix',
description='Neural connectivity matrix'
)
print(f"Matrix shape: {hdmf_matrix.shape}")
print(f"Non-zero elements: {hdmf_matrix.nnz}")
# Convert back to dense for visualization
dense_matrix = hdmf_matrix.to_dense()
print("Dense representation:")
print(dense_matrix)

from hdmf.common import DynamicTable, DynamicTableRegion
# Create source table
neurons_table = DynamicTable(
name='neurons',
description='Information about recorded neurons'
)
neurons_table.add_column('neuron_id', 'Unique neuron identifier')
neurons_table.add_column('brain_area', 'Brain area location')
neurons_table.add_column('cell_type', 'Cell type classification')
# Add neurons
for i in range(10):
neurons_table.add_row(
neuron_id=f'neuron_{i:03d}',
brain_area='CA1' if i < 5 else 'CA3',
cell_type='pyramidal' if i % 2 == 0 else 'interneuron'
)
# Create region referencing subset of neurons
ca1_neurons = DynamicTableRegion(
name='ca1_neurons',
data=[0, 1, 2, 3, 4], # Row indices for CA1 neurons
description='Neurons recorded from CA1 region',
table=neurons_table
)
# Access referenced data
referenced_neurons = ca1_neurons[:]
print(f"CA1 neurons: {len(referenced_neurons)} neurons")
# Use region in analysis table
analysis_table = DynamicTable(
name='spike_analysis',
description='Spike analysis results'
)
analysis_table.add_column('neurons', 'Analyzed neurons', data=[ca1_neurons])
analysis_table.add_column('firing_rate', 'Average firing rate')
analysis_table.add_row(neurons=ca1_neurons, firing_rate=15.3)

from hdmf.common import AlignedDynamicTable
# Create aligned table for multi-modal data
session_data = AlignedDynamicTable(
name='session_data',
description='Aligned behavioral and neural data'
)
# Add main columns
session_data.add_column('timestamp', 'Time in seconds', dtype='float')
session_data.add_column('behavior', 'Behavioral state')
# Add category table for neural data
session_data.add_category_table(
name='neural',
description='Neural recording data'
)
neural_table = session_data.get_category_table('neural')
neural_table.add_column('spike_count', 'Number of spikes', dtype='int')
neural_table.add_column('lfp_power', 'LFP power', dtype='float')
# Add category table for stimulus data
session_data.add_category_table(
name='stimulus',
description='Stimulus presentation data'
)
stimulus_table = session_data.get_category_table('stimulus')
stimulus_table.add_column('stimulus_type', 'Type of stimulus')
stimulus_table.add_column('intensity', 'Stimulus intensity', dtype='float')
# Add synchronized data
session_data.add_row(
timestamp=1.0,
behavior='running',
neural={'spike_count': 5, 'lfp_power': 0.23},
stimulus={'stimulus_type': 'visual', 'intensity': 0.8}
)
print(f"Session data columns: {session_data.colnames}")
print(f"Neural category columns: {neural_table.colnames}")

from hdmf.common import EnumData
# Create enumeration for behavioral states
behavior_states = EnumData(
name='behavior_states',
description='Behavioral state classifications',
data=[0, 1, 2, 1, 0, 2, 1], # Indices into elements
elements=['rest', 'locomotion', 'grooming']
)
# Access enumerated values
print(f"Behavior elements: {behavior_states.elements}")
print(f"Behavior data: {behavior_states.data}")
# Could also use string data directly
string_behavior = EnumData(
name='string_behavior',
description='String-based behavioral states',
data=['rest', 'locomotion', 'grooming', 'locomotion'],
elements=['rest', 'locomotion', 'grooming']
)
print(f"String behavior: {string_behavior.data}")

Install with Tessl CLI
npx tessl i tessl/pypi-hdmf