tessl/pypi-hdmf

A hierarchical data modeling framework for modern science data standards

—

Pending

Overview

Eval results

Files

Common Data Structures

Name: tessl/pypi-hdmf
Author: tessl

HDMF provides pre-built data structures for scientific data, including dynamic tables, vector data, sparse matrices, and multi-container systems. These structures are generated automatically from specifications and provide standardized patterns for organizing complex scientific datasets.

Capabilities

Dynamic Tables

Flexible table structures that can accommodate varying column types and dynamic schema evolution.

class DynamicTable(Container):
    """
    Dynamic table implementation with flexible columns and metadata support.

    Provides a table structure where columns can be added dynamically
    and rows can contain different data types with full metadata preservation.

    This listing is an API outline: each method body consists of its
    docstring only. Columns are exposed as VectorData objects, rows are
    appended one at a time with ``add_row``, and ``to_dataframe`` /
    ``from_dataframe`` convert to and from pandas.
    """

    def __init__(self, name: str, description: str, **kwargs) -> None:
        """
        Initialize dynamic table.

        Args:
            name: Name of the table
            description: Description of the table's purpose
            **kwargs: Additional table properties:
                - id: VectorData for row identifiers
                  (NOTE(review): upstream HDMF documents row ids as
                  ElementIdentifiers -- confirm and update this entry)
                - columns: List of VectorData columns
                - colnames: List of column names
        """

    def add_column(self, name: str, description: str, data=None, **kwargs):
        """
        Add a column to the table.

        Args:
            name: Column name
            description: Column description
            data: Initial data for the column (defaults to None, i.e. the
                column starts without values)
            **kwargs: Additional column properties:
                - dtype: Data type for the column
                  (NOTE(review): confirm the installed HDMF release accepts
                  this keyword)
                - index: Whether this column needs an index
        """

    def add_row(self, **kwargs):
        """
        Add a row to the table.

        Args:
            **kwargs: Column values for the new row, passed as
                ``column_name=value`` pairs
        """

    def get_column(self, name: str) -> 'VectorData':
        """
        Get column by name.

        Args:
            name: Column name

        Returns:
            VectorData object for the column
        """

    def to_dataframe(self):
        """
        Convert table to pandas DataFrame.

        Returns:
            pandas.DataFrame representation of the table
        """

    @classmethod
    def from_dataframe(cls, df, name: str, **kwargs):
        """
        Create DynamicTable from pandas DataFrame.

        Alternate constructor: called on the class rather than an instance.

        Args:
            df: Source pandas DataFrame
            name: Name for the new table
            **kwargs: Additional table properties

        Returns:
            DynamicTable instance created from DataFrame
        """

    def __getitem__(self, key):
        """
        Get rows or columns by index/name.

        Args:
            key: Index or name selecting what to return
        """

    def __len__(self) -> int:
        """Number of rows in the table."""

    @property
    def columns(self) -> tuple:
        """Tuple of column objects."""

    @property
    def colnames(self) -> tuple:
        """Tuple of column names."""

class AlignedDynamicTable(DynamicTable):
    """
    Dynamic table with synchronized columns for related data.

    Ensures that related columns maintain alignment and provides
    specialized access patterns for multi-dimensional scientific data.

    Related columns are grouped into named category tables, each of which
    is itself a DynamicTable (see ``add_category_table`` and
    ``get_category_table``). This listing is an API outline: method bodies
    consist of their docstrings only.
    """

    def __init__(self, name: str, description: str, **kwargs) -> None:
        """
        Initialize aligned dynamic table.

        Args:
            name: Name of the table
            description: Description of the table
            **kwargs: Additional properties:
                - category_tables: Dictionary of related sub-tables
        """

    def add_category_table(self, name: str, description: str, **kwargs):
        """
        Add a category table for grouped data.

        Args:
            name: Category table name
            description: Description of the category
            **kwargs: Additional properties for the category table (not
                enumerated in this outline)
        """

    def get_category_table(self, name: str) -> DynamicTable:
        """
        Get category table by name.

        Args:
            name: Category table name

        Returns:
            DynamicTable for the category
        """

Vector Data Structures

Core data structures for storing and indexing vector data with support for ragged arrays.

class VectorData(Data):
    """
    Vector data implementation for table columns and array data.

    Stores 1D array data with metadata and provides indexing capabilities
    for both regular and ragged array structures.

    This listing is an API outline: method bodies consist of their
    docstrings only.
    """

    def __init__(self, name: str, description: str, data, **kwargs) -> None:
        """
        Initialize vector data.

        Args:
            name: Name of the vector data
            description: Description of the data
            data: Array-like data content
            **kwargs: Additional properties:
                - unit: Unit of measurement
                - resolution: Data resolution
                - conversion: Conversion factor
                (NOTE(review): confirm these three keywords against the
                installed HDMF release before relying on them)
        """

    def append(self, data):
        """
        Append data to the vector.

        Args:
            data: Data to append
        """

    def extend(self, data):
        """
        Extend vector with iterable data.

        Args:
            data: Iterable data to extend with
        """

    @property
    def unit(self) -> str:
        """Unit of measurement for the data."""

    @property
    def resolution(self) -> float:
        """Resolution of the data."""

class VectorIndex(VectorData):
    """
    Vector index implementation for indexing into ragged arrays.

    Provides indexing capabilities for VectorData that contains
    variable-length elements, enabling efficient access to ragged data structures.

    The variable-length elements are stored back to back in ``target``;
    this index records where each one ends. This listing is an API outline:
    method bodies consist of their docstrings only.
    """

    def __init__(self, name: str, data, target: VectorData, **kwargs) -> None:
        """
        Initialize vector index.

        Args:
            name: Name of the index
            data: Index data (cumulative counts): entry ``i`` is the running
                total of element lengths up to and including element ``i``
            target: Target VectorData being indexed
            **kwargs: Additional properties
        """

    def __getitem__(self, key):
        """
        Get indexed data slice.

        Args:
            key: Position of the variable-length element to fetch from the
                target
        """

    def add_vector(self, data):
        """
        Add a vector to the indexed data.

        Args:
            data: Vector data to add
        """

    @property
    def target(self) -> VectorData:
        """Target VectorData being indexed."""

class ElementIdentifiers(Data):
    """
    Element identifier implementation for unique element tracking.

    Stores unique identifiers for data elements, enabling
    cross-referencing and relationship tracking within datasets.

    This listing is an API outline: method bodies consist of their
    docstrings only.
    """

    def __init__(self, name: str = 'element_id', data=None, **kwargs) -> None:
        """
        Initialize element identifiers.

        Args:
            name: Name for the identifiers (default: 'element_id')
            data: Initial identifier data (default: None)
            **kwargs: Additional properties (not enumerated in this outline)
        """

    def add_ref(self, container):
        """
        Add reference to a container.

        Args:
            container: Container to reference

        Returns:
            Identifier for the reference
        """

Table Regions and References

Specialized structures for referencing and linking table data.

class DynamicTableRegion(VectorData):
    """
    Dynamic table region for referencing rows in DynamicTable objects.

    Enables creation of references to specific rows or ranges of rows
    in DynamicTable instances, supporting complex data relationships.

    This listing is an API outline: method bodies consist of their
    docstrings only.
    """

    def __init__(self, name: str, data, description: str, table: DynamicTable, **kwargs) -> None:
        """
        Initialize dynamic table region.

        Note that ``data`` precedes ``description`` in this signature.

        Args:
            name: Name of the region
            data: Row indices or boolean mask selecting rows of ``table``
            description: Description of the region
            table: Target DynamicTable being referenced
            **kwargs: Additional properties (not enumerated in this outline)
        """

    @property
    def table(self) -> DynamicTable:
        """Target table being referenced."""

    def get_referenced_tables(self) -> list:
        """
        Get list of tables referenced by this region.

        Returns:
            List of DynamicTable instances
        """

    def __getitem__(self, key):
        """
        Get referenced rows.

        Args:
            key: Selection within this region whose referenced rows are
                returned
        """

Sparse Data Structures

Efficient storage and manipulation of sparse data matrices.

class CSRMatrix(Container):
    """
    Compressed Sparse Row matrix implementation.

    Provides memory-efficient storage for sparse matrices using
    the CSR (Compressed Sparse Row) format with full metadata support.

    This listing is an API outline: method bodies consist of their
    docstrings only.
    NOTE(review): verify ``from_scipy_sparse``, ``to_dense`` and ``nnz``
    against the installed HDMF release; upstream documentation describes
    passing a scipy ``csr_matrix`` directly as ``data`` -- confirm.
    """

    def __init__(self, data, indices, indptr, shape: tuple, **kwargs) -> None:
        """
        Initialize CSR matrix.

        Args:
            data: Non-zero values array
            indices: Column indices for non-zero values (one per entry of
                ``data``)
            indptr: Index pointers for row starts
            shape: Shape of the full matrix (rows, cols)
            **kwargs: Additional properties:
                - name: Name for the matrix
                - description: Matrix description
        """

    def to_scipy_sparse(self):
        """
        Convert to scipy sparse matrix.

        Returns:
            scipy.sparse.csr_matrix instance
        """

    def to_dense(self):
        """
        Convert to dense numpy array.

        Returns:
            Dense numpy array representation
        """

    @classmethod
    def from_scipy_sparse(cls, sparse_matrix, **kwargs):
        """
        Create CSRMatrix from scipy sparse matrix.

        Alternate constructor: called on the class rather than an instance.

        Args:
            sparse_matrix: scipy sparse matrix
            **kwargs: Additional properties

        Returns:
            CSRMatrix instance
        """

    @property
    def data(self):
        """Non-zero values array."""

    @property
    def indices(self):
        """Column indices array."""

    @property
    def indptr(self):
        """Index pointers array."""

    @property
    def shape(self) -> tuple:
        """Shape of the matrix."""

    @property
    def nnz(self) -> int:
        """Number of non-zero elements."""

Multi-Container Systems

Specialized containers for managing collections of related objects.

# MultiContainerInterface already derives from Container upstream, so it is
# the only base listed here. Naming Container ahead of its own subclass makes
# Python reject the class with an inconsistent-MRO TypeError.
class SimpleMultiContainer(MultiContainerInterface):
    """
    Simple multi-container implementation for holding multiple objects.

    Provides a straightforward container for managing collections
    of related objects with dictionary-like access patterns.

    Instances are still Containers, through MultiContainerInterface.
    This listing is an API outline: method bodies consist of their
    docstrings only.
    """

    def __init__(self, name: str, **kwargs):
        """
        Initialize simple multi-container.

        Args:
            name: Name of the container
            **kwargs: Additional container properties
        """

    def add_container(self, container: Container):
        """
        Add a container to the collection.

        Args:
            container: Container to add
        """

    def get_container(self, name: str) -> Container:
        """
        Get container by name.

        Args:
            name: Container name

        Returns:
            Container object
        """

    def __iter__(self):
        """Iterate over contained objects."""

    def __len__(self) -> int:
        """Number of contained objects."""

Experimental Data Types

Experimental and specialized data structures for advanced use cases.

class EnumData(VectorData):
    """
    Enumeration data (experimental) for categorical data with controlled vocabularies.

    Stores categorical data with predefined value sets and provides
    validation and conversion capabilities for enumerated types.

    This listing is an API outline: method bodies consist of their
    docstrings only.
    """

    def __init__(self, name: str, description: str, data, elements: list, **kwargs) -> None:
        """
        Initialize enumeration data.

        Args:
            name: Name of the enumeration data
            description: Description of the data
            data: Enumeration values (indices or strings); integer values
                are positions within ``elements``
                (NOTE(review): confirm that string values are accepted by
                the installed HDMF release)
            elements: List of allowed enumeration elements
            **kwargs: Additional properties (not enumerated in this outline)
        """

    @property
    def elements(self) -> tuple:
        """Tuple of allowed enumeration elements."""

    def add_element(self, element: str):
        """
        Add allowed element to enumeration.

        Args:
            element: Element to add
        """

class HERD(Container):
    """
    Hierarchical External Resource Descriptor (experimental).

    Provides structured metadata for external resources and their
    relationships within the data hierarchy.

    NOTE(review): upstream HDMF documentation appears to expand HERD as
    "HDMF External Resources Data Structure" -- confirm the name and the
    method set below against the installed release.
    This listing is an API outline: method bodies consist of their
    docstrings only.
    """

    def __init__(self, **kwargs) -> None:
        """
        Initialize HERD container.

        Args:
            **kwargs: HERD properties and metadata
        """

    def add_resource(self, resource_spec: dict):
        """
        Add external resource specification.

        Args:
            resource_spec: Dictionary describing the resource
        """

Registration and Management Functions

Functions for registering and managing common data types.

def register_class(neurodata_type: str, namespace: str, container_cls):
    """
    Register container class for a data type.

    API outline only: the body consists of this docstring.

    Args:
        neurodata_type: Name of the data type
        namespace: Namespace containing the type (for example the value of
            CORE_NAMESPACE or EXP_NAMESPACE)
        container_cls: Container class to register
    """

def register_map(container_cls, mapper_cls):
    """
    Register object mapper for a container class.

    API outline only: the body consists of this docstring.

    Args:
        container_cls: Container class the mapper applies to
        mapper_cls: Mapper class for serialization
    """

def get_class(neurodata_type: str, namespace: str = 'hdmf-common'):
    """
    Get container class for a data type.

    API outline only: the body consists of this docstring.

    Args:
        neurodata_type: Name of the data type
        namespace: Namespace (default: 'hdmf-common', the same string as
            CORE_NAMESPACE)

    Returns:
        Container class for the data type
    """

def get_type_map():
    """
    Get type map with HDMF-common extensions.

    API outline only: the body consists of this docstring. Takes no
    arguments.

    Returns:
        TypeMap instance with common data types registered
    """

def get_manager():
    """
    Get build manager with common data types.

    API outline only: the body consists of this docstring. Takes no
    arguments.

    Returns:
        BuildManager instance configured for common types
    """

# Constants: namespace identifiers, usable wherever a `namespace` argument is expected
CORE_NAMESPACE = 'hdmf-common'      # Core namespace identifier (also the default of get_class)
EXP_NAMESPACE = 'hdmf-experimental' # Experimental namespace identifier

Usage Examples

Creating and Using Dynamic Tables

from hdmf.common import DynamicTable, VectorData
import numpy as np

# Start from an empty table; columns and rows are attached afterwards
subjects_table = DynamicTable(
    name='subjects',
    description='Information about experimental subjects'
)

# Columns as (name, description, extra keyword arguments), added in order
# NOTE(review): `dtype` travels through add_column's **kwargs -- confirm the
# installed HDMF release accepts it.
column_specs = [
    ('subject_id', 'Unique subject identifier', {}),
    ('age', 'Age in months', {'dtype': 'int'}),
    ('weight', 'Weight in grams', {'dtype': 'float'}),
    ('genotype', 'Genetic background', {}),
]
for col_name, col_description, col_options in column_specs:
    subjects_table.add_column(col_name, col_description, **col_options)

# One dict per subject, keyed by column name
subject_rows = [
    {'subject_id': 'mouse_001', 'age': 8, 'weight': 25.3, 'genotype': 'WT'},
    {'subject_id': 'mouse_002', 'age': 10, 'weight': 27.1, 'genotype': 'KO'},
    {'subject_id': 'mouse_003', 'age': 9, 'weight': 24.8, 'genotype': 'WT'},
]
for subject in subject_rows:
    subjects_table.add_row(**subject)

# Report the table's size and schema
row_count = len(subjects_table)
print(f"Table has {row_count} rows")
print(f"Columns: {subjects_table.colnames}")

# Hand the table to pandas for inspection
df = subjects_table.to_dataframe()
print(df.head())

# Pull a single column back out by name
age_column = subjects_table.get_column('age')
ages = age_column.data
print(f"Ages: {ages}")

Working with Ragged Arrays Using Vector Indices

from hdmf.common import VectorData, VectorIndex

# Ragged input: one variable-length spike train per trial
spike_data = [
    [0.1, 0.3, 0.7, 1.2],        # Trial 1: 4 spikes
    [0.2, 0.8],                   # Trial 2: 2 spikes
    [0.05, 0.4, 0.6, 0.9, 1.1],  # Trial 3: 5 spikes
]

# Lay all spikes end to end in trial order
flattened_spikes = [spike for trial_spikes in spike_data for spike in trial_spikes]

# Record where each trial ends in the flat list (running total of lengths)
indices = []
spikes_so_far = 0
for trial_spikes in spike_data:
    spikes_so_far += len(trial_spikes)
    indices.append(spikes_so_far)

# The flat values go in a VectorData ...
spike_times = VectorData(
    name='spike_times',
    description='Spike timestamps in seconds',
    data=flattened_spikes
)

# ... and the end offsets go in a VectorIndex that targets it
spike_index = VectorIndex(
    name='spike_times_index',
    data=indices,
    target=spike_times
)

# Indexing the VectorIndex yields one trial's spikes at a time
trial_0_spikes = spike_index[0]  # [0.1, 0.3, 0.7, 1.2]
trial_1_spikes = spike_index[1]  # [0.2, 0.8]
trial_2_spikes = spike_index[2]  # [0.05, 0.4, 0.6, 0.9, 1.1]

print(f"Trial 0 spikes: {trial_0_spikes}")
print(f"Trial 1 spikes: {trial_1_spikes}")

Creating Sparse Matrices

from hdmf.common import CSRMatrix
import numpy as np
from scipy import sparse

# Non-zero entries written as (row, column, value) triplets
entries = [
    (0, 0, 1),
    (0, 2, 2),
    (1, 1, 3),
    (2, 0, 4),
    (2, 1, 5),
    (2, 2, 6),
]
# Split the triplets into the three parallel arrays scipy expects
row, col, data = (np.array(axis) for axis in zip(*entries))

# Assemble the 3x3 scipy CSR matrix from the coordinate arrays
matrix_shape = (3, 3)
coordinates = (row, col)
scipy_matrix = sparse.csr_matrix((data, coordinates), shape=matrix_shape)

# Wrap it as an HDMF CSRMatrix
# NOTE(review): confirm `from_scipy_sparse` exists in the installed HDMF
# release before copying this call.
hdmf_matrix = CSRMatrix.from_scipy_sparse(
    scipy_matrix,
    name='connectivity_matrix',
    description='Neural connectivity matrix'
)

print(f"Matrix shape: {hdmf_matrix.shape}")
print(f"Non-zero elements: {hdmf_matrix.nnz}")

# Expand to a dense array so the full layout can be printed
dense_matrix = hdmf_matrix.to_dense()
print("Dense representation:")
print(dense_matrix)

Using Dynamic Table Regions for References

from hdmf.common import DynamicTable, DynamicTableRegion

# Create source table
neurons_table = DynamicTable(
    name='neurons',
    description='Information about recorded neurons'
)

neurons_table.add_column('neuron_id', 'Unique neuron identifier')
neurons_table.add_column('brain_area', 'Brain area location')
neurons_table.add_column('cell_type', 'Cell type classification')

# Add neurons: rows 0-4 are CA1, rows 5-9 are CA3
for i in range(10):
    neurons_table.add_row(
        neuron_id=f'neuron_{i:03d}',
        brain_area='CA1' if i < 5 else 'CA3',
        cell_type='pyramidal' if i % 2 == 0 else 'interneuron'
    )

# Row indices of the CA1 neurons within neurons_table
ca1_rows = [0, 1, 2, 3, 4]

# Create region referencing subset of neurons
ca1_neurons = DynamicTableRegion(
    name='ca1_neurons',
    data=list(ca1_rows),  # Row indices for CA1 neurons (own copy)
    description='Neurons recorded from CA1 region',
    table=neurons_table
)

# Access referenced data
referenced_neurons = ca1_neurons[:]
print(f"CA1 neurons: {len(referenced_neurons)} neurons")

# Use a region column in an analysis table
analysis_table = DynamicTable(
    name='spike_analysis',
    description='Spike analysis results'
)

# Declare both columns empty so they stay the same length as the table.
# Supplying initial data to a table with no rows would leave the columns
# misaligned before the first add_row call.
analysis_table.add_column(
    'neurons',
    'Analyzed neurons',
    table=neurons_table,  # cells hold row references into neurons_table
    index=True            # each cell is a variable-length list of rows
)
analysis_table.add_column('firing_rate', 'Average firing rate')

# The cell value is the list of referenced row indices, not a region object
analysis_table.add_row(neurons=ca1_rows, firing_rate=15.3)

Creating Aligned Dynamic Tables

from hdmf.common import AlignedDynamicTable

# Top-level table that keeps several per-sample category tables in step
session_data = AlignedDynamicTable(
    name='session_data',
    description='Aligned behavioral and neural data'
)

# Columns that live directly on the main table
session_data.add_column('timestamp', 'Time in seconds', dtype='float')
session_data.add_column('behavior', 'Behavioral state')

# Category name -> (category description, [(column, description, options), ...])
category_layout = {
    'neural': ('Neural recording data', [
        ('spike_count', 'Number of spikes', {'dtype': 'int'}),
        ('lfp_power', 'LFP power', {'dtype': 'float'}),
    ]),
    'stimulus': ('Stimulus presentation data', [
        ('stimulus_type', 'Type of stimulus', {}),
        ('intensity', 'Stimulus intensity', {'dtype': 'float'}),
    ]),
}

# Create each category table, then give it its columns, in declaration order
category_tables = {}
for cat_name, (cat_description, cat_columns) in category_layout.items():
    session_data.add_category_table(
        name=cat_name,
        description=cat_description
    )
    cat_table = session_data.get_category_table(cat_name)
    for col_name, col_description, col_options in cat_columns:
        cat_table.add_column(col_name, col_description, **col_options)
    category_tables[cat_name] = cat_table

neural_table = category_tables['neural']
stimulus_table = category_tables['stimulus']

# One synchronized sample: main-table values plus one dict per category
first_sample = {
    'timestamp': 1.0,
    'behavior': 'running',
    'neural': {'spike_count': 5, 'lfp_power': 0.23},
    'stimulus': {'stimulus_type': 'visual', 'intensity': 0.8},
}
session_data.add_row(**first_sample)

print(f"Session data columns: {session_data.colnames}")
print(f"Neural category columns: {neural_table.colnames}")

Working with Enumerated Data

from hdmf.common import EnumData

# Controlled vocabulary shared by both examples; each EnumData gets its own
# list copy so the two objects never share a mutable `elements` list
state_names = ('rest', 'locomotion', 'grooming')

# Index-based form: each value is a position within `elements`
state_codes = [0, 1, 2, 1, 0, 2, 1]
behavior_states = EnumData(
    name='behavior_states',
    description='Behavioral state classifications',
    data=state_codes,
    elements=list(state_names)
)

# Inspect the vocabulary and the stored values
print(f"Behavior elements: {behavior_states.elements}")
print(f"Behavior data: {behavior_states.data}")

# String-based form: the values are the element names themselves
state_labels = [state_names[code] for code in (0, 1, 2, 1)]
string_behavior = EnumData(
    name='string_behavior',
    description='String-based behavioral states',
    data=state_labels,
    elements=list(state_names)
)

print(f"String behavior: {string_behavior.data}")

Install with Tessl CLI