# Common Data Structures

HDMF provides pre-built data structures for scientific data, including dynamic tables, vector data, sparse matrices, and multi-container systems. These structures are automatically generated from specifications and provide standardized patterns for organizing complex scientific datasets.

## Capabilities

### Dynamic Tables

Flexible table structures that can accommodate varying column types and dynamic schema evolution.

```python { .api }
class DynamicTable(Container):
    """
    Dynamic table implementation with flexible columns and metadata support.

    Provides a table structure where columns can be added dynamically
    and rows can contain different data types with full metadata preservation.
    """

    def __init__(self, name: str, description: str, **kwargs):
        """
        Initialize dynamic table.

        Args:
            name: Name of the table
            description: Description of the table's purpose
            **kwargs: Additional table properties:
                - id: Row identifiers (ElementIdentifiers or array-like)
                - columns: List of VectorData columns
                - colnames: List of column names
        """

    def add_column(self, name: str, description: str, data=None, **kwargs):
        """
        Add a column to the table.

        Args:
            name: Column name
            description: Column description
            data: Initial data for the column
            **kwargs: Additional column properties:
                - dtype: Data type for the column
                - index: Whether this column needs an index
        """

    def add_row(self, **kwargs):
        """
        Add a row to the table.

        Args:
            **kwargs: Column values for the new row, keyed by column name
        """

    def get_column(self, name: str) -> 'VectorData':
        """
        Get column by name.

        Args:
            name: Column name

        Returns:
            VectorData object for the column
        """

    def to_dataframe(self):
        """
        Convert table to pandas DataFrame.

        Returns:
            pandas.DataFrame representation of the table
        """

    @classmethod
    def from_dataframe(cls, df, name: str, **kwargs):
        """
        Create DynamicTable from pandas DataFrame.

        Args:
            df: Source pandas DataFrame
            name: Name for the new table
            **kwargs: Additional table properties

        Returns:
            DynamicTable instance created from DataFrame
        """

    def __getitem__(self, key):
        """Get rows or columns by index/name."""

    def __len__(self) -> int:
        """Number of rows in the table."""

    @property
    def columns(self) -> tuple:
        """Tuple of column objects."""

    @property
    def colnames(self) -> tuple:
        """Tuple of column names."""


class AlignedDynamicTable(DynamicTable):
    """
    Dynamic table with synchronized columns for related data.

    Ensures that related columns maintain alignment and provides
    specialized access patterns for multi-dimensional scientific data.
    """

    def __init__(self, name: str, description: str, **kwargs):
        """
        Initialize aligned dynamic table.

        Args:
            name: Name of the table
            description: Description of the table
            **kwargs: Additional properties:
                - category_tables: Dictionary of related sub-tables
        """

    def add_category_table(self, name: str, description: str, **kwargs):
        """
        Add a category table for grouped data.

        Args:
            name: Category table name
            description: Description of the category
            **kwargs: Additional keyword arguments for the category table
        """

    def get_category_table(self, name: str) -> DynamicTable:
        """
        Get category table by name.

        Args:
            name: Category table name

        Returns:
            DynamicTable for the category
        """
```

### Vector Data Structures

Core data structures for storing and indexing vector data with support for ragged arrays.

```python { .api }
class VectorData(Data):
    """
    Vector data implementation for table columns and array data.

    Stores 1D array data with metadata and provides indexing capabilities
    for both regular and ragged array structures.
    """

    def __init__(self, name: str, description: str, data, **kwargs):
        """
        Initialize vector data.

        Args:
            name: Name of the vector data
            description: Description of the data
            data: Array-like data content
            **kwargs: Additional properties:
                - unit: Unit of measurement
                - resolution: Data resolution
                - conversion: Conversion factor
        """

    def append(self, data):
        """
        Append data to the vector.

        Args:
            data: Data to append
        """

    def extend(self, data):
        """
        Extend vector with iterable data.

        Args:
            data: Iterable data to extend with
        """

    @property
    def unit(self) -> str:
        """Unit of measurement for the data."""

    @property
    def resolution(self) -> float:
        """Resolution of the data."""


class VectorIndex(VectorData):
    """
    Vector index implementation for indexing into ragged arrays.

    Provides indexing capabilities for VectorData that contains
    variable-length elements, enabling efficient access to ragged data structures.
    """

    def __init__(self, name: str, data, target: VectorData, **kwargs):
        """
        Initialize vector index.

        Args:
            name: Name of the index
            data: Index data (cumulative counts)
            target: Target VectorData being indexed
            **kwargs: Additional properties
        """

    def __getitem__(self, key):
        """Get indexed data slice."""

    def add_vector(self, data):
        """
        Add a vector to the indexed data.

        Args:
            data: Vector data to add
        """

    @property
    def target(self) -> VectorData:
        """Target VectorData being indexed."""


class ElementIdentifiers(Data):
    """
    Element identifier implementation for unique element tracking.

    Stores unique identifiers for data elements, enabling
    cross-referencing and relationship tracking within datasets.
    """

    def __init__(self, name: str = 'element_id', data=None, **kwargs):
        """
        Initialize element identifiers.

        Args:
            name: Name for the identifiers (default: 'element_id')
            data: Initial identifier data
            **kwargs: Additional keyword arguments
        """

    def add_ref(self, container):
        """
        Add reference to a container.

        Args:
            container: Container to reference

        Returns:
            Identifier for the reference
        """
```

### Table Regions and References

Specialized structures for referencing and linking table data.

```python { .api }
class DynamicTableRegion(VectorData):
    """
    Dynamic table region for referencing rows in DynamicTable objects.

    Enables creation of references to specific rows or ranges of rows
    in DynamicTable instances, supporting complex data relationships.
    """

    def __init__(self, name: str, data, description: str, table: DynamicTable, **kwargs):
        """
        Initialize dynamic table region.

        Args:
            name: Name of the region
            data: Row indices or boolean mask
            description: Description of the region
            table: Target DynamicTable being referenced
            **kwargs: Additional keyword arguments
        """

    @property
    def table(self) -> DynamicTable:
        """Target table being referenced."""

    def get_referenced_tables(self) -> list:
        """
        Get list of tables referenced by this region.

        Returns:
            List of DynamicTable instances
        """

    def __getitem__(self, key):
        """Get referenced rows."""
```

### Sparse Data Structures

Efficient storage and manipulation of sparse data matrices.

```python { .api }
class CSRMatrix(Container):
    """
    Compressed Sparse Row matrix implementation.

    Provides memory-efficient storage for sparse matrices using
    the CSR (Compressed Sparse Row) format with full metadata support.
    """

    def __init__(self, data, indices, indptr, shape: tuple, **kwargs):
        """
        Initialize CSR matrix.

        Args:
            data: Non-zero values array
            indices: Column indices for non-zero values
            indptr: Index pointers for row starts
            shape: Shape of the full matrix (rows, cols)
            **kwargs: Additional properties:
                - name: Name for the matrix
                - description: Matrix description
        """

    def to_scipy_sparse(self):
        """
        Convert to scipy sparse matrix.

        Returns:
            scipy.sparse.csr_matrix instance
        """

    def to_dense(self):
        """
        Convert to dense numpy array.

        Returns:
            Dense numpy array representation
        """

    @classmethod
    def from_scipy_sparse(cls, sparse_matrix, **kwargs):
        """
        Create CSRMatrix from scipy sparse matrix.

        Args:
            sparse_matrix: scipy sparse matrix
            **kwargs: Additional properties

        Returns:
            CSRMatrix instance
        """

    @property
    def data(self):
        """Non-zero values array."""

    @property
    def indices(self):
        """Column indices array."""

    @property
    def indptr(self):
        """Index pointers array."""

    @property
    def shape(self) -> tuple:
        """Shape of the matrix."""

    @property
    def nnz(self) -> int:
        """Number of non-zero elements."""
```

### Multi-Container Systems

Specialized containers for managing collections of related objects.

```python { .api }
class SimpleMultiContainer(Container, MultiContainerInterface):
    """
    Simple multi-container implementation for holding multiple objects.

    Provides a straightforward container for managing collections
    of related objects with dictionary-like access patterns.
    """

    def __init__(self, name: str, **kwargs):
        """
        Initialize simple multi-container.

        Args:
            name: Name of the container
            **kwargs: Additional container properties
        """

    def add_container(self, container: Container):
        """
        Add a container to the collection.

        Args:
            container: Container to add
        """

    def get_container(self, name: str) -> Container:
        """
        Get container by name.

        Args:
            name: Container name

        Returns:
            Container object
        """

    def __iter__(self):
        """Iterate over contained objects."""

    def __len__(self) -> int:
        """Number of contained objects."""
```

### Experimental Data Types

Experimental and specialized data structures for advanced use cases.

```python { .api }
class EnumData(VectorData):
    """
    Enumeration data (experimental) for categorical data with controlled vocabularies.

    Stores categorical data with predefined value sets and provides
    validation and conversion capabilities for enumerated types.
    """

    def __init__(self, name: str, description: str, data, elements: list, **kwargs):
        """
        Initialize enumeration data.

        Args:
            name: Name of the enumeration data
            description: Description of the data
            data: Enumeration values (indices or strings)
            elements: List of allowed enumeration elements
            **kwargs: Additional keyword arguments
        """

    @property
    def elements(self) -> tuple:
        """Tuple of allowed enumeration elements."""

    def add_element(self, element: str):
        """
        Add allowed element to enumeration.

        Args:
            element: Element to add
        """


class HERD(Container):
    """
    HDMF External Resources Data (experimental).

    Provides structured metadata for external resources and their
    relationships within the data hierarchy.
    """

    def __init__(self, **kwargs):
        """
        Initialize HERD container.

        Args:
            **kwargs: HERD properties and metadata
        """

    def add_resource(self, resource_spec: dict):
        """
        Add external resource specification.

        Args:
            resource_spec: Dictionary describing the resource
        """
```

### Registration and Management Functions

Functions for registering and managing common data types.

```python { .api }
def register_class(neurodata_type: str, namespace: str, container_cls):
    """
    Register container class for a data type.

    Args:
        neurodata_type: Name of the data type
        namespace: Namespace containing the type
        container_cls: Container class to register
    """


def register_map(container_cls, mapper_cls):
    """
    Register object mapper for a container class.

    Args:
        container_cls: Container class
        mapper_cls: Mapper class for serialization
    """


def get_class(neurodata_type: str, namespace: str = 'hdmf-common'):
    """
    Get container class for a data type.

    Args:
        neurodata_type: Name of the data type
        namespace: Namespace (default: 'hdmf-common')

    Returns:
        Container class for the data type
    """


def get_type_map():
    """
    Get type map with HDMF-common extensions.

    Returns:
        TypeMap instance with common data types registered
    """


def get_manager():
    """
    Get build manager with common data types.

    Returns:
        BuildManager instance configured for common types
    """


# Constants
CORE_NAMESPACE = 'hdmf-common'  # Core namespace identifier
EXP_NAMESPACE = 'hdmf-experimental'  # Experimental namespace identifier
```

## Usage Examples

### Creating and Using Dynamic Tables

```python
from hdmf.common import DynamicTable, VectorData
import numpy as np

# Create dynamic table
subjects_table = DynamicTable(
    name='subjects',
    description='Information about experimental subjects'
)

# Add columns
subjects_table.add_column('subject_id', 'Unique subject identifier')
subjects_table.add_column('age', 'Age in months', dtype='int')
subjects_table.add_column('weight', 'Weight in grams', dtype='float')
subjects_table.add_column('genotype', 'Genetic background')

# Add rows
subjects_table.add_row(subject_id='mouse_001', age=8, weight=25.3, genotype='WT')
subjects_table.add_row(subject_id='mouse_002', age=10, weight=27.1, genotype='KO')
subjects_table.add_row(subject_id='mouse_003', age=9, weight=24.8, genotype='WT')

# Access data
print(f"Table has {len(subjects_table)} rows")
print(f"Columns: {subjects_table.colnames}")

# Convert to DataFrame
df = subjects_table.to_dataframe()
print(df.head())

# Access specific columns
ages = subjects_table.get_column('age').data
print(f"Ages: {ages}")
```

### Working with Ragged Arrays Using Vector Indices

```python
from hdmf.common import VectorData, VectorIndex

# Create ragged data (variable-length spike trains)
spike_data = [
    [0.1, 0.3, 0.7, 1.2],         # Trial 1: 4 spikes
    [0.2, 0.8],                   # Trial 2: 2 spikes
    [0.05, 0.4, 0.6, 0.9, 1.1],   # Trial 3: 5 spikes
]

# Flatten data and create cumulative indices:
# each index entry is the end offset of one trial in the flat list
flattened_spikes = []
indices = []
for trial_spikes in spike_data:
    flattened_spikes.extend(trial_spikes)
    indices.append(len(flattened_spikes))

# Create VectorData and VectorIndex
spike_times = VectorData(
    name='spike_times',
    description='Spike timestamps in seconds',
    data=flattened_spikes
)

spike_index = VectorIndex(
    name='spike_times_index',
    data=indices,
    target=spike_times
)

# Access ragged data by trial
trial_0_spikes = spike_index[0]  # [0.1, 0.3, 0.7, 1.2]
trial_1_spikes = spike_index[1]  # [0.2, 0.8]
trial_2_spikes = spike_index[2]  # [0.05, 0.4, 0.6, 0.9, 1.1]

print(f"Trial 0 spikes: {trial_0_spikes}")
print(f"Trial 1 spikes: {trial_1_spikes}")
```

### Creating Sparse Matrices

```python
from hdmf.common import CSRMatrix
import numpy as np
from scipy import sparse

# Create sparse data in coordinate form: data[k] sits at (row[k], col[k])
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 1, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])

# Create scipy sparse matrix
scipy_matrix = sparse.csr_matrix((data, (row, col)), shape=(3, 3))

# Convert to HDMF CSRMatrix
hdmf_matrix = CSRMatrix.from_scipy_sparse(
    scipy_matrix,
    name='connectivity_matrix',
    description='Neural connectivity matrix'
)

print(f"Matrix shape: {hdmf_matrix.shape}")
print(f"Non-zero elements: {hdmf_matrix.nnz}")

# Convert back to dense for visualization
dense_matrix = hdmf_matrix.to_dense()
print("Dense representation:")
print(dense_matrix)
```

### Using Dynamic Table Regions for References

```python
from hdmf.common import DynamicTable, DynamicTableRegion

# Create source table
neurons_table = DynamicTable(
    name='neurons',
    description='Information about recorded neurons'
)

neurons_table.add_column('neuron_id', 'Unique neuron identifier')
neurons_table.add_column('brain_area', 'Brain area location')
neurons_table.add_column('cell_type', 'Cell type classification')

# Add neurons
for i in range(10):
    neurons_table.add_row(
        neuron_id=f'neuron_{i:03d}',
        brain_area='CA1' if i < 5 else 'CA3',
        cell_type='pyramidal' if i % 2 == 0 else 'interneuron'
    )

# Create region referencing subset of neurons
ca1_neurons = DynamicTableRegion(
    name='ca1_neurons',
    data=[0, 1, 2, 3, 4],  # Row indices for CA1 neurons
    description='Neurons recorded from CA1 region',
    table=neurons_table
)

# Access referenced data
referenced_neurons = ca1_neurons[:]
print(f"CA1 neurons: {len(referenced_neurons)} neurons")

# Use region in analysis table
analysis_table = DynamicTable(
    name='spike_analysis',
    description='Spike analysis results'
)

# Declare both columns empty so they start with the same length;
# values are supplied per row through add_row() below.
analysis_table.add_column('neurons', 'Analyzed neurons')
analysis_table.add_column('firing_rate', 'Average firing rate')

analysis_table.add_row(neurons=ca1_neurons, firing_rate=15.3)
```

### Creating Aligned Dynamic Tables

```python
from hdmf.common import AlignedDynamicTable

# Create aligned table for multi-modal data
session_data = AlignedDynamicTable(
    name='session_data',
    description='Aligned behavioral and neural data'
)

# Add main columns
session_data.add_column('timestamp', 'Time in seconds', dtype='float')
session_data.add_column('behavior', 'Behavioral state')

# Add category table for neural data
session_data.add_category_table(
    name='neural',
    description='Neural recording data'
)
neural_table = session_data.get_category_table('neural')
neural_table.add_column('spike_count', 'Number of spikes', dtype='int')
neural_table.add_column('lfp_power', 'LFP power', dtype='float')

# Add category table for stimulus data
session_data.add_category_table(
    name='stimulus',
    description='Stimulus presentation data'
)
stimulus_table = session_data.get_category_table('stimulus')
stimulus_table.add_column('stimulus_type', 'Type of stimulus')
stimulus_table.add_column('intensity', 'Stimulus intensity', dtype='float')

# Add synchronized data: one dict per category table, keyed by column name
session_data.add_row(
    timestamp=1.0,
    behavior='running',
    neural={'spike_count': 5, 'lfp_power': 0.23},
    stimulus={'stimulus_type': 'visual', 'intensity': 0.8}
)

print(f"Session data columns: {session_data.colnames}")
print(f"Neural category columns: {neural_table.colnames}")
```

### Working with Enumerated Data

```python
from hdmf.common import EnumData

# Create enumeration for behavioral states
behavior_states = EnumData(
    name='behavior_states',
    description='Behavioral state classifications',
    data=[0, 1, 2, 1, 0, 2, 1],  # Indices into elements
    elements=['rest', 'locomotion', 'grooming']
)

# Access enumerated values
print(f"Behavior elements: {behavior_states.elements}")
print(f"Behavior data: {behavior_states.data}")

# Could also use string data directly
string_behavior = EnumData(
    name='string_behavior',
    description='String-based behavioral states',
    data=['rest', 'locomotion', 'grooming', 'locomotion'],
    elements=['rest', 'locomotion', 'grooming']
)

print(f"String behavior: {string_behavior.data}")
```