0
# Specification System
1
2
HDMF's specification system provides schema definition and management for data models. It enables the creation of structured schemas that define data organization, validation rules, and metadata requirements, serving as the foundation for HDMF's specification-driven architecture.
3
4
## Capabilities
5
6
### Specification Catalogs
7
8
Core catalog classes for managing collections of specifications and namespaces.
9
10
```python { .api }
11
class SpecCatalog:
12
"""
13
Catalog for managing data type specifications.
14
15
Provides registration, retrieval, and organization of specifications
16
for different data types within a namespace.
17
"""
18
19
def __init__(self):
20
"""Initialize empty specification catalog."""
21
22
def register_spec(self, spec, source_file: str = None):
23
"""
24
Register a specification in the catalog.
25
26
Args:
27
spec: Specification object to register
28
source_file: Source file path for the specification
29
"""
30
31
def get_spec(self, neurodata_type: str) -> 'BaseStorageSpec':
32
"""
33
Get specification by data type name.
34
35
Args:
36
neurodata_type: Name of the data type
37
38
Returns:
39
Specification object for the data type
40
41
Raises:
42
KeyError: If specification not found
43
"""
44
45
def get_hierarchy(self, neurodata_type: str) -> list:
46
"""
47
Get inheritance hierarchy for a data type.
48
49
Args:
50
neurodata_type: Name of the data type
51
52
Returns:
53
List of parent types in inheritance order
54
"""
55
56
class NamespaceCatalog:
57
"""
58
Catalog for managing namespaces and their associated specifications.
59
60
Organizes multiple specification catalogs under different namespaces
61
and handles namespace resolution and dependencies.
62
"""
63
64
def __init__(self, *args, **kwargs):
65
"""Initialize namespace catalog."""
66
67
def add_namespace(self, namespace: str, spec_catalog: SpecCatalog):
68
"""
69
Add namespace with its specification catalog.
70
71
Args:
72
namespace: Namespace identifier
73
spec_catalog: Specification catalog for the namespace
74
"""
75
76
def get_namespace(self, namespace: str) -> 'SpecNamespace':
77
"""
78
Get namespace by name.
79
80
Args:
81
namespace: Namespace identifier
82
83
Returns:
84
SpecNamespace object
85
"""
86
87
def load_namespaces(self, namespace_path: str, **kwargs):
88
"""
89
Load namespaces from file or directory.
90
91
Args:
92
namespace_path: Path to namespace files
93
"""
94
```
95
96
### Namespace Definitions
97
98
Classes for defining and managing specification namespaces.
99
100
```python { .api }
101
class SpecNamespace:
102
"""
103
Specification namespace containing metadata and dependencies.
104
105
Defines a namespace with version information, documentation,
106
and relationships to other namespaces.
107
"""
108
109
def __init__(self, doc: str, name: str, **kwargs):
110
"""
111
Initialize specification namespace.
112
113
Args:
114
doc: Documentation for the namespace
115
name: Namespace identifier
116
**kwargs: Additional namespace properties:
117
- version: Namespace version
118
- author: Namespace author(s)
119
- contact: Contact information
120
- dependencies: List of dependency namespaces
121
"""
122
123
@property
124
def name(self) -> str:
125
"""Namespace name."""
126
127
@property
128
def version(self) -> str:
129
"""Namespace version."""
130
131
@property
132
def doc(self) -> str:
133
"""Namespace documentation."""
134
135
@property
136
def dependencies(self) -> list:
137
"""List of namespace dependencies."""
138
139
class NamespaceBuilder:
140
"""
141
Builder for creating specification namespaces programmatically.
142
143
Provides a fluent interface for constructing namespaces with
144
specifications, dependencies, and metadata.
145
"""
146
147
def __init__(self, doc: str, name: str, **kwargs):
148
"""
149
Initialize namespace builder.
150
151
Args:
152
doc: Documentation for the namespace
153
name: Namespace identifier
**kwargs: Additional namespace properties (e.g. version, author, contact)
154
"""
155
156
def include_namespace(self, namespace: str):
157
"""
158
Include another namespace as dependency.
159
160
Args:
161
namespace: Namespace to include
162
163
Returns:
164
Self for method chaining
165
"""
166
167
def include_type(self, type_name: str, source_file: str = None):
168
"""
169
Include a data type in the namespace.
170
171
Args:
172
type_name: Name of the data type
173
source_file: Source file containing the type
174
175
Returns:
176
Self for method chaining
177
"""
178
179
def export(self, namespace_path: str, **kwargs):
180
"""
181
Export namespace to file.
182
183
Args:
184
namespace_path: Path where to export namespace
185
"""
186
```
187
188
### Data Type Specifications
189
190
Core specification classes for defining data structures and validation rules.
191
192
```python { .api }
193
class GroupSpec:
194
"""
195
Specification for group (container) data types.
196
197
Defines hierarchical containers that can hold datasets, other groups,
198
attributes, and links with validation rules and metadata.
199
"""
200
201
def __init__(self, doc: str, name: str = None, **kwargs):
202
"""
203
Initialize group specification.
204
205
Args:
206
doc: Documentation for the group
207
name: Name of the group (None for flexible naming)
208
**kwargs: Additional specification properties:
209
- neurodata_type_def: Data type being defined
210
- neurodata_type_inc: Data type being inherited from
211
- default_name: Default name for the group
212
- linkable: Whether group can be linked
213
- attributes: List of attribute specifications
214
- datasets: List of dataset specifications
215
- groups: List of nested group specifications
216
"""
217
218
def add_attribute(self, attr_spec: 'AttributeSpec'):
219
"""
220
Add attribute specification to the group.
221
222
Args:
223
attr_spec: Attribute specification to add
224
"""
225
226
def add_dataset(self, dataset_spec: 'DatasetSpec'):
227
"""
228
Add dataset specification to the group.
229
230
Args:
231
dataset_spec: Dataset specification to add
232
"""
233
234
def add_group(self, group_spec: 'GroupSpec'):
235
"""
236
Add nested group specification.
237
238
Args:
239
group_spec: Group specification to add
240
"""
241
242
class DatasetSpec:
243
"""
244
Specification for dataset data types.
245
246
Defines data arrays with shape constraints, data type requirements,
247
and associated metadata validation rules.
248
"""
249
250
def __init__(self, doc: str, name: str = None, **kwargs):
251
"""
252
Initialize dataset specification.
253
254
Args:
255
doc: Documentation for the dataset
256
name: Name of the dataset (None for flexible naming)
257
**kwargs: Dataset properties:
258
- neurodata_type_def: Data type being defined
259
- neurodata_type_inc: Data type being inherited from
260
- dtype: Data type specification
261
- shape: Shape constraints
262
- dims: Dimension names
263
- default_name: Default name for the dataset
264
- linkable: Whether dataset can be linked
265
- attributes: List of attribute specifications
266
"""
267
268
def add_attribute(self, attr_spec: 'AttributeSpec'):
269
"""
270
Add attribute specification to the dataset.
271
272
Args:
273
attr_spec: Attribute specification to add
274
"""
275
276
class AttributeSpec:
277
"""
278
Specification for metadata attributes.
279
280
Defines key-value metadata with type constraints and validation rules.
281
"""
282
283
def __init__(self, name: str, doc: str, **kwargs):
284
"""
285
Initialize attribute specification.
286
287
Args:
288
name: Name of the attribute
289
doc: Documentation for the attribute
290
**kwargs: Attribute properties:
291
- dtype: Data type specification
292
- value: Fixed value for the attribute
293
- default_value: Default value
294
- required: Whether attribute is required
295
- shape: Shape constraints for array attributes
296
"""
297
298
class LinkSpec:
299
"""
300
Specification for links between data elements.
301
302
Defines relationships and references between different parts
303
of the hierarchical data structure.
304
"""
305
306
def __init__(self, doc: str, **kwargs):
307
"""
308
Initialize link specification.
309
310
Args:
311
doc: Documentation for the link
312
**kwargs: Link properties:
313
- target_type: Target data type for the link
314
- allow_subclasses: Allow subclasses of target type
315
"""
316
317
class RefSpec:
318
"""
319
Specification for object references.
320
321
Defines references to other objects within the data hierarchy
322
with type constraints and validation rules.
323
"""
324
325
def __init__(self, target_type: str, reftype: str, **kwargs):
326
"""
327
Initialize reference specification.
328
329
Args:
330
target_type: Target data type for references
331
reftype: Type of reference ('object', 'region')
332
"""
333
```
334
335
### Dtype Specifications
336
337
Classes for defining and managing data types within specifications.
338
339
```python { .api }
340
class DtypeSpec:
341
"""
342
Specification for data types with validation and constraints.
343
344
Defines allowable data types, value constraints, and conversion rules
345
for datasets and attributes.
346
"""
347
348
def __init__(self, name: str, doc: str, **kwargs):
349
"""
350
Initialize data type specification.
351
352
Args:
353
name: Name of the data type
354
doc: Documentation for the data type
355
**kwargs: Data type properties:
356
- dtype: Base data type
357
- constraints: Value constraints
358
- default_value: Default value
359
"""
360
361
class DtypeHelper:
362
"""
363
Helper utilities for working with data type specifications.
364
365
Provides validation, conversion, and analysis functions for
366
data types used in specifications.
367
"""
368
369
@staticmethod
370
def check_dtype(dtype_spec, value) -> bool:
371
"""
372
Check if value matches data type specification.
373
374
Args:
375
dtype_spec: Data type specification
376
value: Value to check
377
378
Returns:
379
True if value matches specification
380
"""
381
382
@staticmethod
383
def convert_dtype(dtype_spec, value):
384
"""
385
Convert value to match data type specification.
386
387
Args:
388
dtype_spec: Target data type specification
389
value: Value to convert
390
391
Returns:
392
Converted value
393
"""
394
```
395
396
### Specification I/O
397
398
Classes for reading and writing specifications to different formats.
399
400
```python { .api }
401
class SpecReader:
402
"""
403
Reader for loading specifications from files.
404
405
Supports multiple file formats including YAML and JSON for
406
specification definitions and namespace declarations.
407
"""
408
409
def __init__(self, **kwargs):
410
"""Initialize specification reader."""
411
412
def read_spec(self, spec_file: str) -> dict:
413
"""
414
Read specification from file.
415
416
Args:
417
spec_file: Path to specification file
418
419
Returns:
420
Dictionary containing specification data
421
"""
422
423
def read_namespace(self, namespace_file: str) -> dict:
424
"""
425
Read namespace from file.
426
427
Args:
428
namespace_file: Path to namespace file
429
430
Returns:
431
Dictionary containing namespace data
432
"""
433
434
class SpecWriter:
435
"""
436
Writer for saving specifications to files.
437
438
Exports specifications and namespaces to YAML or JSON format
439
for sharing and version control.
440
"""
441
442
def __init__(self, **kwargs):
443
"""Initialize specification writer."""
444
445
def write_spec(self, spec, spec_file: str):
446
"""
447
Write specification to file.
448
449
Args:
450
spec: Specification object to write
451
spec_file: Output file path
452
"""
453
454
def write_namespace(self, namespace, namespace_file: str):
455
"""
456
Write namespace to file.
457
458
Args:
459
namespace: Namespace object to write
460
namespace_file: Output file path
461
"""
462
```
463
464
### Specification Utilities
465
466
Utility functions and constants for working with specifications.
467
468
```python { .api }
469
def export_spec(namespace_builder: NamespaceBuilder, export_path: str, **kwargs):
470
"""
471
Export namespace and specifications to directory.
472
473
Args:
474
namespace_builder: NamespaceBuilder to export
475
export_path: Directory path for export
476
**kwargs: Export options
477
"""
478
479
# Constants
480
NAME_WILDCARD = '*' # Wildcard for flexible naming in specifications
481
```
482
483
## Usage Examples
484
485
### Creating Basic Specifications
486
487
```python
488
from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec, NamespaceBuilder
489
490
# Create attribute specification
491
name_attr = AttributeSpec(
492
name='name',
493
doc='Name of the experimental subject',
494
dtype='text',
495
required=True
496
)
497
498
# Create dataset specification
499
data_spec = DatasetSpec(
500
doc='Neural recording data',
501
name='data',
502
dtype='float64',
503
shape=(None, None), # Variable dimensions
504
dims=['time', 'channels'],
505
attributes=[name_attr]
506
)
507
508
# Create group specification
509
recording_spec = GroupSpec(
510
doc='Neural recording container',
511
neurodata_type_def='Recording',
512
default_name='recording',
513
datasets=[data_spec],
514
attributes=[
515
AttributeSpec('sampling_rate', 'Sampling rate in Hz', dtype='float64')
516
]
517
)
518
```
519
520
### Building and Exporting Namespaces
521
522
```python
523
from hdmf.spec import NamespaceBuilder
524
525
# Create namespace builder
526
ns_builder = NamespaceBuilder(
527
doc='Experimental neuroscience data standard',
528
name='neuro-experiment',
529
version='1.0.0',
530
author='Neuroscience Lab',
531
contact='lab@university.edu'
532
)
533
534
# Include HDMF common namespace
535
ns_builder.include_namespace('hdmf-common')
536
537
# Add custom data types
538
ns_builder.include_type('Recording', source_file='recording.yaml')
539
ns_builder.include_type('Stimulus', source_file='stimulus.yaml')
540
541
# Export namespace and specifications
542
ns_builder.export('./specs/', overwrite=True)
543
```
544
545
### Loading Existing Specifications
546
547
```python
548
from hdmf.spec import NamespaceCatalog, SpecCatalog
549
from hdmf.common import load_namespaces
550
551
# Load HDMF common specifications
552
load_namespaces()
553
554
# Load custom namespace
555
namespace_catalog = NamespaceCatalog()
556
namespace_catalog.load_namespaces('./custom_specs/namespace.yaml')
557
558
# Get specification catalog for a namespace
559
spec_catalog = namespace_catalog.get_namespace('custom-namespace').catalog
560
561
# Retrieve specific specifications
562
recording_spec = spec_catalog.get_spec('Recording')
563
print(f"Recording spec: {recording_spec.doc}")
564
565
# Get inheritance hierarchy
566
hierarchy = spec_catalog.get_hierarchy('Recording')
567
print(f"Inheritance: {hierarchy}")
568
```
569
570
### Creating Complex Specifications with Inheritance
571
572
```python
573
from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec
574
575
# Base container specification
576
base_container = GroupSpec(
577
doc='Base container for all experimental data',
578
neurodata_type_def='BaseContainer',
579
attributes=[
580
AttributeSpec('description', 'Description of the container', dtype='text'),
581
AttributeSpec('created_on', 'Creation timestamp', dtype='text')
582
]
583
)
584
585
# Specialized recording container inheriting from base
586
recording_container = GroupSpec(
587
doc='Container for neural recordings',
588
neurodata_type_def='RecordingContainer',
589
neurodata_type_inc='BaseContainer', # Inherit from BaseContainer
590
datasets=[
591
DatasetSpec(
592
doc='Raw neural data',
593
name='data',
594
dtype='int16',
595
shape=(None, None),
596
dims=['time', 'channels']
597
)
598
],
599
attributes=[
600
AttributeSpec('sampling_rate', 'Sampling rate in Hz', dtype='float64')
601
]
602
)
603
```
604
605
### Advanced Data Type Specifications
606
607
```python
608
from hdmf.spec import DatasetSpec, DtypeSpec
609
610
# Complex data type with constraints
611
constrained_dtype = DtypeSpec(
612
name='bounded_float',
613
doc='Float value between 0 and 1',
614
dtype='float64',
615
constraints={'min': 0.0, 'max': 1.0}
616
)
617
618
# Dataset with complex shape and type constraints
619
timeseries_spec = DatasetSpec(
620
doc='Time series data with metadata',
621
name='timeseries',
622
dtype=[
623
{'name': 'timestamp', 'dtype': 'float64', 'doc': 'Time in seconds'},
624
{'name': 'value', 'dtype': constrained_dtype, 'doc': 'Normalized value'},
625
{'name': 'quality', 'dtype': 'uint8', 'doc': 'Data quality flag'}
626
],
627
shape=(None,), # Variable length
628
dims=['time']
629
)
630
```
631
632
### Programmatic Specification Creation
633
634
```python
635
from hdmf.spec import GroupSpec, DatasetSpec, AttributeSpec
636
637
def create_experiment_spec(experiment_name: str) -> GroupSpec:
638
"""Create specification for a specific experiment type."""
639
640
# Common attributes for all experiments
641
common_attrs = [
642
AttributeSpec('experiment_id', 'Unique experiment identifier', dtype='text'),
643
AttributeSpec('start_time', 'Experiment start time', dtype='text'),
644
AttributeSpec('duration', 'Experiment duration in seconds', dtype='float64')
645
]
646
647
# Experiment-specific datasets
648
if experiment_name == 'electrophysiology':
649
datasets = [
650
DatasetSpec(name='voltage_traces', doc='Voltage recordings', dtype='float64',
651
shape=(None, None), dims=['time', 'channels']),
652
DatasetSpec(name='spike_times', doc='Detected spike timestamps', dtype='float64',
653
shape=(None,), dims=['spikes'])
654
]
655
elif experiment_name == 'behavior':
656
datasets = [
657
DatasetSpec(name='position', doc='Animal position over time', dtype='float64',
658
shape=(None, 2), dims=['time', 'coordinates']),
659
DatasetSpec(name='speed', doc='Movement speed', dtype='float64',
660
shape=(None,), dims=['time'])
661
]
662
else:
663
datasets = []
664
665
# Create and return specification
666
return GroupSpec(
667
doc=f'Container for {experiment_name} experiments',
668
neurodata_type_def=f'{experiment_name.title()}Experiment',
669
default_name=experiment_name,
670
attributes=common_attrs,
671
datasets=datasets
672
)
673
674
# Create specifications for different experiment types
675
ephys_spec = create_experiment_spec('electrophysiology')
676
behavior_spec = create_experiment_spec('behavior')
677
```