# Validation System

HDMF provides comprehensive validation of data against specifications with detailed error reporting and schema compliance checking. The validation system ensures data integrity, specification compliance, and provides detailed feedback for debugging and quality assurance.

## Capabilities

### Validator Classes

Core validator classes for different types of data validation against specifications.

```python { .api }
class Validator:
12
"""
13
Base validator class for validating data against specifications.
14
15
Provides the foundation for all validation operations in HDMF,
16
including schema validation, type checking, and constraint verification.
17
"""
18
19
def __init__(self, spec, **kwargs):
20
"""
21
Initialize validator.
22
23
Args:
24
spec: Specification object to validate against
25
**kwargs: Additional validator options:
26
- strict: Enable strict validation mode
27
- ignore_missing: Ignore missing optional fields
28
"""
29
30
def validate(self, builder, **kwargs) -> list:
31
"""
32
Validate builder against specification.
33
34
Args:
35
builder: Builder object to validate
36
**kwargs: Validation options
37
38
Returns:
39
List of validation errors (empty if valid)
40
"""
```

### Validation Error Classes

Specific error classes for different types of validation failures with detailed error reporting.

```python { .api }
class Error(Exception):
48
"""
49
Base class for HDMF validation errors.
50
51
Provides structured error reporting with location information
52
and detailed messages for debugging validation failures.
53
"""
54
55
def __init__(self, location: str, message: str = None):
56
"""
57
Initialize validation error.
58
59
Args:
60
location: Location where error occurred
61
message: Detailed error message
62
"""
63
64
class DtypeError(Error):
65
"""
66
Error for data type mismatches in validation.
67
68
Raised when data types don't match specification requirements.
69
"""
70
pass
71
72
class MissingError(Error):
73
"""
74
Error for missing required components.
75
76
Raised when required datasets, groups, or attributes are missing.
77
"""
78
pass
79
80
class ExpectedArrayError(Error):
81
"""
82
Error for expected array data validation failures.
83
84
Raised when array-like data doesn't meet shape or type requirements.
85
"""
86
pass
87
88
class ShapeError(Error):
89
"""
90
Error for array shape validation failures.
91
92
Raised when array shapes don't match specification constraints.
93
"""
94
pass
95
96
class MissingDataType(Error):
97
"""
98
Error for missing data type specifications.
99
100
Raised when referenced data types are not found in namespace.
101
"""
102
pass
103
104
class IllegalLinkError(Error):
105
"""
106
Error for illegal link operations in validation.
107
108
Raised when links violate specification constraints.
109
"""
110
pass
111
112
class IncorrectDataType(Error):
113
"""
114
Error for incorrect data type usage.
115
116
Raised when data types are incorrect for the context.
117
"""
118
pass
119
120
class IncorrectQuantityError(Error):
121
"""
122
Error for incorrect quantity specifications.
123
124
Raised when quantities don't match cardinality constraints.
125
"""
126
pass
```

The methods below continue the `Validator` class defined above.

```python { .api }
    def check_type(self, builder) -> list:
"""
131
Check data type compliance.
132
133
Args:
134
builder: Builder to check
135
136
Returns:
137
List of type validation errors
138
"""
139
140
def check_shape(self, builder) -> list:
141
"""
142
Check data shape compliance.
143
144
Args:
145
builder: Builder to check
146
147
Returns:
148
List of shape validation errors
149
"""
150
151
def check_attributes(self, builder) -> list:
152
"""
153
Check attribute requirements and values.
154
155
Args:
156
builder: Builder to check
157
158
Returns:
159
List of attribute validation errors
160
"""
161
162
@property
163
def spec(self):
164
"""Specification being validated against."""
165
166
class GroupValidator(Validator):
167
"""
168
Validator for group (container) specifications.
169
170
Validates hierarchical container structures including nested groups,
171
datasets, attributes, and links against group specifications.
172
"""
173
174
def __init__(self, spec, **kwargs):
175
"""
176
Initialize group validator.
177
178
Args:
179
spec: GroupSpec to validate against
180
"""
181
182
def validate(self, builder, **kwargs) -> list:
183
"""
184
Validate group builder against specification.
185
186
Args:
187
builder: GroupBuilder to validate
188
189
Returns:
190
List of validation errors
191
"""
192
193
def check_groups(self, builder) -> list:
194
"""
195
Check nested group requirements.
196
197
Args:
198
builder: GroupBuilder to check
199
200
Returns:
201
List of group validation errors
202
"""
203
204
def check_datasets(self, builder) -> list:
205
"""
206
Check dataset requirements.
207
208
Args:
209
builder: GroupBuilder to check
210
211
Returns:
212
List of dataset validation errors
213
"""
214
215
def check_links(self, builder) -> list:
216
"""
217
Check link requirements and targets.
218
219
Args:
220
builder: GroupBuilder to check
221
222
Returns:
223
List of link validation errors
224
"""
225
226
class DatasetValidator(Validator):
227
"""
228
Validator for dataset specifications.
229
230
Validates dataset structures including data types, shapes,
231
dimensions, and associated attributes against dataset specifications.
232
"""
233
234
def __init__(self, spec, **kwargs):
235
"""
236
Initialize dataset validator.
237
238
Args:
239
spec: DatasetSpec to validate against
240
"""
241
242
def validate(self, builder, **kwargs) -> list:
243
"""
244
Validate dataset builder against specification.
245
246
Args:
247
builder: DatasetBuilder to validate
248
249
Returns:
250
List of validation errors
251
"""
252
253
def check_data_type(self, builder) -> list:
254
"""
255
Check data type compliance including compound types.
256
257
Args:
258
builder: DatasetBuilder to check
259
260
Returns:
261
List of data type validation errors
262
"""
263
264
def check_dimensions(self, builder) -> list:
265
"""
266
Check dimension names and constraints.
267
268
Args:
269
builder: DatasetBuilder to check
270
271
Returns:
272
List of dimension validation errors
273
"""
274
275
class AttributeValidator(Validator):
276
"""
277
Validator for attribute specifications.
278
279
Validates metadata attributes including values, types,
280
and constraints against attribute specifications.
281
"""
282
283
def __init__(self, spec, **kwargs):
284
"""
285
Initialize attribute validator.
286
287
Args:
288
spec: AttributeSpec to validate against
289
"""
290
291
def validate(self, builder, **kwargs) -> list:
292
"""
293
Validate attribute against specification.
294
295
Args:
296
builder: Builder containing the attribute
297
298
Returns:
299
List of validation errors
300
"""
301
302
def check_value_constraints(self, value) -> list:
303
"""
304
Check value against specification constraints.
305
306
Args:
307
value: Attribute value to check
308
309
Returns:
310
List of constraint validation errors
311
"""
312
```
313
314
### Validator Management
315
316
Classes for managing and coordinating validation across different data types.
317
318
```python { .api }
319
class ValidatorMap:
320
"""
321
Mapping system for validators across different data types.
322
323
Manages the association between data types and their corresponding
324
validators, enabling automatic validator selection and coordination.
325
"""
326
327
def __init__(self, **kwargs):
328
"""Initialize validator map."""
329
330
def register_validator(self, neurodata_type: str, validator_class):
331
"""
332
Register validator class for a data type.
333
334
Args:
335
neurodata_type: Name of the data type
336
validator_class: Validator class to register
337
"""
338
339
def get_validator(self, neurodata_type: str, spec) -> Validator:
340
"""
341
Get validator instance for a data type.
342
343
Args:
344
neurodata_type: Name of the data type
345
spec: Specification to validate against
346
347
Returns:
348
Validator instance for the data type
349
"""
350
351
def validate_builder(self, builder, spec, **kwargs) -> list:
352
"""
353
Validate builder using appropriate validator.
354
355
Args:
356
builder: Builder to validate
357
spec: Specification to validate against
358
359
Returns:
360
List of validation errors
361
"""
362
```

### Validation Errors

Comprehensive error classes for different types of validation failures. Note: this `ValidationError` hierarchy is separate from the `Error` classes documented under "Validation Error Classes" above.

```python { .api }
class ValidationError(Exception):
370
"""Base class for validation errors."""
371
372
def __init__(self, message: str, location: str = None, **kwargs):
373
"""
374
Initialize validation error.
375
376
Args:
377
message: Error message
378
location: Location in data where error occurred
379
"""
380
super().__init__(message)
381
self.location = location
382
383
class SpecValidationError(ValidationError):
384
"""Error for specification compliance failures."""
385
386
def __init__(self, spec_type: str, message: str, **kwargs):
387
"""
388
Initialize specification validation error.
389
390
Args:
391
spec_type: Type of specification that failed
392
message: Error message
393
"""
394
super().__init__(message, **kwargs)
395
self.spec_type = spec_type
396
397
class TypeValidationError(ValidationError):
398
"""Error for data type validation failures."""
399
400
def __init__(self, expected_type, actual_type, **kwargs):
401
"""
402
Initialize type validation error.
403
404
Args:
405
expected_type: Expected data type
406
actual_type: Actual data type found
407
"""
408
message = f"Expected type {expected_type}, got {actual_type}"
409
super().__init__(message, **kwargs)
410
self.expected_type = expected_type
411
self.actual_type = actual_type
412
413
class ShapeValidationError(ValidationError):
414
"""Error for data shape validation failures."""
415
416
def __init__(self, expected_shape, actual_shape, **kwargs):
417
"""
418
Initialize shape validation error.
419
420
Args:
421
expected_shape: Expected data shape
422
actual_shape: Actual data shape found
423
"""
424
message = f"Expected shape {expected_shape}, got {actual_shape}"
425
super().__init__(message, **kwargs)
426
self.expected_shape = expected_shape
427
self.actual_shape = actual_shape
428
429
class RequiredValueError(ValidationError):
430
"""Error for missing required values."""
431
432
def __init__(self, field_name: str, **kwargs):
433
"""
434
Initialize required value error.
435
436
Args:
437
field_name: Name of required field that is missing
438
"""
439
message = f"Required field '{field_name}' is missing"
440
super().__init__(message, **kwargs)
441
self.field_name = field_name
442
443
class ConstraintViolationError(ValidationError):
444
"""Error for constraint violations."""
445
446
def __init__(self, constraint: str, value, **kwargs):
447
"""
448
Initialize constraint violation error.
449
450
Args:
451
constraint: Description of violated constraint
452
value: Value that violated the constraint
453
"""
454
message = f"Constraint violation: {constraint}, value: {value}"
455
super().__init__(message, **kwargs)
456
self.constraint = constraint
457
self.value = value
458
```
459
460
### Validation Utilities
461
462
Utility functions for performing validation operations and reporting results.
463
464
```python { .api }
465
def validate_file(file_path: str, namespace: str = None, **kwargs) -> dict:
466
"""
467
Validate entire file against namespace specifications.
468
469
Args:
470
file_path: Path to file to validate
471
namespace: Namespace to validate against (default: auto-detect)
472
**kwargs: Validation options:
473
- strict: Enable strict validation
474
- detailed: Include detailed error information
475
476
Returns:
477
Dictionary with validation results:
478
{
479
'valid': bool,
480
'errors': list,
481
'warnings': list,
482
'summary': dict
483
}
484
"""
485
486
def validate_container(container, **kwargs) -> dict:
487
"""
488
Validate container object against its specification.
489
490
Args:
491
container: Container object to validate
492
**kwargs: Validation options
493
494
Returns:
495
Dictionary with validation results
496
"""
497
498
def generate_validation_report(validation_results: dict, output_path: str = None) -> str:
499
"""
500
Generate human-readable validation report.
501
502
Args:
503
validation_results: Results from validation operation
504
output_path: Optional path to save report
505
506
Returns:
507
Formatted validation report string
508
"""
509
510
def check_specification_compliance(builder, spec, **kwargs) -> bool:
511
"""
512
Quick compliance check for builder against specification.
513
514
Args:
515
builder: Builder to check
516
spec: Specification to check against
517
518
Returns:
519
True if compliant, False otherwise
520
"""
521
```
522
523
## Usage Examples
524
525
### Basic File Validation
526
527
```python
528
from hdmf.validate import validate_file, generate_validation_report
529
from hdmf.backends.hdf5 import HDF5IO
530
531
# Validate entire HDF5 file
532
validation_results = validate_file(
533
'experiment.h5',
534
namespace='hdmf-common',
535
strict=True,
536
detailed=True
537
)
538
539
print(f"File is valid: {validation_results['valid']}")
540
print(f"Number of errors: {len(validation_results['errors'])}")
541
print(f"Number of warnings: {len(validation_results['warnings'])}")
542
543
# Generate detailed report
544
if not validation_results['valid']:
545
report = generate_validation_report(validation_results)
546
print("Validation Report:")
547
print(report)
548
549
# Save report to file
550
with open('validation_report.txt', 'w') as f:
551
f.write(report)
552
553
# Summary statistics
554
summary = validation_results['summary']
555
print(f"Total containers validated: {summary.get('containers_checked', 0)}")
556
print(f"Total datasets validated: {summary.get('datasets_checked', 0)}")
557
```
558
559
### Container-Level Validation
560
561
```python
562
from hdmf.validate import validate_container, ValidationError
563
from hdmf.common import DynamicTable, VectorData
564
from hdmf import Container
565
import numpy as np
566
567
# Create container with potential validation issues
568
table = DynamicTable(
569
name='test_table',
570
description='Test table for validation'
571
)
572
573
# Add column with correct data
574
table.add_column('valid_column', 'Valid column', data=np.arange(10))
575
576
# Add column with problematic data (wrong type)
577
try:
578
table.add_column('problem_column', 'Problematic column',
579
data=['string', 'data', 'in', 'numeric', 'column'])
580
except Exception as e:
581
print(f"Column creation warning: {e}")
582
583
# Validate the container
584
validation_results = validate_container(
585
table,
586
strict=False, # Allow some flexibility
587
detailed=True
588
)
589
590
print(f"Container validation results:")
591
print(f"Valid: {validation_results['valid']}")
592
593
for error in validation_results['errors']:
594
print(f"Error: {error}")
595
596
for warning in validation_results['warnings']:
597
print(f"Warning: {warning}")
598
```
599
600
### Custom Validator Implementation
601
602
```python
603
from hdmf.validate import Validator, ValidationError
604
from hdmf.spec import DatasetSpec
605
import numpy as np
606
607
class NeuralDataValidator(Validator):
608
"""
609
Custom validator for neural data with domain-specific checks.
610
"""
611
612
def __init__(self, spec, **kwargs):
613
super().__init__(spec, **kwargs)
614
self.sampling_rate_min = kwargs.get('sampling_rate_min', 1.0)
615
self.sampling_rate_max = kwargs.get('sampling_rate_max', 100000.0)
616
617
def validate(self, builder, **kwargs):
618
"""Validate neural data with custom rules."""
619
errors = super().validate(builder, **kwargs)
620
621
# Add domain-specific validations
622
errors.extend(self._check_neural_data_quality(builder))
623
errors.extend(self._check_sampling_rate(builder))
624
errors.extend(self._check_channel_count(builder))
625
626
return errors
627
628
def _check_neural_data_quality(self, builder):
629
"""Check neural data for quality issues."""
630
errors = []
631
632
if hasattr(builder, 'data') and builder.data is not None:
633
data = np.array(builder.data)
634
635
# Check for unrealistic voltage values
636
if np.any(np.abs(data) > 10000): # > 10mV in µV
637
errors.append(ValidationError(
638
"Neural data contains unrealistic voltage values (>10mV)",
639
location=f"{builder.name}/data"
640
))
641
642
# Check for constant channels (likely broken)
643
if len(data.shape) > 1:
644
for ch_idx in range(data.shape[1]):
645
if np.std(data[:, ch_idx]) < 1e-6:
646
errors.append(ValidationError(
647
f"Channel {ch_idx} appears to be constant (possibly broken)",
648
location=f"{builder.name}/data/channel_{ch_idx}"
649
))
650
651
return errors
652
653
def _check_sampling_rate(self, builder):
654
"""Check sampling rate is within reasonable bounds."""
655
errors = []
656
657
if 'sampling_rate' in builder.attributes:
658
rate = builder.attributes['sampling_rate']
659
660
if rate < self.sampling_rate_min:
661
errors.append(ValidationError(
662
f"Sampling rate {rate} Hz is too low (min: {self.sampling_rate_min})",
663
location=f"{builder.name}/sampling_rate"
664
))
665
666
elif rate > self.sampling_rate_max:
667
errors.append(ValidationError(
668
f"Sampling rate {rate} Hz is too high (max: {self.sampling_rate_max})",
669
location=f"{builder.name}/sampling_rate"
670
))
671
672
return errors
673
674
def _check_channel_count(self, builder):
675
"""Check channel count is reasonable."""
676
errors = []
677
678
if hasattr(builder, 'data') and builder.data is not None:
679
data = np.array(builder.data)
680
681
if len(data.shape) > 1:
682
n_channels = data.shape[1]
683
684
if n_channels > 1000:
685
errors.append(ValidationError(
686
f"Very high channel count ({n_channels}), please verify",
687
location=f"{builder.name}/data"
688
))
689
690
elif n_channels == 0:
691
errors.append(ValidationError(
692
"No channels found in neural data",
693
location=f"{builder.name}/data"
694
))
695
696
return errors
697
698
# Usage
699
neural_spec = DatasetSpec(
700
doc='Neural recording data',
701
name='neural_data',
702
dtype='float64',
703
shape=(None, None),
704
dims=['time', 'channels']
705
)
706
707
neural_validator = NeuralDataValidator(
708
neural_spec,
709
sampling_rate_min=100.0,
710
sampling_rate_max=50000.0
711
)
712
713
# Validate neural data builder
714
from hdmf.build import DatasetBuilder
715
neural_builder = DatasetBuilder(
716
name='neural_data',
717
data=np.random.randn(30000, 64) * 100, # 64 channels, 30k samples
718
attributes={'sampling_rate': 30000.0}
719
)
720
721
validation_errors = neural_validator.validate(neural_builder)
722
if validation_errors:
723
for error in validation_errors:
724
print(f"Validation error: {error}")
725
else:
726
print("Neural data passed validation")
727
```
728
729
### Batch Validation of Multiple Files
730
731
```python
732
from hdmf.validate import validate_file
733
import os
734
from pathlib import Path
735
import json
736
737
def batch_validate_files(directory_path: str, file_pattern: str = "*.h5",
738
namespace: str = 'hdmf-common') -> dict:
739
"""
740
Validate all files matching pattern in directory.
741
742
Args:
743
directory_path: Directory containing files to validate
744
file_pattern: File pattern to match
745
namespace: Namespace to validate against
746
747
Returns:
748
Dictionary with results for each file
749
"""
750
751
results = {}
752
directory = Path(directory_path)
753
754
# Find all matching files
755
files_to_validate = list(directory.glob(file_pattern))
756
print(f"Found {len(files_to_validate)} files to validate")
757
758
for file_path in files_to_validate:
759
print(f"Validating {file_path.name}...")
760
761
try:
762
validation_result = validate_file(
763
str(file_path),
764
namespace=namespace,
765
strict=False,
766
detailed=True
767
)
768
769
results[str(file_path)] = {
770
'valid': validation_result['valid'],
771
'error_count': len(validation_result['errors']),
772
'warning_count': len(validation_result['warnings']),
773
'errors': validation_result['errors'][:5], # First 5 errors
774
'summary': validation_result['summary']
775
}
776
777
except Exception as e:
778
results[str(file_path)] = {
779
'valid': False,
780
'error_count': 1,
781
'warning_count': 0,
782
'errors': [f"Validation failed: {str(e)}"],
783
'summary': {}
784
}
785
786
return results
787
788
# Run batch validation
789
validation_results = batch_validate_files(
790
'./experiment_data/',
791
file_pattern='*.h5',
792
namespace='hdmf-common'
793
)
794
795
# Generate summary report
796
total_files = len(validation_results)
797
valid_files = sum(1 for r in validation_results.values() if r['valid'])
798
total_errors = sum(r['error_count'] for r in validation_results.values())
799
800
print(f"\nBatch Validation Summary:")
801
print(f"Total files: {total_files}")
802
print(f"Valid files: {valid_files}")
803
print(f"Invalid files: {total_files - valid_files}")
804
print(f"Total errors: {total_errors}")
805
806
# Save detailed results
807
with open('batch_validation_results.json', 'w') as f:
808
json.dump(validation_results, f, indent=2)
809
810
# Print problematic files
811
print(f"\nProblematic files:")
812
for file_path, result in validation_results.items():
813
if not result['valid']:
814
print(f" {Path(file_path).name}: {result['error_count']} errors")
815
for error in result['errors'][:3]: # Show first 3 errors
816
print(f" - {error}")
817
```
818
819
### Real-time Validation During Data Creation
820
821
```python
822
from hdmf.validate import Validator, validate_container
823
from hdmf.common import DynamicTable
824
from hdmf import docval, getargs
825
import numpy as np
826
827
class ValidatedDynamicTable(DynamicTable):
828
"""
829
DynamicTable with real-time validation during data entry.
830
"""
831
832
def __init__(self, **kwargs):
833
super().__init__(**kwargs)
834
self.validation_enabled = kwargs.get('validate_on_add', True)
835
self.validation_errors = []
836
837
@docval({'name': 'data', 'type': dict, 'doc': 'Row data to add'})
838
def add_validated_row(self, **kwargs):
839
"""Add row with validation."""
840
data = getargs('data', kwargs)
841
842
if self.validation_enabled:
843
# Validate data before adding
844
validation_errors = self._validate_row_data(data)
845
846
if validation_errors:
847
error_msg = f"Row validation failed: {validation_errors}"
848
if kwargs.get('strict', True):
849
raise ValueError(error_msg)
850
else:
851
print(f"Warning: {error_msg}")
852
self.validation_errors.extend(validation_errors)
853
854
# Add row if validation passes or warnings allowed
855
self.add_row(**data)
856
857
def _validate_row_data(self, data):
858
"""Validate individual row data."""
859
errors = []
860
861
# Check required columns
862
for col_name in self.colnames:
863
if col_name not in data:
864
errors.append(f"Missing required column: {col_name}")
865
866
# Check column data types and ranges
867
for col_name, value in data.items():
868
if col_name in self.colnames:
869
column = self.get_column(col_name)
870
871
# Basic type checking
872
if hasattr(column, 'dtype'):
873
expected_dtype = column.dtype
874
if expected_dtype == 'int' and not isinstance(value, int):
875
errors.append(f"Column {col_name} expects int, got {type(value)}")
876
elif expected_dtype == 'float' and not isinstance(value, (int, float)):
877
errors.append(f"Column {col_name} expects float, got {type(value)}")
878
879
# Range checking for numeric columns
880
if col_name == 'age' and isinstance(value, (int, float)):
881
if value < 0 or value > 365: # Days
882
errors.append(f"Age {value} is outside valid range [0, 365]")
883
884
elif col_name == 'weight' and isinstance(value, (int, float)):
885
if value < 0 or value > 100: # Grams
886
errors.append(f"Weight {value} is outside valid range [0, 100]")
887
888
return errors
889
890
def validate_table(self):
891
"""Validate entire table and return results."""
892
return validate_container(self, detailed=True)
893
894
def get_validation_summary(self):
895
"""Get summary of validation issues."""
896
return {
897
'total_errors': len(self.validation_errors),
898
'errors': self.validation_errors,
899
'rows': len(self)
900
}
901
902
# Usage
903
validated_table = ValidatedDynamicTable(
904
name='subjects',
905
description='Subject data with validation',
906
validate_on_add=True
907
)
908
909
validated_table.add_column('subject_id', 'Subject ID')
910
validated_table.add_column('age', 'Age in days', dtype='int')
911
validated_table.add_column('weight', 'Weight in grams', dtype='float')
912
913
# Add valid data
914
try:
915
validated_table.add_validated_row(
916
data={'subject_id': 'mouse_001', 'age': 90, 'weight': 25.5}
917
)
918
print("Successfully added valid row")
919
except ValueError as e:
920
print(f"Validation error: {e}")
921
922
# Try to add invalid data
923
try:
924
validated_table.add_validated_row(
925
data={'subject_id': 'mouse_002', 'age': -10, 'weight': 150.0}, # Invalid values
926
strict=False # Allow warnings
927
)
928
print("Added row with warnings")
929
except ValueError as e:
930
print(f"Validation error: {e}")
931
932
# Check validation summary
933
summary = validated_table.get_validation_summary()
934
print(f"Validation summary: {summary}")
935
936
# Final table validation
937
final_validation = validated_table.validate_table()
938
print(f"Final table validation: {final_validation['valid']}")
939
```