or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

build-system.md · common-data.md · containers.md · data-utils.md · index.md · io-backends.md · query.md · specification.md · term-sets.md · utils.md · validation.md

validation.mddocs/

0

# Validation System

1

2

HDMF provides comprehensive validation of data against specifications, with detailed error reporting and schema compliance checking. The validation system ensures data integrity and specification compliance, and provides detailed feedback for debugging and quality assurance.

3

4

## Capabilities

5

6

### Validator Classes

7

8

Core validator classes for different types of data validation against specifications.

9

10

```python { .api }

11

class Validator:

12

"""

13

Base validator class for validating data against specifications.

14

15

Provides the foundation for all validation operations in HDMF,

16

including schema validation, type checking, and constraint verification.

17

"""

18

19

def __init__(self, spec, **kwargs):

20

"""

21

Initialize validator.

22

23

Args:

24

spec: Specification object to validate against

25

**kwargs: Additional validator options:

26

- strict: Enable strict validation mode

27

- ignore_missing: Ignore missing optional fields

28

"""

29

30

def validate(self, builder, **kwargs) -> list:

31

"""

32

Validate builder against specification.

33

34

Args:

35

builder: Builder object to validate

36

**kwargs: Validation options

37

38

Returns:

39

List of validation errors (empty if valid)

40

"""

41

42

```

### Validation Error Classes

43

44

Specific error classes for different types of validation failures with detailed error reporting.

45

46

```python { .api }

47

class Error(Exception):

48

"""

49

Base class for HDMF validation errors.

50

51

Provides structured error reporting with location information

52

and detailed messages for debugging validation failures.

53

"""

54

55

def __init__(self, location: str, message: str = None):

56

"""

57

Initialize validation error.

58

59

Args:

60

location: Location where error occurred

61

message: Detailed error message

62

"""

63

64

class DtypeError(Error):

65

"""

66

Error for data type mismatches in validation.

67

68

Raised when data types don't match specification requirements.

69

"""

70

pass

71

72

class MissingError(Error):

73

"""

74

Error for missing required components.

75

76

Raised when required datasets, groups, or attributes are missing.

77

"""

78

pass

79

80

class ExpectedArrayError(Error):

81

"""

82

Error for expected array data validation failures.

83

84

Raised when array-like data doesn't meet shape or type requirements.

85

"""

86

pass

87

88

class ShapeError(Error):

89

"""

90

Error for array shape validation failures.

91

92

Raised when array shapes don't match specification constraints.

93

"""

94

pass

95

96

class MissingDataType(Error):

97

"""

98

Error for missing data type specifications.

99

100

Raised when referenced data types are not found in namespace.

101

"""

102

pass

103

104

class IllegalLinkError(Error):

105

"""

106

Error for illegal link operations in validation.

107

108

Raised when links violate specification constraints.

109

"""

110

pass

111

112

class IncorrectDataType(Error):

113

"""

114

Error for incorrect data type usage.

115

116

Raised when data types are incorrect for the context.

117

"""

118

pass

119

120

class IncorrectQuantityError(Error):

121

"""

122

Error for incorrect quantity specifications.

123

124

Raised when quantities don't match cardinality constraints.

125

"""

126

pass

127

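```

Further validator methods (continuing the `Validator` API) and the specialized validator subclasses:

```python { .api }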

128

129

def check_type(self, builder) -> list:

130

"""

131

Check data type compliance.

132

133

Args:

134

builder: Builder to check

135

136

Returns:

137

List of type validation errors

138

"""

139

140

def check_shape(self, builder) -> list:

141

"""

142

Check data shape compliance.

143

144

Args:

145

builder: Builder to check

146

147

Returns:

148

List of shape validation errors

149

"""

150

151

def check_attributes(self, builder) -> list:

152

"""

153

Check attribute requirements and values.

154

155

Args:

156

builder: Builder to check

157

158

Returns:

159

List of attribute validation errors

160

"""

161

162

@property

163

def spec(self):

164

"""Specification being validated against."""

165

166

class GroupValidator(Validator):

167

"""

168

Validator for group (container) specifications.

169

170

Validates hierarchical container structures including nested groups,

171

datasets, attributes, and links against group specifications.

172

"""

173

174

def __init__(self, spec, **kwargs):

175

"""

176

Initialize group validator.

177

178

Args:

179

spec: GroupSpec to validate against

180

"""

181

182

def validate(self, builder, **kwargs) -> list:

183

"""

184

Validate group builder against specification.

185

186

Args:

187

builder: GroupBuilder to validate

188

189

Returns:

190

List of validation errors

191

"""

192

193

def check_groups(self, builder) -> list:

194

"""

195

Check nested group requirements.

196

197

Args:

198

builder: GroupBuilder to check

199

200

Returns:

201

List of group validation errors

202

"""

203

204

def check_datasets(self, builder) -> list:

205

"""

206

Check dataset requirements.

207

208

Args:

209

builder: GroupBuilder to check

210

211

Returns:

212

List of dataset validation errors

213

"""

214

215

def check_links(self, builder) -> list:

216

"""

217

Check link requirements and targets.

218

219

Args:

220

builder: GroupBuilder to check

221

222

Returns:

223

List of link validation errors

224

"""

225

226

class DatasetValidator(Validator):

227

"""

228

Validator for dataset specifications.

229

230

Validates dataset structures including data types, shapes,

231

dimensions, and associated attributes against dataset specifications.

232

"""

233

234

def __init__(self, spec, **kwargs):

235

"""

236

Initialize dataset validator.

237

238

Args:

239

spec: DatasetSpec to validate against

240

"""

241

242

def validate(self, builder, **kwargs) -> list:

243

"""

244

Validate dataset builder against specification.

245

246

Args:

247

builder: DatasetBuilder to validate

248

249

Returns:

250

List of validation errors

251

"""

252

253

def check_data_type(self, builder) -> list:

254

"""

255

Check data type compliance including compound types.

256

257

Args:

258

builder: DatasetBuilder to check

259

260

Returns:

261

List of data type validation errors

262

"""

263

264

def check_dimensions(self, builder) -> list:

265

"""

266

Check dimension names and constraints.

267

268

Args:

269

builder: DatasetBuilder to check

270

271

Returns:

272

List of dimension validation errors

273

"""

274

275

class AttributeValidator(Validator):

276

"""

277

Validator for attribute specifications.

278

279

Validates metadata attributes including values, types,

280

and constraints against attribute specifications.

281

"""

282

283

def __init__(self, spec, **kwargs):

284

"""

285

Initialize attribute validator.

286

287

Args:

288

spec: AttributeSpec to validate against

289

"""

290

291

def validate(self, builder, **kwargs) -> list:

292

"""

293

Validate attribute against specification.

294

295

Args:

296

builder: Builder containing the attribute

297

298

Returns:

299

List of validation errors

300

"""

301

302

def check_value_constraints(self, value) -> list:

303

"""

304

Check value against specification constraints.

305

306

Args:

307

value: Attribute value to check

308

309

Returns:

310

List of constraint validation errors

311

"""

312

```

313

314

### Validator Management

315

316

Classes for managing and coordinating validation across different data types.

317

318

```python { .api }

319

class ValidatorMap:

320

"""

321

Mapping system for validators across different data types.

322

323

Manages the association between data types and their corresponding

324

validators, enabling automatic validator selection and coordination.

325

"""

326

327

def __init__(self, **kwargs):

328

"""Initialize validator map."""

329

330

def register_validator(self, neurodata_type: str, validator_class):

331

"""

332

Register validator class for a data type.

333

334

Args:

335

neurodata_type: Name of the data type

336

validator_class: Validator class to register

337

"""

338

339

def get_validator(self, neurodata_type: str, spec) -> Validator:

340

"""

341

Get validator instance for a data type.

342

343

Args:

344

neurodata_type: Name of the data type

345

spec: Specification to validate against

346

347

Returns:

348

Validator instance for the data type

349

"""

350

351

def validate_builder(self, builder, spec, **kwargs) -> list:

352

"""

353

Validate builder using appropriate validator.

354

355

Args:

356

builder: Builder to validate

357

spec: Specification to validate against

358

359

Returns:

360

List of validation errors

361

"""

362

```

363

364

### Validation Errors

365

366

Comprehensive error classes for different types of validation failures.

367

368

```python { .api }

369

class ValidationError(Exception):
    """Root of the validation error hierarchy."""

    def __init__(self, message: str, location: str = None, **kwargs):
        """
        Create a validation error.

        Args:
            message: Human-readable description of the failure
            location: Place in the data where the failure was found (optional)
            **kwargs: Accepted so subclasses can forward options; not used here
        """
        self.location = location
        super().__init__(message)


class SpecValidationError(ValidationError):
    """Raised when data does not comply with a specification."""

    def __init__(self, spec_type: str, message: str, **kwargs):
        """
        Create a specification validation error.

        Args:
            spec_type: Type of specification that failed
            message: Human-readable description of the failure
            **kwargs: Forwarded to ValidationError (e.g. location)
        """
        super().__init__(message, **kwargs)
        self.spec_type = spec_type


class TypeValidationError(ValidationError):
    """Raised when a value has a different data type than expected."""

    def __init__(self, expected_type, actual_type, **kwargs):
        """
        Create a type validation error; the message is derived from both types.

        Args:
            expected_type: Data type required by the specification
            actual_type: Data type that was actually found
            **kwargs: Forwarded to ValidationError (e.g. location)
        """
        super().__init__(
            f"Expected type {expected_type}, got {actual_type}", **kwargs
        )
        self.expected_type, self.actual_type = expected_type, actual_type


class ShapeValidationError(ValidationError):
    """Raised when data has a different shape than expected."""

    def __init__(self, expected_shape, actual_shape, **kwargs):
        """
        Create a shape validation error; the message is derived from both shapes.

        Args:
            expected_shape: Shape required by the specification
            actual_shape: Shape that was actually found
            **kwargs: Forwarded to ValidationError (e.g. location)
        """
        super().__init__(
            f"Expected shape {expected_shape}, got {actual_shape}", **kwargs
        )
        self.expected_shape, self.actual_shape = expected_shape, actual_shape


class RequiredValueError(ValidationError):
    """Raised when a required value is absent."""

    def __init__(self, field_name: str, **kwargs):
        """
        Create a required-value error; the message names the missing field.

        Args:
            field_name: Name of the required field that is missing
            **kwargs: Forwarded to ValidationError (e.g. location)
        """
        super().__init__(f"Required field '{field_name}' is missing", **kwargs)
        self.field_name = field_name


class ConstraintViolationError(ValidationError):
    """Raised when a value violates a constraint."""

    def __init__(self, constraint: str, value, **kwargs):
        """
        Create a constraint violation error.

        Args:
            constraint: Description of the violated constraint
            value: The offending value
            **kwargs: Forwarded to ValidationError (e.g. location)
        """
        super().__init__(
            f"Constraint violation: {constraint}, value: {value}", **kwargs
        )
        self.constraint, self.value = constraint, value

458

```

459

460

### Validation Utilities

461

462

Utility functions for performing validation operations and reporting results.

463

464

```python { .api }

465

def validate_file(file_path: str, namespace: str = None, **kwargs) -> dict:

466

"""

467

Validate entire file against namespace specifications.

468

469

Args:

470

file_path: Path to file to validate

471

namespace: Namespace to validate against (default: auto-detect)

472

**kwargs: Validation options:

473

- strict: Enable strict validation

474

- detailed: Include detailed error information

475

476

Returns:

477

Dictionary with validation results:

478

{

479

'valid': bool,

480

'errors': list,

481

'warnings': list,

482

'summary': dict

483

}

484

"""

485

486

def validate_container(container, **kwargs) -> dict:

487

"""

488

Validate container object against its specification.

489

490

Args:

491

container: Container object to validate

492

**kwargs: Validation options

493

494

Returns:

495

Dictionary with validation results

496

"""

497

498

def generate_validation_report(validation_results: dict, output_path: str = None) -> str:

499

"""

500

Generate human-readable validation report.

501

502

Args:

503

validation_results: Results from validation operation

504

output_path: Optional path to save report

505

506

Returns:

507

Formatted validation report string

508

"""

509

510

def check_specification_compliance(builder, spec, **kwargs) -> bool:

511

"""

512

Quick compliance check for builder against specification.

513

514

Args:

515

builder: Builder to check

516

spec: Specification to check against

517

518

Returns:

519

True if compliant, False otherwise

520

"""

521

```

522

523

## Usage Examples

524

525

### Basic File Validation

526

527

```python

528

from hdmf.validate import validate_file, generate_validation_report

529

from hdmf.backends.hdf5 import HDF5IO

530

531

# Run the validator over the whole HDF5 file
validation_results = validate_file(
    'experiment.h5', namespace='hdmf-common', strict=True, detailed=True
)

is_valid = validation_results['valid']
print(f"File is valid: {is_valid}")
print(f"Number of errors: {len(validation_results['errors'])}")
print(f"Number of warnings: {len(validation_results['warnings'])}")

# A report is only produced (and written to disk) for invalid files
if not is_valid:
    report = generate_validation_report(validation_results)
    print("Validation Report:")
    print(report)

    with open('validation_report.txt', 'w') as f:
        f.write(report)

# Counters collected by the validator; missing counters are shown as 0
summary = validation_results['summary']
containers_checked = summary.get('containers_checked', 0)
datasets_checked = summary.get('datasets_checked', 0)
print(f"Total containers validated: {containers_checked}")
print(f"Total datasets validated: {datasets_checked}")

557

```

558

559

### Container-Level Validation

560

561

```python

562

from hdmf.validate import validate_container, ValidationError

563

from hdmf.common import DynamicTable, VectorData

564

from hdmf import Container

565

import numpy as np

566

567

# Build a table that may contain validation problems
table = DynamicTable(name='test_table', description='Test table for validation')

# A well-formed numeric column
table.add_column('valid_column', 'Valid column', data=np.arange(10))

# A column whose data is of the wrong type; report the failure and carry on
problem_values = ['string', 'data', 'in', 'numeric', 'column']
try:
    table.add_column('problem_column', 'Problematic column', data=problem_values)
except Exception as e:
    print(f"Column creation warning: {e}")

# Validate the container (non-strict: allow some flexibility)
validation_results = validate_container(table, strict=False, detailed=True)

print("Container validation results:")
print(f"Valid: {validation_results['valid']}")

for found_error in validation_results['errors']:
    print(f"Error: {found_error}")

for found_warning in validation_results['warnings']:
    print(f"Warning: {found_warning}")

598

```

599

600

### Custom Validator Implementation

601

602

```python

603

from hdmf.validate import Validator, ValidationError

604

from hdmf.spec import DatasetSpec

605

import numpy as np

606

607

class NeuralDataValidator(Validator):
    """
    Validator that layers neural-recording sanity checks on top of the base checks.

    The extra checks look at voltage magnitude, constant channels, the
    ``sampling_rate`` attribute and the number of channels.
    """

    def __init__(self, spec, **kwargs):
        """
        Args:
            spec: Specification to validate against
            **kwargs: Forwarded to the base validator; ``sampling_rate_min``
                (default 1.0) and ``sampling_rate_max`` (default 100000.0)
                are also read from here
        """
        super().__init__(spec, **kwargs)
        self.sampling_rate_min = kwargs.get('sampling_rate_min', 1.0)
        self.sampling_rate_max = kwargs.get('sampling_rate_max', 100000.0)

    def validate(self, builder, **kwargs):
        """Run the base validation, then append the domain-specific findings."""
        errors = super().validate(builder, **kwargs)

        domain_checks = (
            self._check_neural_data_quality,
            self._check_sampling_rate,
            self._check_channel_count,
        )
        for run_check in domain_checks:
            errors.extend(run_check(builder))

        return errors

    def _check_neural_data_quality(self, builder):
        """Flag out-of-range voltages and channels whose signal never changes."""
        if getattr(builder, 'data', None) is None:
            return []

        findings = []
        samples = np.array(builder.data)

        # Magnitudes above 10000 are treated as unrealistic (> 10mV in µV)
        if np.any(np.abs(samples) > 10000):
            findings.append(ValidationError(
                "Neural data contains unrealistic voltage values (>10mV)",
                location=f"{builder.name}/data"
            ))

        # A near-zero standard deviation means the channel is constant (likely broken)
        if samples.ndim > 1:
            findings.extend(
                ValidationError(
                    f"Channel {ch_idx} appears to be constant (possibly broken)",
                    location=f"{builder.name}/data/channel_{ch_idx}"
                )
                for ch_idx in range(samples.shape[1])
                if np.std(samples[:, ch_idx]) < 1e-6
            )

        return findings

    def _check_sampling_rate(self, builder):
        """Check that the ``sampling_rate`` attribute, if present, is within bounds."""
        if 'sampling_rate' not in builder.attributes:
            return []

        rate = builder.attributes['sampling_rate']

        if rate < self.sampling_rate_min:
            return [ValidationError(
                f"Sampling rate {rate} Hz is too low (min: {self.sampling_rate_min})",
                location=f"{builder.name}/sampling_rate"
            )]

        if rate > self.sampling_rate_max:
            return [ValidationError(
                f"Sampling rate {rate} Hz is too high (max: {self.sampling_rate_max})",
                location=f"{builder.name}/sampling_rate"
            )]

        return []

    def _check_channel_count(self, builder):
        """Check that multi-dimensional data has a plausible number of channels."""
        if getattr(builder, 'data', None) is None:
            return []

        samples = np.array(builder.data)
        if samples.ndim <= 1:
            return []

        # Channels are taken from the second axis of the data
        n_channels = samples.shape[1]

        if n_channels > 1000:
            return [ValidationError(
                f"Very high channel count ({n_channels}), please verify",
                location=f"{builder.name}/data"
            )]

        if n_channels == 0:
            return [ValidationError(
                "No channels found in neural data",
                location=f"{builder.name}/data"
            )]

        return []

697

698

# Usage: describe the dataset the validator should accept
neural_spec = DatasetSpec(
    doc='Neural recording data',
    name='neural_data',
    dtype='float64',
    shape=(None, None),
    dims=['time', 'channels']
)

# Tighten the accepted sampling-rate window
rate_limits = {'sampling_rate_min': 100.0, 'sampling_rate_max': 50000.0}
neural_validator = NeuralDataValidator(neural_spec, **rate_limits)

# Build a dataset builder to validate
from hdmf.build import DatasetBuilder

recording = np.random.randn(30000, 64) * 100  # 64 channels, 30k samples
neural_builder = DatasetBuilder(
    name='neural_data',
    data=recording,
    attributes={'sampling_rate': 30000.0}
)

validation_errors = neural_validator.validate(neural_builder)
if not validation_errors:
    print("Neural data passed validation")
for issue in validation_errors:
    print(f"Validation error: {issue}")

727

```

728

729

### Batch Validation of Multiple Files

730

731

```python

732

from hdmf.validate import validate_file

733

import os

734

from pathlib import Path

735

import json

736

737

def batch_validate_files(directory_path: str, file_pattern: str = "*.h5",
                         namespace: str = 'hdmf-common') -> dict:
    """
    Validate all files matching a pattern in a directory.

    Files are processed in sorted path order so the output is reproducible.
    A file whose validation raises is recorded as invalid with a single
    error entry instead of aborting the whole batch.

    Args:
        directory_path: Directory containing files to validate
        file_pattern: Glob pattern (relative to the directory) to match
        namespace: Namespace to validate against

    Returns:
        Dictionary mapping each file path (as a string) to a dict with the keys
        'valid', 'error_count', 'warning_count', 'errors' and 'summary'.
        'errors' holds at most the first five errors, rendered as strings.
    """
    results = {}

    # glob() yields entries in filesystem order; sort for deterministic output
    files_to_validate = sorted(Path(directory_path).glob(file_pattern))
    print(f"Found {len(files_to_validate)} files to validate")

    for file_path in files_to_validate:
        print(f"Validating {file_path.name}...")

        try:
            validation_result = validate_file(
                str(file_path),
                namespace=namespace,
                strict=False,
                detailed=True
            )

            results[str(file_path)] = {
                'valid': validation_result['valid'],
                'error_count': len(validation_result['errors']),
                'warning_count': len(validation_result['warnings']),
                # Keep only the first 5 errors, as text: this matches the failure
                # branch below and avoids putting error objects into the result
                'errors': [str(err) for err in validation_result['errors'][:5]],
                'summary': validation_result['summary']
            }

        except Exception as e:
            # Deliberate best-effort: one unreadable file must not stop the batch
            results[str(file_path)] = {
                'valid': False,
                'error_count': 1,
                'warning_count': 0,
                'errors': [f"Validation failed: {str(e)}"],
                'summary': {}
            }

    return results

787

788

# Validate every HDF5 file in the experiment directory
validation_results = batch_validate_files(
    './experiment_data/', file_pattern='*.h5', namespace='hdmf-common'
)

# Aggregate counts for the summary
per_file = list(validation_results.values())
total_files = len(validation_results)
valid_files = len([entry for entry in per_file if entry['valid']])
total_errors = sum(entry['error_count'] for entry in per_file)

print("\nBatch Validation Summary:")
print(f"Total files: {total_files}")
print(f"Valid files: {valid_files}")
print(f"Invalid files: {total_files - valid_files}")
print(f"Total errors: {total_errors}")

# Persist the per-file details
with open('batch_validation_results.json', 'w') as f:
    json.dump(validation_results, f, indent=2)

# List each invalid file with up to three of its errors
print("\nProblematic files:")
for file_path, result in validation_results.items():
    if result['valid']:
        continue
    print(f" {Path(file_path).name}: {result['error_count']} errors")
    for error in result['errors'][:3]:
        print(f" - {error}")

817

```

818

819

### Real-time Validation During Data Creation

820

821

```python

822

from hdmf.validate import Validator, validate_container

823

from hdmf.common import DynamicTable

824

from hdmf import docval, getargs

825

import numpy as np

826

827

class ValidatedDynamicTable(DynamicTable):
    """
    DynamicTable with real-time validation during data entry.

    Rows added through ``add_validated_row`` are checked first. Problems either
    abort the insert (strict mode) or are printed and accumulated in
    ``validation_errors`` (non-strict mode).
    """

    # Inclusive bounds for range-checked columns: column name -> (label, low, high)
    _NUMERIC_RANGES = {
        'age': ('Age', 0, 365),        # Days
        'weight': ('Weight', 0, 100),  # Grams
    }

    def __init__(self, **kwargs):
        """
        Args:
            **kwargs: DynamicTable constructor arguments, plus the optional
                ``validate_on_add`` flag (default True) that enables row validation
        """
        # Remove our own option before delegating so it is not passed on to
        # DynamicTable.__init__, which does not declare it
        validate_on_add = kwargs.pop('validate_on_add', True)
        super().__init__(**kwargs)
        self.validation_enabled = validate_on_add
        self.validation_errors = []

    @docval({'name': 'data', 'type': dict, 'doc': 'Row data to add'},
            {'name': 'strict', 'type': bool,
             'doc': 'Raise on validation errors instead of recording them',
             'default': True})
    def add_validated_row(self, **kwargs):
        """
        Add a row after validating it.

        Raises:
            ValueError: If validation is enabled, the row has validation
                errors, and ``strict`` is True
        """
        # 'strict' is declared in the docval spec above so callers can pass it
        data, strict = getargs('data', 'strict', kwargs)

        if self.validation_enabled:
            # Validate data before adding
            validation_errors = self._validate_row_data(data)

            if validation_errors:
                error_msg = f"Row validation failed: {validation_errors}"
                if strict:
                    raise ValueError(error_msg)
                print(f"Warning: {error_msg}")
                self.validation_errors.extend(validation_errors)

        # Add row if validation passes or warnings allowed
        self.add_row(**data)

    def _validate_row_data(self, data):
        """
        Validate individual row data.

        Args:
            data: Mapping of column name to the value for the new row

        Returns:
            List of error message strings (empty if the row is acceptable)
        """
        # Every existing column must be present in the row
        errors = [
            f"Missing required column: {col_name}"
            for col_name in self.colnames
            if col_name not in data
        ]

        for col_name, value in data.items():
            if col_name not in self.colnames:
                continue

            column = self.get_column(col_name)

            # Basic type checking, only when the column exposes a dtype
            if hasattr(column, 'dtype'):
                expected_dtype = column.dtype
                if expected_dtype == 'int' and not isinstance(value, int):
                    errors.append(f"Column {col_name} expects int, got {type(value)}")
                elif expected_dtype == 'float' and not isinstance(value, (int, float)):
                    errors.append(f"Column {col_name} expects float, got {type(value)}")

            # Range checking for numeric columns listed in _NUMERIC_RANGES
            if col_name in self._NUMERIC_RANGES and isinstance(value, (int, float)):
                label, low, high = self._NUMERIC_RANGES[col_name]
                if value < low or value > high:
                    errors.append(
                        f"{label} {value} is outside valid range [{low}, {high}]"
                    )

        return errors

    def validate_table(self):
        """Validate entire table and return results."""
        return validate_container(self, detailed=True)

    def get_validation_summary(self):
        """Get summary of validation issues recorded by non-strict inserts."""
        return {
            'total_errors': len(self.validation_errors),
            'errors': self.validation_errors,
            'rows': len(self)
        }

901

902

# Usage: create the table with validation switched on
validated_table = ValidatedDynamicTable(
    name='subjects',
    description='Subject data with validation',
    validate_on_add=True
)

validated_table.add_column('subject_id', 'Subject ID')
validated_table.add_column('age', 'Age in days', dtype='int')
validated_table.add_column('weight', 'Weight in grams', dtype='float')

# A row that satisfies every check
good_row = {'subject_id': 'mouse_001', 'age': 90, 'weight': 25.5}
try:
    validated_table.add_validated_row(data=good_row)
    print("Successfully added valid row")
except ValueError as e:
    print(f"Validation error: {e}")

# A row with out-of-range values; non-strict mode records warnings instead of raising
bad_row = {'subject_id': 'mouse_002', 'age': -10, 'weight': 150.0}
try:
    validated_table.add_validated_row(data=bad_row, strict=False)
    print("Added row with warnings")
except ValueError as e:
    print(f"Validation error: {e}")

# Issues accumulated during the non-strict insert
summary = validated_table.get_validation_summary()
print(f"Validation summary: {summary}")

# Validate the finished table as a whole
final_validation = validated_table.validate_table()
print(f"Final table validation: {final_validation['valid']}")

939

```