or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

build-system.md, common-data.md, containers.md, data-utils.md, index.md, io-backends.md, query.md, specification.md, term-sets.md, utils.md, validation.md

docs/common-data.md

0

# Common Data Structures

1

2

HDMF (the Hierarchical Data Modeling Framework) provides pre-built data structures for scientific data, including dynamic tables, vector data, sparse matrices, and multi-container systems. These structures are automatically generated from specifications and provide standardized patterns for organizing complex scientific datasets.

3

4

## Capabilities

5

6

### Dynamic Tables

7

8

Flexible table structures that can accommodate varying column types and dynamic schema evolution.

9

10

```python { .api }

11

class DynamicTable(Container):

12

"""

13

Dynamic table implementation with flexible columns and metadata support.

14

15

Provides a table structure where columns can be added dynamically

16

and each column can hold its own data type, with full metadata preservation.

17

"""

18

19

def __init__(self, name: str, description: str, **kwargs):

20

"""

21

Initialize dynamic table.

22

23

Args:

24

name: Name of the table

25

description: Description of the table's purpose

26

**kwargs: Additional table properties:

27

- id: VectorData for row identifiers

28

- columns: List of VectorData columns

29

- colnames: List of column names

30

"""

31

32

def add_column(self, name: str, description: str, data=None, **kwargs):

33

"""

34

Add a column to the table.

35

36

Args:

37

name: Column name

38

description: Column description

39

data: Initial data for the column

40

**kwargs: Additional column properties:

41

- dtype: Data type for the column

42

- index: Whether this column needs an index

43

"""

44

45

def add_row(self, **kwargs):

46

"""

47

Add a row to the table.

48

49

Args:

50

**kwargs: Column values for the new row

51

"""

52

53

def get_column(self, name: str) -> 'VectorData':

54

"""

55

Get column by name.

56

57

Args:

58

name: Column name

59

60

Returns:

61

VectorData object for the column

62

"""

63

64

def to_dataframe(self):

65

"""

66

Convert table to pandas DataFrame.

67

68

Returns:

69

pandas.DataFrame representation of the table

70

"""

71

72

@classmethod

73

def from_dataframe(cls, df, name: str, **kwargs):

74

"""

75

Create DynamicTable from pandas DataFrame.

76

77

Args:

78

df: Source pandas DataFrame

79

name: Name for the new table

80

**kwargs: Additional table properties

81

82

Returns:

83

DynamicTable instance created from DataFrame

84

"""

85

86

def __getitem__(self, key):

87

"""Get rows or columns by index/name."""

88

89

def __len__(self) -> int:

90

"""Number of rows in the table."""

91

92

@property

93

def columns(self) -> tuple:

94

"""Tuple of column objects."""

95

96

@property

97

def colnames(self) -> tuple:

98

"""Tuple of column names."""

99

100

class AlignedDynamicTable(DynamicTable):

101

"""

102

Dynamic table made up of a main table plus named category sub-tables that stay row-aligned with it.

103

104

Ensures that related columns maintain alignment and provides

105

specialized access patterns for multi-dimensional scientific data.

106

"""

107

108

def __init__(self, name: str, description: str, **kwargs):

109

"""

110

Initialize aligned dynamic table.

111

112

Args:

113

name: Name of the table

114

description: Description of the table

115

**kwargs: Additional properties:

116

- category_tables: Dictionary of related sub-tables

117

"""

118

119

def add_category_table(self, name: str, description: str, **kwargs):

120

"""

121

Add a category table for grouped data.

122

123

Args:

124

name: Category table name

125

description: Description of the category

126

"""

127

128

def get_category_table(self, name: str) -> DynamicTable:

129

"""

130

Get category table by name.

131

132

Args:

133

name: Category table name

134

135

Returns:

136

DynamicTable for the category

137

"""

138

```

139

140

### Vector Data Structures

141

142

Core data structures for storing and indexing vector data with support for ragged arrays.

143

144

```python { .api }

145

class VectorData(Data):

146

"""

147

Vector data implementation for table columns and array data.

148

149

Stores array data (one entry per table row along the first axis) with metadata and provides indexing capabilities

150

for both regular and ragged array structures.

151

"""

152

153

def __init__(self, name: str, description: str, data, **kwargs):

154

"""

155

Initialize vector data.

156

157

Args:

158

name: Name of the vector data

159

description: Description of the data

160

data: Array-like data content

161

**kwargs: Additional properties:

162

- unit: Unit of measurement

163

- resolution: Data resolution

164

- conversion: Conversion factor

165

"""

166

167

def append(self, data):

168

"""

169

Append data to the vector.

170

171

Args:

172

data: Data to append

173

"""

174

175

def extend(self, data):

176

"""

177

Extend vector with iterable data.

178

179

Args:

180

data: Iterable data to extend with

181

"""

182

183

@property

184

def unit(self) -> str:

185

"""Unit of measurement for the data."""

186

187

@property

188

def resolution(self) -> float:

189

"""Resolution of the data."""

190

191

class VectorIndex(VectorData):

192

"""

193

Vector index implementation for indexing into ragged arrays.

194

195

Provides indexing capabilities for VectorData that contains

196

variable-length elements, enabling efficient access to ragged data structures.

197

"""

198

199

def __init__(self, name: str, data, target: VectorData, **kwargs):

200

"""

201

Initialize vector index.

202

203

Args:

204

name: Name of the index

205

data: Index data (cumulative end offsets into target, one per ragged row)

206

target: Target VectorData being indexed

207

**kwargs: Additional properties

208

"""

209

210

def __getitem__(self, key):

211

"""Get indexed data slice."""

212

213

def add_vector(self, data):

214

"""

215

Add a vector to the indexed data.

216

217

Args:

218

data: Vector data to add

219

"""

220

221

@property

222

def target(self) -> VectorData:

223

"""Target VectorData being indexed."""

224

225

class ElementIdentifiers(Data):

226

"""

227

Element identifier implementation for unique element tracking.

228

229

Stores unique identifiers for data elements, enabling

230

cross-referencing and relationship tracking within datasets.

231

"""

232

233

def __init__(self, name: str = 'element_id', data=None, **kwargs):

234

"""

235

Initialize element identifiers.

236

237

Args:

238

name: Name for the identifiers (default: 'element_id')

239

data: Initial identifier data

240

"""

241

242

def add_ref(self, container):

243

"""

244

Add reference to a container.

245

246

Args:

247

container: Container to reference

248

249

Returns:

250

Identifier for the reference

251

"""

252

```

253

254

### Table Regions and References

255

256

Specialized structures for referencing and linking table data.

257

258

```python { .api }

259

class DynamicTableRegion(VectorData):

260

"""

261

Dynamic table region for referencing rows in DynamicTable objects.

262

263

Enables creation of references to specific rows or ranges of rows

264

in DynamicTable instances, supporting complex data relationships.

265

"""

266

267

def __init__(self, name: str, data, description: str, table: DynamicTable, **kwargs):

268

"""

269

Initialize dynamic table region.

270

271

Args:

272

name: Name of the region

273

data: Row indices or boolean mask

274

description: Description of the region

275

table: Target DynamicTable being referenced

276

"""

277

278

@property

279

def table(self) -> DynamicTable:

280

"""Target table being referenced."""

281

282

def get_referenced_tables(self) -> list:

283

"""

284

Get list of tables referenced by this region.

285

286

Returns:

287

List of DynamicTable instances

288

"""

289

290

def __getitem__(self, key):

291

"""Get referenced rows."""

292

```

293

294

### Sparse Data Structures

295

296

Efficient storage and manipulation of sparse data matrices.

297

298

```python { .api }

299

class CSRMatrix(Container):

300

"""

301

Compressed Sparse Row matrix implementation.

302

303

Provides memory-efficient storage for sparse matrices using

304

the CSR (Compressed Sparse Row) format with full metadata support.

305

"""

306

307

def __init__(self, data, indices, indptr, shape: tuple, **kwargs):

308

"""

309

Initialize CSR matrix.

310

311

Args:

312

data: Non-zero values array

313

indices: Column indices for non-zero values

314

indptr: Index pointers for row starts

315

shape: Shape of the full matrix (rows, cols)

316

**kwargs: Additional properties:

317

- name: Name for the matrix

318

- description: Matrix description

319

"""

320

321

def to_scipy_sparse(self):

322

"""

323

Convert to scipy sparse matrix.

324

325

Returns:

326

scipy.sparse.csr_matrix instance

327

"""

328

329

def to_dense(self):

330

"""

331

Convert to dense numpy array.

332

333

Returns:

334

Dense numpy array representation

335

"""

336

337

@classmethod

338

def from_scipy_sparse(cls, sparse_matrix, **kwargs):

339

"""

340

Create CSRMatrix from scipy sparse matrix.

341

342

Args:

343

sparse_matrix: scipy sparse matrix

344

**kwargs: Additional properties

345

346

Returns:

347

CSRMatrix instance

348

"""

349

350

@property

351

def data(self):

352

"""Non-zero values array."""

353

354

@property

355

def indices(self):

356

"""Column indices array."""

357

358

@property

359

def indptr(self):

360

"""Index pointers array."""

361

362

@property

363

def shape(self) -> tuple:

364

"""Shape of the matrix."""

365

366

@property

367

def nnz(self) -> int:

368

"""Number of non-zero elements."""

369

```

370

371

### Multi-Container Systems

372

373

Specialized containers for managing collections of related objects.

374

375

```python { .api }

376

class SimpleMultiContainer(Container, MultiContainerInterface):

377

"""

378

Simple multi-container implementation for holding multiple objects.

379

380

Provides a straightforward container for managing collections

381

of related objects with dictionary-like access patterns.

382

"""

383

384

def __init__(self, name: str, **kwargs):

385

"""

386

Initialize simple multi-container.

387

388

Args:

389

name: Name of the container

390

**kwargs: Additional container properties

391

"""

392

393

def add_container(self, container: Container):

394

"""

395

Add a container to the collection.

396

397

Args:

398

container: Container to add

399

"""

400

401

def get_container(self, name: str) -> Container:

402

"""

403

Get container by name.

404

405

Args:

406

name: Container name

407

408

Returns:

409

Container object

410

"""

411

412

def __iter__(self):

413

"""Iterate over contained objects."""

414

415

def __len__(self) -> int:

416

"""Number of contained objects."""

417

```

418

419

### Experimental Data Types

420

421

Experimental and specialized data structures for advanced use cases.

422

423

```python { .api }

424

class EnumData(VectorData):

425

"""

426

Enumeration data (experimental) for categorical data with controlled vocabularies.

427

428

Stores categorical data with predefined value sets and provides

429

validation and conversion capabilities for enumerated types.

430

"""

431

432

def __init__(self, name: str, description: str, data, elements: list, **kwargs):

433

"""

434

Initialize enumeration data.

435

436

Args:

437

name: Name of the enumeration data

438

description: Description of the data

439

data: Enumeration values (indices or strings)

440

elements: List of allowed enumeration elements

441

"""

442

443

@property

444

def elements(self) -> tuple:

445

"""Tuple of allowed enumeration elements."""

446

447

def add_element(self, element: str):

448

"""

449

Add allowed element to enumeration.

450

451

Args:

452

element: Element to add

453

"""

454

455

class HERD(Container):

456

"""

457

HDMF External Resources Data Structure (HERD, experimental).

458

459

Provides structured metadata for external resources and their

460

relationships within the data hierarchy.

461

"""

462

463

def __init__(self, **kwargs):

464

"""

465

Initialize HERD container.

466

467

Args:

468

**kwargs: HERD properties and metadata

469

"""

470

471

def add_resource(self, resource_spec: dict):

472

"""

473

Add external resource specification.

474

475

Args:

476

resource_spec: Dictionary describing the resource

477

"""

478

```

479

480

### Registration and Management Functions

481

482

Functions for registering and managing common data types.

483

484

```python { .api }

485

def register_class(neurodata_type: str, namespace: str, container_cls):

486

"""

487

Register container class for a data type.

488

489

Args:

490

neurodata_type: Name of the data type

491

namespace: Namespace containing the type

492

container_cls: Container class to register

493

"""

494

495

def register_map(container_cls, mapper_cls):

496

"""

497

Register object mapper for a container class.

498

499

Args:

500

container_cls: Container class

501

mapper_cls: Mapper class for serialization

502

"""

503

504

def get_class(neurodata_type: str, namespace: str = 'hdmf-common'):

505

"""

506

Get container class for a data type.

507

508

Args:

509

neurodata_type: Name of the data type

510

namespace: Namespace (default: 'hdmf-common')

511

512

Returns:

513

Container class for the data type

514

"""

515

516

def get_type_map():

517

"""

518

Get type map with HDMF-common extensions.

519

520

Returns:

521

TypeMap instance with common data types registered

522

"""

523

524

def get_manager():

525

"""

526

Get build manager with common data types.

527

528

Returns:

529

BuildManager instance configured for common types

530

"""

531

532

# Constants

533

CORE_NAMESPACE = 'hdmf-common' # Core namespace identifier

534

EXP_NAMESPACE = 'hdmf-experimental' # Experimental namespace identifier

535

```

536

537

## Usage Examples

538

539

### Creating and Using Dynamic Tables

540

541

```python

542

from hdmf.common import DynamicTable, VectorData

543

import numpy as np

544

545

# Create dynamic table

546

subjects_table = DynamicTable(

547

name='subjects',

548

description='Information about experimental subjects'

549

)

550

551

# Add columns

552

subjects_table.add_column('subject_id', 'Unique subject identifier')

553

subjects_table.add_column('age', 'Age in months', dtype='int')

554

subjects_table.add_column('weight', 'Weight in grams', dtype='float')

555

subjects_table.add_column('genotype', 'Genetic background')

556

557

# Add rows

558

subjects_table.add_row(subject_id='mouse_001', age=8, weight=25.3, genotype='WT')

559

subjects_table.add_row(subject_id='mouse_002', age=10, weight=27.1, genotype='KO')

560

subjects_table.add_row(subject_id='mouse_003', age=9, weight=24.8, genotype='WT')

561

562

# Access data

563

print(f"Table has {len(subjects_table)} rows")

564

print(f"Columns: {subjects_table.colnames}")

565

566

# Convert to DataFrame

567

df = subjects_table.to_dataframe()

568

print(df.head())

569

570

# Access specific columns

571

ages = subjects_table.get_column('age').data

572

print(f"Ages: {ages}")

573

```

574

575

### Working with Ragged Arrays Using Vector Indices

576

577

```python

578

from hdmf.common import VectorData, VectorIndex

579

580

# Create ragged data (variable-length spike trains)

581

spike_data = [

582

[0.1, 0.3, 0.7, 1.2], # Trial 1: 4 spikes

583

[0.2, 0.8], # Trial 2: 2 spikes

584

[0.05, 0.4, 0.6, 0.9, 1.1], # Trial 3: 5 spikes

585

]

586

587

# Flatten data and create cumulative indices

588

flattened_spikes = []

589

indices = []

590

for trial_spikes in spike_data:

591

flattened_spikes.extend(trial_spikes)

592

indices.append(len(flattened_spikes))

593

594

# Create VectorData and VectorIndex

595

spike_times = VectorData(

596

name='spike_times',

597

description='Spike timestamps in seconds',

598

data=flattened_spikes

599

)

600

601

spike_index = VectorIndex(

602

name='spike_times_index',

603

data=indices,

604

target=spike_times

605

)

606

607

# Access ragged data by trial

608

trial_0_spikes = spike_index[0] # [0.1, 0.3, 0.7, 1.2]

609

trial_1_spikes = spike_index[1] # [0.2, 0.8]

610

trial_2_spikes = spike_index[2] # [0.05, 0.4, 0.6, 0.9, 1.1]

611

612

print(f"Trial 0 spikes: {trial_0_spikes}")

613

print(f"Trial 1 spikes: {trial_1_spikes}")

614

```

615

616

### Creating Sparse Matrices

617

618

```python

619

from hdmf.common import CSRMatrix

620

import numpy as np

621

from scipy import sparse

622

623

# Create sparse data

624

row = np.array([0, 0, 1, 2, 2, 2])

625

col = np.array([0, 2, 1, 0, 1, 2])

626

data = np.array([1, 2, 3, 4, 5, 6])

627

628

# Create scipy sparse matrix

629

scipy_matrix = sparse.csr_matrix((data, (row, col)), shape=(3, 3))

630

631

# Convert to HDMF CSRMatrix

632

hdmf_matrix = CSRMatrix.from_scipy_sparse(

633

scipy_matrix,

634

name='connectivity_matrix',

635

description='Neural connectivity matrix'

636

)

637

638

print(f"Matrix shape: {hdmf_matrix.shape}")

639

print(f"Non-zero elements: {hdmf_matrix.nnz}")

640

641

# Convert back to dense for visualization

642

dense_matrix = hdmf_matrix.to_dense()

643

print("Dense representation:")

644

print(dense_matrix)

645

```

646

647

### Using Dynamic Table Regions for References

648

649

```python

650

from hdmf.common import DynamicTable, DynamicTableRegion

651

652

# Create source table

653

neurons_table = DynamicTable(

654

name='neurons',

655

description='Information about recorded neurons'

656

)

657

658

neurons_table.add_column('neuron_id', 'Unique neuron identifier')

659

neurons_table.add_column('brain_area', 'Brain area location')

660

neurons_table.add_column('cell_type', 'Cell type classification')

661

662

# Add neurons

663

for i in range(10):

664

neurons_table.add_row(

665

neuron_id=f'neuron_{i:03d}',

666

brain_area='CA1' if i < 5 else 'CA3',

667

cell_type='pyramidal' if i % 2 == 0 else 'interneuron'

668

)

669

670

# Create region referencing subset of neurons

671

ca1_neurons = DynamicTableRegion(

672

name='ca1_neurons',

673

data=[0, 1, 2, 3, 4], # Row indices for CA1 neurons

674

description='Neurons recorded from CA1 region',

675

table=neurons_table

676

)

677

678

# Access referenced data

679

referenced_neurons = ca1_neurons[:]

680

print(f"CA1 neurons: {len(referenced_neurons)} neurons")

681

682

# Use region in analysis table

683

analysis_table = DynamicTable(

684

name='spike_analysis',

685

description='Spike analysis results'

686

)

687

688

analysis_table.add_column('neurons', 'Analyzed neurons')

689

analysis_table.add_column('firing_rate', 'Average firing rate')

690

691

analysis_table.add_row(neurons=ca1_neurons, firing_rate=15.3)

692

```

693

694

### Creating Aligned Dynamic Tables

695

696

```python

697

from hdmf.common import AlignedDynamicTable

698

699

# Create aligned table for multi-modal data

700

session_data = AlignedDynamicTable(

701

name='session_data',

702

description='Aligned behavioral and neural data'

703

)

704

705

# Add main columns

706

session_data.add_column('timestamp', 'Time in seconds', dtype='float')

707

session_data.add_column('behavior', 'Behavioral state')

708

709

# Add category table for neural data

710

session_data.add_category_table(

711

name='neural',

712

description='Neural recording data'

713

)

714

neural_table = session_data.get_category_table('neural')

715

neural_table.add_column('spike_count', 'Number of spikes', dtype='int')

716

neural_table.add_column('lfp_power', 'LFP power', dtype='float')

717

718

# Add category table for stimulus data

719

session_data.add_category_table(

720

name='stimulus',

721

description='Stimulus presentation data'

722

)

723

stimulus_table = session_data.get_category_table('stimulus')

724

stimulus_table.add_column('stimulus_type', 'Type of stimulus')

725

stimulus_table.add_column('intensity', 'Stimulus intensity', dtype='float')

726

727

# Add synchronized data

728

session_data.add_row(

729

timestamp=1.0,

730

behavior='running',

731

neural={'spike_count': 5, 'lfp_power': 0.23},

732

stimulus={'stimulus_type': 'visual', 'intensity': 0.8}

733

)

734

735

print(f"Session data columns: {session_data.colnames}")

736

print(f"Neural category columns: {neural_table.colnames}")

737

```

738

739

### Working with Enumerated Data

740

741

```python

742

from hdmf.common import EnumData

743

744

# Create enumeration for behavioral states

745

behavior_states = EnumData(

746

name='behavior_states',

747

description='Behavioral state classifications',

748

data=[0, 1, 2, 1, 0, 2, 1], # Indices into elements

749

elements=['rest', 'locomotion', 'grooming']

750

)

751

752

# Access enumerated values

753

print(f"Behavior elements: {behavior_states.elements}")

754

print(f"Behavior data: {behavior_states.data}")

755

756

# Could also use string data directly

757

string_behavior = EnumData(

758

name='string_behavior',

759

description='String-based behavioral states',

760

data=['rest', 'locomotion', 'grooming', 'locomotion'],

761

elements=['rest', 'locomotion', 'grooming']

762

)

763

764

print(f"String behavior: {string_behavior.data}")

765

```