0
# Type System and Descriptions
1
2
PyTables provides a type system built on two families of classes: Atom types, which describe the individual data elements stored in arrays, and Column (Col) types, which describe the columns of a table. Together they cover the NumPy boolean, integer, floating-point, complex, and fixed-length byte-string types, plus PyTables-specific types for time values, enumerations, and variable-length or pickled objects, giving precise control over on-disk storage and memory usage.
3
4
## Capabilities
5
6
### Table Descriptions
7
8
Base classes for defining table structures with strongly-typed column definitions.
9
10
```python { .api }
11
class IsDescription:
12
"""
13
Base class for user-defined table descriptions.
14
Inherit from this class to define table structures.
15
"""
16
pass
17
18
class Description:
19
"""
20
Runtime table description created from dictionaries or existing tables.
21
"""
22
def __init__(self, description, validate=True):
23
"""
24
Create description from dictionary or class.
25
26
Parameters:
27
- description (dict or class): Column definitions
28
- validate (bool): Validate column definitions
29
"""
30
31
@classmethod
32
def from_dtype(cls, dtype, ptparams=None):
33
"""
34
Create description from NumPy dtype.
35
36
Parameters:
37
- dtype (numpy.dtype): NumPy structured dtype
38
- ptparams (dict): PyTables-specific parameters
39
40
Returns:
41
Description: Table description object
42
"""
43
```
44
45
### Atom Types
46
47
Atom types define the data type and storage characteristics for individual elements.
48
49
```python { .api }
50
class Atom:
51
"""Base class for all atom types."""
52
def __init__(self, type, shape=(), dflt=None):
53
"""
54
Base atom constructor.
55
56
Parameters:
57
- type (str): Type identifier
58
- shape (tuple): Element shape for multidimensional atoms
59
- dflt (any): Default value
60
"""
61
62
@property
63
def type(self):
64
"""String identifier for the atom type."""
65
66
@property
67
def shape(self):
68
"""Shape tuple for multidimensional atoms."""
69
70
@property
71
def size(self):
72
"""Total size in bytes of the atom (item size multiplied by the number of items in its shape)."""
73
74
# String Atoms
75
class StringAtom(Atom):
76
"""Fixed-length string atom."""
77
def __init__(self, itemsize, shape=(), dflt=b''):
78
"""
79
Parameters:
80
- itemsize (int): Maximum string length in bytes
81
- shape (tuple): Shape for arrays of strings
82
- dflt (bytes): Default value
83
"""
84
85
class VLStringAtom(Atom):
86
"""Variable-length byte-string pseudo-atom (raw bytes); usable only in VLArray nodes."""
87
def __init__(self, dflt=b''):
88
"""
89
Parameters:
90
- dflt (bytes): Default value
91
"""
92
93
class VLUnicodeAtom(Atom):
94
"""Variable-length Unicode string pseudo-atom; usable only in VLArray nodes."""
95
def __init__(self, dflt=''):
96
"""
97
Parameters:
98
- dflt (str): Default value
99
"""
100
101
# Boolean Atoms
102
class BoolAtom(Atom):
103
"""Boolean atom (True/False)."""
104
def __init__(self, shape=(), dflt=False):
105
"""
106
Parameters:
107
- shape (tuple): Shape for arrays of booleans
108
- dflt (bool): Default value
109
"""
110
111
# Integer Atoms
112
class IntAtom(Atom):
113
"""Signed integer atom whose width is selected by item size (32-bit by default)."""
114
def __init__(self, itemsize=4, shape=(), dflt=0):
115
"""
116
Parameters:
117
- itemsize (int): Size in bytes of each integer (1, 2, 4 or 8)
- shape (tuple): Shape for arrays of integers
118
- dflt (int): Default value
119
"""
120
121
class UIntAtom(Atom):
122
"""Unsigned integer atom whose width is selected by item size (32-bit by default)."""
123
def __init__(self, itemsize=4, shape=(), dflt=0): ...
124
125
class Int8Atom(Atom):
126
"""8-bit signed integer atom (-128 to 127)."""
127
def __init__(self, shape=(), dflt=0): ...
128
129
class UInt8Atom(Atom):
130
"""8-bit unsigned integer atom (0 to 255)."""
131
def __init__(self, shape=(), dflt=0): ...
132
133
class Int16Atom(Atom):
134
"""16-bit signed integer atom (-32768 to 32767)."""
135
def __init__(self, shape=(), dflt=0): ...
136
137
class UInt16Atom(Atom):
138
"""16-bit unsigned integer atom (0 to 65535)."""
139
def __init__(self, shape=(), dflt=0): ...
140
141
class Int32Atom(Atom):
142
"""32-bit signed integer atom."""
143
def __init__(self, shape=(), dflt=0): ...
144
145
class UInt32Atom(Atom):
146
"""32-bit unsigned integer atom."""
147
def __init__(self, shape=(), dflt=0): ...
148
149
class Int64Atom(Atom):
150
"""64-bit signed integer atom."""
151
def __init__(self, shape=(), dflt=0): ...
152
153
class UInt64Atom(Atom):
154
"""64-bit unsigned integer atom."""
155
def __init__(self, shape=(), dflt=0): ...
156
157
# Floating Point Atoms
158
class FloatAtom(Atom):
159
"""Floating point atom whose precision is selected by item size (64-bit by default)."""
160
def __init__(self, itemsize=8, shape=(), dflt=0.0): ...
161
162
class Float32Atom(Atom):
163
"""32-bit floating point atom (IEEE 754 single precision)."""
164
def __init__(self, shape=(), dflt=0.0): ...
165
166
class Float64Atom(Atom):
167
"""64-bit floating point atom (IEEE 754 double precision)."""
168
def __init__(self, shape=(), dflt=0.0): ...
169
170
class Float16Atom(Atom):
171
"""16-bit floating point atom (IEEE 754 half precision)."""
172
def __init__(self, shape=(), dflt=0.0):
173
"""
174
Note: Available when NumPy supports float16 type
175
"""
176
177
class Float96Atom(Atom):
178
"""96-bit extended precision floating point atom."""
179
def __init__(self, shape=(), dflt=0.0):
180
"""
181
Note: Platform-dependent availability
182
"""
183
184
class Float128Atom(Atom):
185
"""128-bit quadruple precision floating point atom."""
186
def __init__(self, shape=(), dflt=0.0):
187
"""
188
Note: Platform-dependent availability
189
"""
190
191
# Complex Number Atoms
192
class ComplexAtom(Atom):
193
"""Complex number atom whose precision is selected by item size (8 bytes for complex64, 16 bytes for complex128)."""
194
def __init__(self, itemsize, shape=(), dflt=0.0+0j): ...
195
196
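class Complex32Atom(Atom):
197
"""32-bit complex atom (two 16-bit floats). NOTE(review): NumPy has no complex32 dtype and PyTables' complex types start at complex64 - confirm this class exists before relying on it."""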
198
def __init__(self, shape=(), dflt=0.0+0j): ...
199
200
class Complex64Atom(Atom):
201
"""64-bit complex atom (two 32-bit floats)."""
202
def __init__(self, shape=(), dflt=0.0+0j): ...
203
204
class Complex128Atom(Atom):
205
"""128-bit complex atom (two 64-bit floats)."""
206
def __init__(self, shape=(), dflt=0.0+0j): ...
207
208
class Complex192Atom(Atom):
209
"""192-bit complex atom (two 96-bit floats)."""
210
def __init__(self, shape=(), dflt=0.0+0j):
211
"""
212
Note: Platform-dependent availability
213
"""
214
215
class Complex256Atom(Atom):
216
"""256-bit complex atom (two 128-bit floats)."""
217
def __init__(self, shape=(), dflt=0.0+0j):
218
"""
219
Note: Platform-dependent availability
220
"""
221
222
# Time Atoms
223
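class TimeAtom(Atom):
224
"""Time atom whose representation is selected by item size: 4 bytes (integer seconds since epoch, the default) or 8 bytes (floating-point seconds since epoch)."""
225
def __init__(self, itemsize=4, shape=(), dflt=0): ...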
226
227
class Time32Atom(Atom):
228
"""32-bit time atom (integer seconds since epoch)."""
229
def __init__(self, shape=(), dflt=0): ...
230
231
class Time64Atom(Atom):
232
"""64-bit time atom (float64 seconds since epoch; sub-second precision is carried in the fractional part)."""
233
def __init__(self, shape=(), dflt=0.0): ...
234
235
# Special Atoms
236
class EnumAtom(Atom):
237
"""Enumerated type atom with named values."""
238
def __init__(self, enum, dflt, base=None, shape=()):
239
"""
240
Parameters:
241
- enum (Enum): Enumeration definition
242
- dflt (any): Default enumeration value
243
- base (Atom): Base atom type for storage
244
- shape (tuple): Shape for arrays of enums
245
"""
246
247
class PseudoAtom(Atom):
248
"""Base class for pseudo-atoms, which describe variable-length or pickled values and can only be used in VLArray nodes."""
249
def __init__(self, kind, shape=(), dflt=None): ...
250
251
class ObjectAtom(Atom):
252
"""Object pseudo-atom that stores arbitrary Python objects via pickle; usable only in VLArray nodes, not as a table column."""
253
def __init__(self, shape=(), dflt=None): ...
254
```
255
256
### Column Types
257
258
Column types are used in table descriptions to define the structure and data types for table columns.
259
260
```python { .api }
261
class Col:
262
"""Base class for all column types."""
263
def __init__(self, type=None, itemsize=None, shape=(), dflt=None, pos=None):
264
"""
265
Base column constructor.
266
267
Parameters:
268
- type (str): Column type identifier
269
- itemsize (int): Size specification for variable types
270
- shape (tuple): Shape for multidimensional columns
271
- dflt (any): Default value
272
- pos (int): Column position in table
273
"""
274
275
# String Columns
276
class StringCol(Col):
277
"""Fixed-length string column."""
278
def __init__(self, itemsize, shape=(), dflt=b'', pos=None):
279
"""
280
Parameters:
281
- itemsize (int): Maximum string length in bytes
282
- shape (tuple): Shape for string arrays
283
- dflt (bytes): Default value
284
- pos (int): Column position
285
"""
286
287
# Boolean Columns
288
class BoolCol(Col):
289
"""Boolean column."""
290
def __init__(self, shape=(), dflt=False, pos=None): ...
291
292
# Integer Columns
293
class IntCol(Col):
294
"""Signed integer column whose width is selected by item size (32-bit by default)."""
295
def __init__(self, itemsize=4, shape=(), dflt=0, pos=None): ...
296
297
class UIntCol(Col):
298
"""Unsigned integer column whose width is selected by item size (32-bit by default)."""
299
def __init__(self, itemsize=4, shape=(), dflt=0, pos=None): ...
300
301
class Int8Col(Col):
302
"""8-bit signed integer column."""
303
def __init__(self, shape=(), dflt=0, pos=None): ...
304
305
class UInt8Col(Col):
306
"""8-bit unsigned integer column."""
307
def __init__(self, shape=(), dflt=0, pos=None): ...
308
309
class Int16Col(Col):
310
"""16-bit signed integer column."""
311
def __init__(self, shape=(), dflt=0, pos=None): ...
312
313
class UInt16Col(Col):
314
"""16-bit unsigned integer column."""
315
def __init__(self, shape=(), dflt=0, pos=None): ...
316
317
class Int32Col(Col):
318
"""32-bit signed integer column."""
319
def __init__(self, shape=(), dflt=0, pos=None): ...
320
321
class UInt32Col(Col):
322
"""32-bit unsigned integer column."""
323
def __init__(self, shape=(), dflt=0, pos=None): ...
324
325
class Int64Col(Col):
326
"""64-bit signed integer column."""
327
def __init__(self, shape=(), dflt=0, pos=None): ...
328
329
class UInt64Col(Col):
330
"""64-bit unsigned integer column."""
331
def __init__(self, shape=(), dflt=0, pos=None): ...
332
333
# Floating Point Columns
334
class FloatCol(Col):
335
"""Floating point column whose precision is selected by item size (64-bit by default)."""
336
def __init__(self, itemsize=8, shape=(), dflt=0.0, pos=None): ...
337
338
class Float32Col(Col):
339
"""32-bit floating point column."""
340
def __init__(self, shape=(), dflt=0.0, pos=None): ...
341
342
class Float64Col(Col):
343
"""64-bit floating point column."""
344
def __init__(self, shape=(), dflt=0.0, pos=None): ...
345
346
class Float16Col(Col):
347
"""16-bit floating point column."""
348
def __init__(self, shape=(), dflt=0.0, pos=None):
349
"""
350
Note: Available when NumPy supports float16 type
351
"""
352
353
class Float96Col(Col):
354
"""96-bit extended precision floating point column."""
355
def __init__(self, shape=(), dflt=0.0, pos=None):
356
"""
357
Note: Platform-dependent availability
358
"""
359
360
class Float128Col(Col):
361
"""128-bit quadruple precision floating point column."""
362
def __init__(self, shape=(), dflt=0.0, pos=None):
363
"""
364
Note: Platform-dependent availability
365
"""
366
367
# Complex Number Columns
368
class ComplexCol(Col):
369
"""Complex number column whose precision is selected by item size (8 bytes for complex64, 16 bytes for complex128)."""
370
def __init__(self, itemsize, shape=(), dflt=0.0+0j, pos=None): ...
371
372
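class Complex32Col(Col):
373
"""32-bit complex column. NOTE(review): NumPy has no complex32 dtype and PyTables' complex types start at complex64 - confirm this class exists before relying on it."""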
374
def __init__(self, shape=(), dflt=0.0+0j, pos=None): ...
375
376
class Complex64Col(Col):
377
"""64-bit complex column."""
378
def __init__(self, shape=(), dflt=0.0+0j, pos=None): ...
379
380
class Complex128Col(Col):
381
"""128-bit complex column."""
382
def __init__(self, shape=(), dflt=0.0+0j, pos=None): ...
383
384
class Complex192Col(Col):
385
"""192-bit complex column (two 96-bit floats)."""
386
def __init__(self, shape=(), dflt=0.0+0j, pos=None):
387
"""
388
Note: Platform-dependent availability
389
"""
390
391
class Complex256Col(Col):
392
"""256-bit complex column (two 128-bit floats)."""
393
def __init__(self, shape=(), dflt=0.0+0j, pos=None):
394
"""
395
Note: Platform-dependent availability
396
"""
397
398
# Time Columns
399
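class TimeCol(Col):
400
"""Time column whose representation is selected by item size: 4 bytes (integer seconds since epoch, the default) or 8 bytes (floating-point seconds since epoch)."""
401
def __init__(self, itemsize=4, shape=(), dflt=0, pos=None): ...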
402
403
class Time32Col(Col):
404
"""32-bit time column (integer seconds since epoch)."""
405
def __init__(self, shape=(), dflt=0, pos=None): ...
406
407
class Time64Col(Col):
408
"""64-bit time column (float64 seconds since epoch)."""
409
def __init__(self, shape=(), dflt=0.0, pos=None): ...
410
411
# Special Columns
412
class EnumCol(Col):
413
"""Enumerated type column."""
414
def __init__(self, enum, dflt, base=None, shape=(), pos=None):
415
"""
416
Parameters:
417
- enum (Enum): Enumeration definition
418
- dflt (any): Default enumeration value
419
- base (Col): Base column type for storage
420
- shape (tuple): Shape for enum arrays
421
- pos (int): Column position
422
"""
423
```
424
425
### Type Utilities
426
427
```python { .api }
428
def split_type(type):
429
"""
430
Split a type specification into components.
431
432
Parameters:
433
- type (str): Type specification string
434
435
Returns:
436
tuple: (kind, itemsize) components of the type
437
"""
438
```
439
440
## Usage Examples
441
442
### Defining Table Structures
443
444
```python
445
import tables as tb
446
447
# Method 1: Class-based description
448
class Experiment(tb.IsDescription):
449
# Basic types
450
run_id = tb.Int64Col() # 64-bit integer
451
timestamp = tb.Time64Col() # Float64 seconds since the epoch
452
temperature = tb.Float32Col() # 32-bit float
453
active = tb.BoolCol() # Boolean
454
455
# String types
456
name = tb.StringCol(50) # Fixed-length string (50 bytes)
457
notes = tb.StringCol(200, dflt=b'') # With default value
458
459
# Array types
460
coordinates = tb.Float64Col(shape=(3,)) # 3D position vector
461
measurements = tb.Int16Col(shape=(10,)) # Array of 10 measurements
462
463
# Complex types
464
signal = tb.Complex64Col() # Complex number
465
466
# Enumerated type; the Enum is built inline because IsDescription treats every class attribute as a column
466
status = tb.EnumCol(tb.Enum(['active', 'paused', 'stopped']), 'active',
467
base=tb.UInt8Col())
469
470
# Method 2: Dictionary-based description
471
experiment_desc = {
472
'run_id': tb.Int64Col(),
473
'timestamp': tb.Time64Col(),
474
'temperature': tb.Float32Col(),
475
'name': tb.StringCol(50),
476
'coordinates': tb.Float64Col(shape=(3,)),
477
'measurements': tb.Int16Col(shape=(10,))
478
}
479
480
# Create table with either approach
481
with tb.open_file("experiment.h5", "w") as h5file:
482
table1 = h5file.create_table("/", "exp_class", Experiment)
483
table2 = h5file.create_table("/", "exp_dict", experiment_desc)
484
```
485
486
### Working with Atoms for Arrays
487
488
```python
489
import tables as tb
490
import numpy as np
491
492
with tb.open_file("atoms.h5", "w") as h5file:
493
# Create arrays with specific atom types
494
495
# String array
496
string_atom = tb.StringAtom(20) # 20-byte strings
497
string_array = h5file.create_carray("/", "strings", string_atom,
498
shape=(100,), filters=tb.Filters(complevel=1))
499
500
# Time series data
501
time_atom = tb.Time64Atom() # Float64 seconds since the epoch (sub-second precision)
502
time_array = h5file.create_earray("/", "timestamps", time_atom,
503
shape=(0,), expectedrows=100000)
504
505
# Complex signal data
506
complex_atom = tb.Complex128Atom()
507
signal_array = h5file.create_carray("/", "signal", complex_atom,
508
shape=(1000, 1000))
509
510
# Multidimensional atoms: each of the 100 entries holds a 3-element float32 vector
511
vector_atom = tb.Float32Atom(shape=(3,)) # 3D vectors
512
vector_array = h5file.create_carray("/", "vectors", vector_atom,
513
shape=(100,))
514
```
515
516
### Advanced Type Usage
517
518
```python
519
import tables as tb
520
from enum import Enum
521
522
# Custom enumeration
523
class Priority(Enum):
524
LOW = 1
525
MEDIUM = 2
526
HIGH = 3
527
CRITICAL = 4
528
529
# Table with mixed advanced types
530
class TaskDescription(tb.IsDescription):
531
task_id = tb.UInt32Col()
532
created = tb.Time64Col()
533
534
# Fixed-length byte strings (itemsize is the maximum length in bytes)
535
title = tb.StringCol(100)
536
description = tb.StringCol(500, dflt=b'No description')
537
538
# Custom enumeration: EnumCol needs a tables.Enum (or list/dict), and the default is given by name
538
priority = tb.EnumCol(tb.Enum({p.name: p.value for p in Priority}), 'MEDIUM', base=tb.UInt8Col())
539
540
# Multi-dimensional data
541
progress_history = tb.Float32Col(shape=(10,)) # Last 10 progress values
542
543
# Complex metadata (stored as JSON-encoded bytes; ObjectAtom is only valid in VLArray nodes, not table columns)
544
metadata = tb.StringCol(256)
545
546
with tb.open_file("tasks.h5", "w") as h5file:
547
table = h5file.create_table("/", "tasks", TaskDescription)
548
549
# Add sample data
550
row = table.row
551
row['task_id'] = 1
552
row['created'] = 1640995200.0 # Seconds since the epoch (Time64 stores a float64)
553
row['title'] = b'Implement feature X'
554
row['priority'] = Priority.HIGH.value # Enum columns store the concrete value
555
row['progress_history'] = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
556
row['metadata'] = b'{"tags": ["urgent", "backend"], "assignee": "developer"}'
558
row.append()
559
table.flush()
560
```