0
# Tables and Structured Data
1
2
PyTables' table system provides efficient storage and retrieval of structured, record-oriented data with column-oriented access, conditional querying, indexing capabilities, and in-place modification. Tables are ideal for datasets where each record has the same structure but individual fields need to be accessed independently.
3
4
## Capabilities
5
6
### Table Creation and Structure
7
8
Tables store structured data using column definitions that specify data types, shapes, and constraints for each field.
9
10
```python { .api }
class Table:
    def __init__(self, parentnode, name, description, title="", filters=None, expectedrows=10000, **kwargs):
        """
        Table constructor (typically called via File.create_table).

        Parameters:
        - parentnode (Group): Parent group
        - name (str): Table name
        - description (IsDescription subclass, dict, Description, or NumPy dtype): Column structure definition
        - title (str): Descriptive title
        - filters (Filters): Compression and filtering options
        - expectedrows (int): Expected number of rows, used to tune the chunk size
        """

    @property
    def description(self):
        """Table structure description with column information."""

    @property
    def colnames(self):
        """List of column names in the table."""

    @property
    def coldtypes(self):
        """Dictionary mapping column names to NumPy data types."""

    @property
    def colindexed(self):
        """Dictionary indicating which columns have indexes."""
```
41
42
### Data Reading
43
44
Comprehensive data reading with slicing, field selection, and conditional filtering.
45
46
```python { .api }
class Table:
    def read(self, start=None, stop=None, step=None, field=None, out=None):
        """
        Read table data with optional slicing and field selection.

        Parameters:
        - start (int): Starting row index
        - stop (int): Ending row index (exclusive)
        - step (int): Step size for row selection
        - field (str): Single field name to read
        - out (array): Pre-allocated output array

        Returns:
        ndarray: Structured array containing the requested rows, or a plain
        array of that column's values when `field` is given
        """

    def read_where(self, condition, condvars=None, field=None, start=None, stop=None, step=None):
        """
        Read rows that satisfy a condition.

        Parameters:
        - condition (str): Conditional expression string
        - condvars (dict): Variables for use in condition
        - field (str): Single field name to read
        - start (int): Starting row for search
        - stop (int): Ending row for search (exclusive)
        - step (int): Step size for search

        Returns:
        ndarray: Rows satisfying the condition
        """

    def __getitem__(self, key):
        """
        Array-style indexing for table access.

        Parameters:
        - key (int, slice, list of row indices, or boolean array): Row selection specification

        Returns:
        record or ndarray: A single record for an integer key, otherwise a
        structured array with the selected rows
        """
```
91
92
### Conditional Iteration
93
94
Iterator-based access for memory-efficient processing of large datasets.
95
96
```python { .api }
class Table:
    def where(self, condition, condvars=None, start=None, stop=None, step=None):
        """
        Iterate over rows satisfying a condition.

        Parameters:
        - condition (str): Conditional expression string
        - condvars (dict): Variables for condition evaluation
        - start (int): Starting row for search
        - stop (int): Ending row for search (exclusive)
        - step (int): Step size for search

        Yields:
        Row: Each row object satisfying the condition. The same Row object is
        reused on every iteration, so copy out the values you need instead of
        storing the Row itself.
        """

    def iread(self, start=None, stop=None, step=None):
        """
        Iterate over table rows.

        Parameters:
        - start (int): Starting row index
        - stop (int): Ending row index (exclusive)
        - step (int): Step size

        Yields:
        Row: Each row in the specified range
        """

    def iterrows(self, start=None, stop=None, step=None):
        """
        Iterate over table rows.

        Parameters:
        - start (int): Starting row index
        - stop (int): Ending row index (exclusive)
        - step (int): Step size

        Yields:
        Row: Each row in the specified range
        """
```
139
140
### Data Modification
141
142
In-place data modification including appending new records, modifying existing data, and row removal.
143
144
```python { .api }
class Table:
    def append(self, rows):
        """
        Append new rows to the table.

        Parameters:
        - rows (array-like): Structured data to append (e.g. a structured
          array or a list of tuples matching the table description)
        """

    def modify_column(self, start=None, stop=None, step=None, column=None, colname=None):
        """
        Modify values in a specific column.

        Parameters:
        - start (int): Starting row index
        - stop (int): Ending row index (exclusive)
        - step (int): Step size
        - column (list, tuple, or array): New values for the column
        - colname (str): Name of the column to modify

        Returns:
        int: Number of modified rows
        """

    def modify_columns(self, start=None, stop=None, step=None, columns=None, names=None):
        """
        Modify multiple columns simultaneously.

        Parameters:
        - start (int): Starting row index
        - stop (int): Ending row index (exclusive)
        - step (int): Step size
        - columns (list, tuple, or structured array): New data, one entry per column
        - names (list of str): Names of the columns to modify, in the same order as `columns`

        Returns:
        int: Number of modified rows
        """

    def remove_rows(self, start=None, stop=None, step=None):
        """
        Remove rows from the table.

        Parameters:
        - start (int): Starting row index to remove
        - stop (int): Ending row index (exclusive). If None, ALL rows from
          `start` to the end of the table are removed; pass `start + 1` to
          remove a single row.
        - step (int): Step size between rows to remove

        Returns:
        int: Number of removed rows
        """

    def truncate(self, size):
        """
        Truncate table to specified number of rows.

        Parameters:
        - size (int): New table size in rows
        """
```
194
195
### Column Access
196
197
Individual column access through the `cols` attribute provides column-specific operations.
198
199
```python { .api }
class Cols:
    # Individual columns are reached by natural naming (table.cols.energy)
    # or with _f_col("energy"); item access on Cols selects ROWS.

    def __getitem__(self, key):
        """
        Get a row or a range of rows across all columns.

        Parameters:
        - key (int, slice): Row selection

        Returns:
        record or ndarray: A single record for an integer key, a structured
        array for a slice
        """

    def __setitem__(self, key, value):
        """
        Set a row or a range of rows across all columns.

        Parameters:
        - key (int, slice): Row selection
        - value (record or array-like): New row data
        """

    def _f_col(self, colname):
        """
        Get the accessor for a specific column.

        Parameters:
        - colname (str): Column name

        Returns:
        Column: Column accessor object (a Cols object for a nested column)
        """

class Column:
    def __getitem__(self, key):
        """
        Get column values with slicing support.

        Parameters:
        - key (int, slice): Row selection

        Returns:
        ndarray or scalar: Column values
        """

    def __setitem__(self, key, value):
        """
        Set column values with slicing support.

        Parameters:
        - key (int, slice): Row selection
        - value (scalar or array): New values
        """

    def create_index(self, optlevel=6, kind="medium", filters=None, tmp_dir=None):
        """
        Create an index for this column to accelerate queries.

        Parameters:
        - optlevel (int): Optimization level (0-9)
        - kind (str): Index type ("ultralight", "light", "medium", "full")
        - filters (Filters): Compression for index
        - tmp_dir (str): Temporary directory for index creation
        """

    def remove_index(self):
        """Remove the index from this column."""

    def reindex(self):
        """Recreate the index for this column."""
```
270
271
### Row Access
272
273
Individual row manipulation through Row objects.
274
275
```python { .api }
class Row:
    def __getitem__(self, name):
        """
        Get field value by name.

        Parameters:
        - name (str): Field name

        Returns:
        any: Field value
        """

    def __setitem__(self, name, value):
        """
        Set field value by name.

        Parameters:
        - name (str): Field name
        - value (any): New field value
        """

    def append(self):
        """
        Append this row's current values to the table.

        Rows are buffered; call Table.flush() after the last append() so the
        data is written to disk.
        """

    def update(self):
        """
        Write this row's modified values back to the table.

        Only meaningful while iterating over the table (e.g. inside a
        `for row in table.where(...)` loop).
        """

    @property
    def table(self):
        """Reference to the parent table."""
```
307
308
## Table Properties
309
310
```python { .api }
class Table:
    @property
    def cols(self):
        """Cols accessor for column-oriented operations."""

    @property
    def row(self):
        """Row accessor for record-oriented operations."""

    @property
    def nrows(self):
        """Number of rows in the table."""

    @property
    def shape(self):
        """Shape of the table as (nrows,)."""

    @property
    def size_in_memory(self):
        """Estimated memory usage of table data."""

    @property
    def size_on_disk(self):
        """Actual disk space used by the table."""
```
336
337
## Usage Examples
338
339
### Creating and Populating Tables
340
341
```python
import tables as tb
import numpy as np

# Define table structure
class Particle(tb.IsDescription):
    name = tb.StringCol(16)      # 16-byte string
    idnumber = tb.Int64Col()     # Signed 64-bit integer
    ADCcount = tb.UInt16Col()    # Unsigned 16-bit integer
    TDCcount = tb.UInt8Col()     # Unsigned 8-bit integer
    energy = tb.Float32Col()     # 32-bit float
    timestamp = tb.Time64Col()   # Timestamp

# Create file and table
with tb.open_file("particles.h5", "w") as h5file:
    table = h5file.create_table("/", "detector", Particle, "Particle Data")

    # Append data using Row interface
    particle = table.row
    for i in range(1000):
        particle['name'] = f'Particle_{i:04d}'
        particle['idnumber'] = i
        particle['ADCcount'] = np.random.randint(0, 65536)
        particle['TDCcount'] = np.random.randint(0, 256)
        particle['energy'] = np.random.exponential(10.0)
        particle['timestamp'] = i * 0.1
        particle.append()
    table.flush()

    # Query high-energy particles.
    # read_where() returns the matching rows as a structured array. Do not
    # collect the Row objects yielded by where() in a list: the iterator
    # reuses a single Row object, so every list entry would be the same row.
    high_energy = table.read_where('energy > 20.0')
    print(f"Found {len(high_energy)} high-energy particles")

    # Read specific columns
    energies = table.read(field='energy')
    timestamps = table.read(field='timestamp')

    # Column-based operations
    table.cols.energy[0:10] = np.random.random(10) * 100

    # Create index for faster queries
    table.cols.energy.create_index()
```
384
385
### Advanced Querying
386
387
```python
import tables as tb

# Complex conditional queries
with tb.open_file("particles.h5", "r") as h5file:
    table = h5file.root.detector

    # Query with multiple conditions
    results = table.read_where('(energy > 15.0) & (TDCcount < 100)')

    # Query with external variables
    min_energy = 10.0
    max_time = 50.0
    condition = '(energy > min_energy) & (timestamp < max_time)'
    results = table.read_where(condition, {'min_energy': min_energy, 'max_time': max_time})

    # Memory-efficient iteration over large result sets
    for row in table.where('energy > 30.0'):
        # StringCol values are bytes; decode them for display
        print(f"High energy particle: {row['name'].decode()}, energy: {row['energy']}")
```