0
# Querying and Indexing
1
2
PyTables provides powerful querying capabilities through expression-based conditional access and B-tree indexing for fast data retrieval. The query system supports complex conditions with optimized evaluation and memory-efficient iteration over large datasets.
3
4
## Capabilities
5
6
### Expression-Based Querying
7
8
```python { .api }
9
class Expr:
10
def __init__(self, expr, uservars=None, **kwargs):
11
"""
12
Compiled expression for fast evaluation.
13
14
Parameters:
15
- expr (str): Expression string
16
- uservars (dict): Variables for expression evaluation
17
"""
18
19
def eval(self):
20
"""
21
Evaluate expression and return results.
22
23
Returns:
24
ndarray: Boolean array with evaluation results
25
"""
26
27
def append(self, expr):
28
"""
29
Append additional expression.
30
31
Parameters:
32
- expr (str): Expression to append
33
"""
34
```
35
36
### Table Indexing
37
38
```python { .api }
39
class Column:
40
def create_index(self, optlevel=6, kind="medium", filters=None, tmp_dir=None):
41
"""
42
Create index for fast querying.
43
44
Parameters:
45
- optlevel (int): Optimization level (0-9)
46
- kind (str): Index type ("ultralight", "light", "medium", "full")
47
- filters (Filters): Compression for index data
48
- tmp_dir (str): Temporary directory for index creation
49
"""
50
51
def remove_index(self):
52
"""Remove existing index from column."""
53
54
def reindex(self):
55
"""Recreate index with current optimization settings."""
56
57
def reindex_dirty(self):
58
"""Reindex if column data has been modified since last index update."""
59
```
60
61
### Query Methods
62
63
```python { .api }
64
class Table:
65
def read_where(self, condition, condvars=None, **kwargs):
66
"""
67
Read rows matching condition.
68
69
Parameters:
70
- condition (str): Query condition
71
- condvars (dict): Variables for condition
72
73
Returns:
74
ndarray: Matching rows
75
"""
76
77
def where(self, condition, condvars=None, **kwargs):
78
"""
79
Iterate over rows matching condition.
80
81
Parameters:
82
- condition (str): Query condition
83
- condvars (dict): Variables for condition
84
85
Yields:
86
Row: Each matching row
87
"""
88
89
def iread_where(self, condition, condvars=None, **kwargs):
90
"""
91
Iterate over matching rows (alternative interface).
92
93
Parameters:
94
- condition (str): Query condition
95
- condvars (dict): Variables for condition
96
97
Yields:
98
Row: Each matching row
99
"""
100
```
101
102
## Usage Examples
103
104
```python
105
import tables as tb
106
import numpy as np
107
108
with tb.open_file("indexed_data.h5", "w") as h5file:
109
# Create table with sample data
110
class Measurement(tb.IsDescription):
111
sensor_id = tb.Int32Col()
112
timestamp = tb.Time64Col()
113
value = tb.Float64Col()
114
quality = tb.StringCol(10)
115
116
table = h5file.create_table("/", "measurements", Measurement)
117
118
# Fill with sample data
119
row = table.row
120
for i in range(10000):
121
row['sensor_id'] = np.random.randint(1, 100)
122
row['timestamp'] = i * 1.0
123
row['value'] = np.random.normal(25.0, 5.0)
124
row['quality'] = b'good' if np.random.random() > 0.1 else b'bad'
125
row.append()
126
table.flush()
127
128
# Create indexes for faster queries
129
table.cols.sensor_id.create_index()
130
table.cols.timestamp.create_index()
131
table.cols.value.create_index()
132
133
# Simple queries
134
high_values = table.read_where('value > 30.0')
135
sensor_data = table.read_where('sensor_id == 42')
136
137
# Complex queries with multiple conditions
138
recent_good = table.read_where('(timestamp > 5000.0) & (quality == b"good")')
139
140
# Queries with external variables
141
min_val = 20.0
142
max_val = 35.0
143
filtered = table.read_where('(value >= min_val) & (value <= max_val)',
144
{'min_val': min_val, 'max_val': max_val})
145
146
# Memory-efficient iteration
147
for row in table.where('(sensor_id < 10) & (value > 25.0)'):
148
print(f"Sensor {row['sensor_id']}: {row['value']}")
149
```