or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

arrays-homogeneous-data.mdcompression-filtering.mdfile-operations.mdgroups-navigation.mdindex.mdquerying-indexing.mdtables-structured-data.mdtransactions-undo-redo.mdtype-system-descriptions.md

querying-indexing.mddocs/

0

# Querying and Indexing

1

2

PyTables provides powerful querying capabilities through expression-based conditional access and B-tree indexing for fast data retrieval. The query system supports complex conditions with optimized evaluation and memory-efficient iteration over large datasets.

3

4

## Capabilities

5

6

### Expression-Based Querying

7

8

```python { .api }

9

class Expr:

10

def __init__(self, expr, uservars=None, **kwargs):

11

"""

12

Compiled expression for fast evaluation.

13

14

Parameters:

15

- expr (str): Expression string

16

- uservars (dict): Variables for expression evaluation

17

"""

18

19

def eval(self):

20

"""

21

Evaluate expression and return results.

22

23

Returns:

24

ndarray: Boolean array with evaluation results

25

"""

26

27

def append(self, expr):

28

"""

29

Append additional expression.

30

31

Parameters:

32

- expr (str): Expression to append

33

"""

34

```

35

36

### Table Indexing

37

38

```python { .api }

39

class Column:

40

def create_index(self, optlevel=6, kind="medium", filters=None, tmp_dir=None):

41

"""

42

Create index for fast querying.

43

44

Parameters:

45

- optlevel (int): Optimization level (0-9)

46

- kind (str): Index type ("ultralight", "light", "medium", "full")

47

- filters (Filters): Compression for index data

48

- tmp_dir (str): Temporary directory for index creation

49

"""

50

51

def remove_index(self):

52

"""Remove existing index from column."""

53

54

def reindex(self):

55

"""Recreate index with current optimization settings."""

56

57

def reindex_dirty(self):

58

"""Reindex if column data has been modified since last index update."""

59

```

60

61

### Query Methods

62

63

```python { .api }

64

class Table:

65

def read_where(self, condition, condvars=None, **kwargs):

66

"""

67

Read rows matching condition.

68

69

Parameters:

70

- condition (str): Query condition

71

- condvars (dict): Variables for condition

72

73

Returns:

74

ndarray: Matching rows

75

"""

76

77

def where(self, condition, condvars=None, **kwargs):

78

"""

79

Iterate over rows matching condition.

80

81

Parameters:

82

- condition (str): Query condition

83

- condvars (dict): Variables for condition

84

85

Yields:

86

Row: Each matching row

87

"""

88

89

def iread_where(self, condition, condvars=None, **kwargs):

90

"""

91

Iterate over matching rows (alternative interface).

92

93

Parameters:

94

- condition (str): Query condition

95

- condvars (dict): Variables for condition

96

97

Yields:

98

Row: Each matching row

99

"""

100

```

101

102

## Usage Examples

103

104

```python

105

import tables as tb

106

import numpy as np

107

108

with tb.open_file("indexed_data.h5", "w") as h5file:

109

# Create table with sample data

110

class Measurement(tb.IsDescription):

111

sensor_id = tb.Int32Col()

112

timestamp = tb.Time64Col()

113

value = tb.Float64Col()

114

quality = tb.StringCol(10)

115

116

table = h5file.create_table("/", "measurements", Measurement)

117

118

# Fill with sample data

119

row = table.row

120

for i in range(10000):

121

row['sensor_id'] = np.random.randint(1, 100)

122

row['timestamp'] = i * 1.0

123

row['value'] = np.random.normal(25.0, 5.0)

124

row['quality'] = b'good' if np.random.random() > 0.1 else b'bad'

125

row.append()

126

table.flush()

127

128

# Create indexes for faster queries

129

table.cols.sensor_id.create_index()

130

table.cols.timestamp.create_index()

131

table.cols.value.create_index()

132

133

# Simple queries

134

high_values = table.read_where('value > 30.0')

135

sensor_data = table.read_where('sensor_id == 42')

136

137

# Complex queries with multiple conditions

138

recent_good = table.read_where('(timestamp > 5000.0) & (quality == b"good")')

139

140

# Queries with external variables

141

min_val = 20.0

142

max_val = 35.0

143

filtered = table.read_where('(value >= min_val) & (value <= max_val)',

144

{'min_val': min_val, 'max_val': max_val})

145

146

# Memory-efficient iteration

147

for row in table.where('(sensor_id < 10) & (value > 25.0)'):

148

print(f"Sensor {row['sensor_id']}: {row['value']}")

149

```