# Compression and Filtering

PyTables provides compression and filtering through several libraries: zlib, blosc, blosc2, bzip2, and lzo. Filters are configured per dataset and trade CPU time for storage efficiency and I/O performance; an optional Fletcher32 checksum can be enabled to detect data corruption.

## Capabilities

### Filter Configuration

```python { .api }
class Filters:
    """Compression and filtering configuration."""

    def __init__(
        self,
        complevel=0,
        complib="zlib",
        shuffle=True,
        bitshuffle=False,
        fletcher32=False,
    ):
        """
        Configure compression and filtering options.

        Parameters:
        - complevel (int): Compression level (0-9, 0=no compression)
        - complib (str): Compression library ("zlib", "blosc", "blosc2", "bzip2", "lzo")
        - shuffle (bool): Enable byte-shuffling filter
        - bitshuffle (bool): Enable bit-shuffling filter (blosc/blosc2 only)
        - fletcher32 (bool): Enable Fletcher32 checksum
        """

    @property
    def complevel(self):
        """Compression level (0-9)."""

    @property
    def complib(self):
        """Compression library name."""

    @property
    def shuffle(self):
        """Byte-shuffling filter status."""

    @property
    def bitshuffle(self):
        """Bit-shuffling filter status."""

    @property
    def fletcher32(self):
        """Fletcher32 checksum status."""
```

### Compression Library Management

```python { .api }
def set_blosc_max_threads(nthreads):
    """
    Set maximum number of threads for Blosc compression.

    Parameters:
    - nthreads (int): Maximum threads (0 for automatic)
    """

def set_blosc2_max_threads(nthreads):
    """
    Set maximum number of threads for Blosc2 compression.

    Parameters:
    - nthreads (int): Maximum threads (0 for automatic)
    """

def blosc_compcode_to_compname(compcode):
    """
    Convert Blosc compression code to name.

    Parameters:
    - compcode (int): Compression code

    Returns:
    str: Compression algorithm name
    """

def blosc2_compcode_to_compname(compcode):
    """
    Convert Blosc2 compression code to name.

    Parameters:
    - compcode (int): Compression code

    Returns:
    str: Compression algorithm name
    """

def blosc_get_complib_info():
    """
    Get information about available Blosc compression libraries.

    Returns:
    dict: Library information including versions and supported algorithms
    """

def blosc2_get_complib_info():
    """
    Get information about available Blosc2 compression libraries.

    Returns:
    dict: Library information including versions and supported algorithms
    """

# NOTE(review): PyTables appears to expose these two names as callables that
# return the list for the current build, rather than as plain lists — confirm
# against the installed version before relying on the literal values below.

# Compressor names usable through the Blosc library.
blosc_compressor_list = [
    "blosclz",
    "lz4",
    "lz4hc",
    "snappy",
    "zlib",
    "zstd",
]
"""List of available Blosc compressors."""

# Compressor names usable through the Blosc2 library.
blosc2_compressor_list = [
    "blosclz",
    "lz4",
    "lz4hc",
    "zlib",
    "zstd",
]
"""List of available Blosc2 compressors."""

```

## Usage Examples

```python
import tables as tb
import numpy as np

# Different compression configurations
filters_none = tb.Filters(complevel=0)  # No compression
filters_zlib = tb.Filters(complevel=6, complib="zlib", shuffle=True)
filters_blosc = tb.Filters(complevel=5, complib="blosc", shuffle=True, fletcher32=True)
filters_blosc2 = tb.Filters(complevel=1, complib="blosc2", bitshuffle=True)

with tb.open_file("compressed.h5", "w") as h5file:
    # Create arrays with different compression
    data = np.random.random((1000, 1000))

    array_none = h5file.create_carray("/", "uncompressed", tb.Float64Atom(),
                                      data.shape, filters=filters_none)
    array_zlib = h5file.create_carray("/", "zlib_compressed", tb.Float64Atom(),
                                      data.shape, filters=filters_zlib)
    array_blosc = h5file.create_carray("/", "blosc_compressed", tb.Float64Atom(),
                                       data.shape, filters=filters_blosc)
    array_blosc2 = h5file.create_carray("/", "blosc2_compressed", tb.Float64Atom(),
                                        data.shape, filters=filters_blosc2)

    # Fill with same data
    array_none[:] = data
    array_zlib[:] = data
    array_blosc[:] = data
    array_blosc2[:] = data

    # Flush pending writes so the on-disk sizes below reflect all of the data
    h5file.flush()

    # Compare sizes
    print(f"Uncompressed: {array_none.size_on_disk} bytes")
    print(f"Zlib: {array_zlib.size_on_disk} bytes")
    print(f"Blosc: {array_blosc.size_on_disk} bytes")
    print(f"Blosc2: {array_blosc2.size_on_disk} bytes")

# Configure Blosc threading. The setting only affects Blosc operations that
# run after this call, so it does not apply to the writes above.
tb.set_blosc_max_threads(4)  # Use 4 threads for compression

```