or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

arrays-homogeneous-data.md compression-filtering.md file-operations.md groups-navigation.md index.md querying-indexing.md tables-structured-data.md transactions-undo-redo.md type-system-descriptions.md

docs/compression-filtering.md

0

# Compression and Filtering

1

2

PyTables provides compression and filtering through several compression libraries: zlib, blosc, blosc2, bzip2, and lzo. Filters reduce on-disk storage size and can improve I/O performance, while the optional Fletcher32 checksum provides error detection to protect data integrity.

3

4

## Capabilities

5

6

### Filter Configuration

7

8

```python { .api }

9

class Filters:

10

def __init__(self, complevel=0, complib="zlib", shuffle=True, bitshuffle=False, fletcher32=False):

11

"""

12

Configure compression and filtering options.

13

14

Parameters:

15

- complevel (int): Compression level (0-9, 0=no compression)

16

- complib (str): Compression library ("zlib", "blosc", "blosc2", "bzip2", "lzo")

17

- shuffle (bool): Enable byte-shuffling filter

18

- bitshuffle (bool): Enable bit-shuffling filter (blosc/blosc2 only)

19

- fletcher32 (bool): Enable Fletcher32 checksum

20

"""

21

22

@property

23

def complevel(self):

24

"""Compression level (0-9)."""

25

26

@property

27

def complib(self):

28

"""Compression library name."""

29

30

@property

31

def shuffle(self):

32

"""Byte-shuffling filter status."""

33

34

@property

35

def bitshuffle(self):

36

"""Bit-shuffling filter status."""

37

38

@property

39

def fletcher32(self):

40

"""Fletcher32 checksum status."""

41

```

42

43

### Compression Library Management

44

45

```python { .api }

46

def set_blosc_max_threads(nthreads):

47

"""

48

Set maximum number of threads for Blosc compression.

49

50

Parameters:

51

- nthreads (int): Maximum threads (0 for automatic)

52

"""

53

54

def set_blosc2_max_threads(nthreads):

55

"""

56

Set maximum number of threads for Blosc2 compression.

57

58

Parameters:

59

- nthreads (int): Maximum threads (0 for automatic)

60

"""

61

62

def blosc_compcode_to_compname(compcode):

63

"""

64

Convert Blosc compression code to name.

65

66

Parameters:

67

- compcode (int): Compression code

68

69

Returns:

70

str: Compression algorithm name

71

"""

72

73

def blosc2_compcode_to_compname(compcode):

74

"""

75

Convert Blosc2 compression code to name.

76

77

Parameters:

78

- compcode (int): Compression code

79

80

Returns:

81

str: Compression algorithm name

82

"""

83

84

def blosc_get_complib_info():

85

"""

86

Get information about available Blosc compression libraries.

87

88

Returns:

89

dict: Library information including versions and supported algorithms

90

"""

91

92

def blosc2_get_complib_info():

93

"""

94

Get information about available Blosc2 compression libraries.

95

96

Returns:

97

dict: Library information including versions and supported algorithms

98

"""

99

100

blosc_compressor_list = ["blosclz", "lz4", "lz4hc", "snappy", "zlib", "zstd"]

101

"""List of available Blosc compressors."""

102

103

blosc2_compressor_list = ["blosclz", "lz4", "lz4hc", "zlib", "zstd"]

104

"""List of available Blosc2 compressors."""

105

```

106

107

## Usage Examples

108

109

```python

110

import tables as tb

111

import numpy as np

112

113

# Different compression configurations

114

filters_none = tb.Filters(complevel=0) # No compression

115

filters_zlib = tb.Filters(complevel=6, complib="zlib", shuffle=True)

116

filters_blosc = tb.Filters(complevel=5, complib="blosc", shuffle=True, fletcher32=True)

117

filters_blosc2 = tb.Filters(complevel=1, complib="blosc2", bitshuffle=True)

118

119

with tb.open_file("compressed.h5", "w") as h5file:

120

# Create arrays with different compression

121

data = np.random.random((1000, 1000))

122

123

array_none = h5file.create_carray("/", "uncompressed", tb.Float64Atom(),

124

data.shape, filters=filters_none)

125

array_zlib = h5file.create_carray("/", "zlib_compressed", tb.Float64Atom(),

126

data.shape, filters=filters_zlib)

127

array_blosc = h5file.create_carray("/", "blosc_compressed", tb.Float64Atom(),

128

data.shape, filters=filters_blosc)

129

130

# Fill all three arrays with the same data

131

array_none[:] = data

132

array_zlib[:] = data

133

array_blosc[:] = data

134

135

# Compare sizes

136

print(f"Uncompressed: {array_none.size_on_disk} bytes")

137

print(f"Zlib: {array_zlib.size_on_disk} bytes")

138

print(f"Blosc: {array_blosc.size_on_disk} bytes")

139

140

# Configure Blosc threading

141

tb.set_blosc_max_threads(4) # Use 4 threads for compression

142

```