High-performance compression library wrapper for binary and numerical data with multiple algorithms and shuffle filters
npx @tessl/cli install tessl/pypi-blosc@1.11.0
A high-performance compression library wrapper providing Python bindings for the Blosc compression library. Optimized for compressing binary and numerical data with multiple compression algorithms (blosclz, lz4, lz4hc, snappy, zlib, zstd) and configurable shuffling filters for optimal performance on time series, sparse data, and regular-spaced numerical arrays.
pip install blosc
import blosc
import blosc
import array
# Basic compression and decompression
data = b'0123456789' * 1000
compressed = blosc.compress(data, typesize=1)
decompressed = blosc.decompress(compressed)
# Working with numerical arrays
a = array.array('i', range(1000000))
a_bytes = a.tobytes()
compressed_array = blosc.compress(a_bytes, typesize=4, cname='lz4')
decompressed_array = blosc.decompress(compressed_array)
# Configuration
blosc.set_nthreads(4) # Use 4 threads
blosc.set_blocksize(0) # Automatic blocksize
# Get compression information
nbytes, cbytes, blocksize = blosc.get_cbuffer_sizes(compressed)
clib = blosc.get_clib(compressed)
Primary compression and decompression operations supporting bytes-like objects with configurable compression parameters.
def compress(bytesobj, typesize=8, clevel=9, shuffle=blosc.SHUFFLE, cname='blosclz'):
"""
Compress bytesobj with specified parameters.
Parameters:
- bytesobj: bytes-like object supporting buffer interface
- typesize: int, data type size (1-255)
- clevel: int, compression level 0-9 (0=no compression, 9=max)
- shuffle: int, shuffle filter (NOSHUFFLE, SHUFFLE, BITSHUFFLE)
- cname: str, compressor name ('blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd')
Returns:
bytes: Compressed data
Raises:
TypeError: If bytesobj doesn't support buffer interface
ValueError: If parameters out of range or cname invalid
"""
def decompress(bytes_like, as_bytearray=False):
"""
Decompress bytes-like compressed object.
Parameters:
- bytes_like: bytes-like object with compressed data
- as_bytearray: bool, return bytearray instead of bytes
Returns:
bytes or bytearray: Decompressed data
Raises:
TypeError: If bytes_like doesn't support buffer protocol
"""
Low-level compression and decompression using memory addresses for integration with NumPy arrays and ctypes.
def compress_ptr(address, items, typesize=8, clevel=9, shuffle=blosc.SHUFFLE, cname='blosclz'):
"""
Compress data at memory address.
Parameters:
- address: int, memory pointer to data
- items: int, number of items of typesize to compress
- typesize: int, size of each data item
- clevel: int, compression level 0-9
- shuffle: int, shuffle filter
- cname: str, compressor name
Returns:
bytes: Compressed data
Raises:
TypeError: If address not int
ValueError: If items negative or total size exceeds limits
"""
def decompress_ptr(bytes_like, address):
"""
Decompress data directly into memory address.
Parameters:
- bytes_like: bytes-like object with compressed data
- address: int, memory pointer where to write decompressed data
Returns:
int: Number of bytes written
Raises:
TypeError: If address not int or bytes_like invalid
"""
High-level functions for compressing and decompressing NumPy arrays using pickle serialization.
def pack_array(array, clevel=9, shuffle=blosc.SHUFFLE, cname='blosclz'):
"""
Pack (compress) a NumPy array.
Parameters:
- array: ndarray, NumPy array to compress
- clevel: int, compression level 0-9
- shuffle: int, shuffle filter
- cname: str, compressor name
Returns:
bytes: Packed array data
Raises:
TypeError: If array doesn't have dtype and shape attributes
ValueError: If array size exceeds limits or parameters invalid
"""
def unpack_array(packed_array, **kwargs):
"""
Unpack (decompress) a packed NumPy array.
Parameters:
- packed_array: bytes, packed array data
- **kwargs: Additional parameters for pickle.loads
Returns:
ndarray: Decompressed NumPy array
Raises:
TypeError: If packed_array not bytes
"""
Functions to inspect compressed buffer properties and validate compressed data.
def get_cbuffer_sizes(bytesobj):
"""
Get information about compressed buffer.
Parameters:
- bytesobj: bytes, compressed buffer
Returns:
tuple: (uncompressed_bytes, compressed_bytes, blocksize)
"""
def cbuffer_validate(bytesobj):
"""
Validate compressed buffer safety.
Parameters:
- bytesobj: bytes, compressed buffer to validate
Returns:
bool: True if buffer is safe to decompress
"""
def get_clib(bytesobj):
"""
Get compression library name from compressed buffer.
Parameters:
- bytesobj: bytes, compressed buffer
Returns:
str: Name of compression library used
"""
Functions to configure Blosc behavior including threading and block sizes.
def set_nthreads(nthreads):
"""
Set number of threads for Blosc operations.
Parameters:
- nthreads: int, number of threads (1 to MAX_THREADS)
Returns:
int: Previous number of threads
Raises:
ValueError: If nthreads exceeds MAX_THREADS
"""
def set_blocksize(blocksize):
"""
Force specific blocksize (0 for automatic).
Parameters:
- blocksize: int, blocksize in bytes (0 for automatic)
"""
def get_blocksize():
"""
Get current blocksize setting.
Returns:
int: Current blocksize (0 means automatic)
"""
def set_releasegil(gilstate):
"""
Set whether to release Python GIL during operations.
Parameters:
- gilstate: bool, True to release GIL during compression/decompression
Returns:
bool: Previous GIL release state
"""
System detection, resource management, and version information functions.
def detect_number_of_cores():
"""
Detect number of CPU cores in system.
Returns:
int: Number of cores detected
"""
def free_resources():
"""
Free memory temporaries and thread resources.
Returns:
None
"""
def print_versions():
"""
Print versions of blosc and all dependencies.
Returns:
None
"""
Functions to query available compressors and their properties.
def compressor_list():
"""
Get list of available compressors.
Returns:
list: List of compressor names
"""
def code_to_name(code):
"""
Convert compressor code to name.
Parameters:
- code: int, compressor code
Returns:
str: Compressor name
"""
def name_to_code(name):
"""
Convert compressor name to code.
Parameters:
- name: str, compressor name
Returns:
int: Compressor code
"""
def clib_info(cname):
"""
Get compression library information.
Parameters:
- cname: str, compressor name
Returns:
tuple: (library_name, version)
"""
def test():
"""
Run blosc test suite.
Returns:
None
"""
Functions for initializing and cleaning up Blosc resources (called automatically):
def init():
"""
Initialize Blosc library.
Returns:
None
Note: Called automatically on package import
"""
def destroy():
"""
Destroy Blosc resources and cleanup.
Returns:
None
Note: Called automatically on program exit
"""
__version__: str # Python package version
VERSION_STRING: str # Blosc C library version
VERSION_DATE: str # Blosc C library date
blosclib_version: str # Combined version string
MAX_BUFFERSIZE: int # Maximum buffer size for compression
MAX_THREADS: int # Maximum number of threads
MAX_TYPESIZE: int # Maximum type size (255)
NOSHUFFLE: int # No shuffle filter (0)
SHUFFLE: int # Byte shuffle filter (1)
BITSHUFFLE: int # Bit shuffle filter (2)
Backward compatibility constants with BLOSC_ prefix:
BLOSC_VERSION_STRING: str # Alias for VERSION_STRING
BLOSC_VERSION_DATE: str # Alias for VERSION_DATE
BLOSC_MAX_BUFFERSIZE: int # Alias for MAX_BUFFERSIZE
BLOSC_MAX_THREADS: int # Alias for MAX_THREADS
BLOSC_MAX_TYPESIZE: int # Alias for MAX_TYPESIZE
Current state variables updated by configuration functions:
nthreads: int # Current number of threads in use
ncores: int # Number of cores detected on system
cnames: list # List of available compressor names
cname2clib: dict # Map compressor names to libraries
clib_versions: dict # Map libraries to versions
filters: dict # Map shuffle constants to string names
Common exceptions raised by blosc functions:
- clevel not in 0-9 range
- typesize not in 1-MAX_TYPESIZE range
- cname not in available compressors
- shuffle not NOSHUFFLE, SHUFFLE, or BITSHUFFLE
- nthreads exceeds MAX_THREADS