Image transformation, compression, and decompression codecs for scientific computing
—
General-purpose lossless compression algorithms optimized for different data types and use cases. These codecs provide high-performance compression without data loss, making them ideal for scientific computing, data archival, and scenarios where exact data reconstruction is required.
Industry-standard deflate compression with zlib wrapper, widely compatible and efficient for general-purpose data compression.
def zlib_encode(data, level=None, *, out=None):
    """Return ZLIB encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (0-9, default 6).
      Higher values = better compression, slower speed
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: ZLIB compressed data with header and checksum
    """
def zlib_decode(data, *, out=None):
    """Return decoded ZLIB data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - ZLIB compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def zlib_check(data):
    """Check if data is ZLIB encoded.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Data to check

    Returns:
    bool | None: True if ZLIB format detected, None if uncertain
    """
def zlib_crc32(data, value=None):
    """Return CRC32 checksum.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Data to checksum
    - value: int | None - Initial CRC value for incremental calculation

    Returns:
    int: CRC32 checksum value
    """
def zlib_adler32(data, value=None):
"""
Return Adler-32 checksum.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to checksum
- value: int | None - Initial Adler-32 value for incremental calculation
Returns:
int: Adler-32 checksum value
"""

GZIP format compression compatible with the gzip command-line tool and HTTP compression.
def gzip_encode(data, level=None, *, out=None):
    """Return GZIP encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (0-9, default 6)
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: GZIP compressed data with header and trailer
    """
def gzip_decode(data, *, out=None):
    """Return decoded GZIP data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - GZIP compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def gzip_check(data):
"""
Check if data is GZIP encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
bool: True if GZIP magic number detected
"""

Columnar storage compressor optimized for numerical data with multi-threading and multiple compression algorithms.
def blosc_encode(data, level=None, *, compressor=None, shuffle=None, typesize=None, blocksize=None, numthreads=None, out=None):
    """Return BLOSC encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (0-9, default 5)
    - compressor: str | None - Compression algorithm:
      'blosclz' (default), 'lz4', 'lz4hc', 'snappy', 'zlib', 'zstd'
    - shuffle: int | None - Shuffle filter:
      0 = no shuffle, 1 = byte shuffle, 2 = bit shuffle
    - typesize: int | None - Element size in bytes for shuffle optimization
    - blocksize: int | None - Block size in bytes (default auto-determined)
    - numthreads: int | None - Number of threads for compression
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: BLOSC compressed data
    """
def blosc_decode(data, *, numthreads=None, out=None):
    """Return decoded BLOSC data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - BLOSC compressed data
    - numthreads: int | None - Number of threads for decompression
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def blosc_check(data):
"""
Check if data is BLOSC encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
None: Always returns None (format detected by attempting decompression)
"""

Modern compression algorithm providing excellent compression ratios with fast decompression speeds.
def zstd_encode(data, level=None, *, out=None):
    """Return ZSTD encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (1-22, default 3).
      Higher values = better compression, slower speed
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: ZSTD compressed data
    """
def zstd_decode(data, *, out=None):
    """Return decoded ZSTD data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - ZSTD compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def zstd_check(data):
"""
Check if data is ZSTD encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
bool | None: True if ZSTD magic number detected
"""

Ultra-fast compression algorithm optimized for speed over compression ratio.
def lz4_encode(data, level=None, *, out=None):
    """Return LZ4 encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (1-12, default 1).
      Higher values = better compression, slower speed
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: LZ4 compressed data
    """
def lz4_decode(data, *, out=None):
    """Return decoded LZ4 data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - LZ4 compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer
      (size must be known)

    Returns:
    bytes | bytearray: Decompressed data
    """
def lz4_check(data):
"""
Check if data is LZ4 encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
bool | None: True if LZ4 magic number detected
"""

LZ4 compression with frame format that includes metadata and content checksums for safe streaming.
def lz4f_encode(data, level=None, *, out=None):
    """Return LZ4F (LZ4 Frame format) encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (0-12, default 0)
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: LZ4F compressed data with frame header and footer
    """
def lz4f_decode(data, *, out=None):
    """Return decoded LZ4F data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - LZ4F compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def lz4f_check(data):
"""
Check if data is LZ4F encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
bool | None: True if LZ4F magic number detected
"""

High compression ratio algorithm used in 7-Zip and XZ utilities.
def lzma_encode(data, level=None, *, out=None):
    """Return LZMA encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (0-9, default 6)
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: LZMA compressed data
    """
def lzma_decode(data, *, out=None):
    """Return decoded LZMA data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - LZMA compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def lzma_check(data):
"""
Check if data is LZMA encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
bool | None: True if LZMA signature detected
"""

Google's compression algorithm optimized for web content and text compression.
def brotli_encode(data, level=None, *, mode=None, lgwin=None, out=None):
    """Return BROTLI encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - level: int | None - Compression level (0-11, default 6)
    - mode: int | None - Compression mode (0=generic, 1=text, 2=font)
    - lgwin: int | None - Window size (10-24, default 22)
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: BROTLI compressed data
    """
def brotli_decode(data, *, out=None):
    """Return decoded BROTLI data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - BROTLI compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def brotli_check(data):
"""
Check if data is BROTLI encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
None: Always returns None (no reliable magic number)
"""

Fast compression algorithm developed by Google for high-speed compression/decompression.
def snappy_encode(data, *, out=None):
    """Return SNAPPY encoded data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - Input data to compress
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: SNAPPY compressed data
    """
def snappy_decode(data, *, out=None):
    """Return decoded SNAPPY data.

    Parameters:
    - data: bytes | bytearray | mmap.mmap - SNAPPY compressed data
    - out: bytes | bytearray | None - Pre-allocated output buffer

    Returns:
    bytes | bytearray: Decompressed data
    """
def snappy_check(data):
"""
Check if data is SNAPPY encoded.
Parameters:
- data: bytes | bytearray | mmap.mmap - Data to check
Returns:
None: Always returns None (no magic number)
"""

import imagecodecs
import numpy as np
# Compress array data
data = np.random.randint(0, 256, 10000, dtype=np.uint8).tobytes()
# Try different algorithms
zlib_compressed = imagecodecs.zlib_encode(data, level=9)
zstd_compressed = imagecodecs.zstd_encode(data, level=3)
lz4_compressed = imagecodecs.lz4_encode(data, level=1)
print(f"Original size: {len(data)}")
print(f"ZLIB size: {len(zlib_compressed)} ({len(zlib_compressed)/len(data):.2%})")
print(f"ZSTD size: {len(zstd_compressed)} ({len(zstd_compressed)/len(data):.2%})")
print(f"LZ4 size: {len(lz4_compressed)} ({len(lz4_compressed)/len(data):.2%})")

import imagecodecs
import numpy as np
# Scientific array compression with BLOSC
data = np.random.random((1000, 1000)).astype(np.float32)
data_bytes = data.tobytes()
# Optimize for floating-point data
compressed = imagecodecs.blosc_encode(
data_bytes,
level=5,
compressor='zstd',
shuffle=1, # Byte shuffle for better compression
typesize=4, # float32 = 4 bytes
numthreads=4 # Multi-threaded compression
)
# Decompress with multi-threading
decompressed = imagecodecs.blosc_decode(compressed, numthreads=4)
recovered = np.frombuffer(decompressed, dtype=np.float32).reshape(1000, 1000)
assert np.array_equal(data, recovered)
print(f"Compression ratio: {len(compressed)/len(data_bytes):.2%}")

import imagecodecs
# Incremental checksum calculation
crc = 0
adler = 1
data_chunks = [b"chunk1", b"chunk2", b"chunk3"]
for chunk in data_chunks:
crc = imagecodecs.zlib_crc32(chunk, crc)
adler = imagecodecs.zlib_adler32(chunk, adler)
print(f"Final CRC32: {crc:08x}")
print(f"Final Adler32: {adler:08x}")

class ZLIB:
available: bool = True
class COMPRESSION:
NO_COMPRESSION = 0
BEST_SPEED = 1
BEST_COMPRESSION = 9
DEFAULT_COMPRESSION = 6
class STRATEGY:
DEFAULT_STRATEGY = 0
FILTERED = 1
HUFFMAN_ONLY = 2
RLE = 3
FIXED = 4

class BLOSC:
available: bool
class SHUFFLE:
NOSHUFFLE = 0
SHUFFLE = 1
BITSHUFFLE = 2
class COMPRESSOR:
BLOSCLZ = 'blosclz'
LZ4 = 'lz4'
LZ4HC = 'lz4hc'
SNAPPY = 'snappy'
ZLIB = 'zlib'
ZSTD = 'zstd'

class ZSTD:
available: bool
class STRATEGY:
FAST = 1
DFAST = 2
GREEDY = 3
LAZY = 4
LAZY2 = 5
BTLAZY2 = 6
BTOPT = 7
BTULTRA = 8
BTULTRA2 = 9

Install with Tessl CLI
npx tessl i tessl/pypi-imagecodecs