Image transformation, compression, and decompression codecs for scientific computing

Utilities for array transformation, bit manipulation, byte shuffling, and data preparation for compression algorithms. These functions optimize data layout and remove redundancy to improve compression efficiency, or prepare data for specific processing requirements.
Delta encoding: compute differences between adjacent elements to remove trends and improve compressibility.
def delta_encode(data, *, axis=-1, dist=1, out=None):
    """Encode *data* as differences between elements spaced *dist* apart.

    The leading element(s) along *axis* are stored unchanged; every other
    element is replaced by its difference from the element *dist* positions
    earlier, removing trends to improve compressibility.

    Args:
        data: Input array (any numeric dtype).
        axis: Axis along which differences are computed (default: last).
        dist: Spacing between the differenced elements (default 1,
            i.e. adjacent elements).
        out: Optional pre-allocated output array of the same shape.

    Returns:
        Delta encoded array.
    """
def delta_decode(data, *, axis=-1, dist=1, out=None):
    """Reconstruct the original array from delta encoded *data*.

    Inverse of ``delta_encode``. The *axis* and *dist* arguments must match
    the values used during encoding.

    Args:
        data: Delta encoded array.
        axis: Axis along which the deltas were computed (default: last).
        dist: Spacing used during encoding (default: 1).
        out: Optional pre-allocated output array.

    Returns:
        Decoded array reconstructed from the differences.
    """
def delta_check(data):
    """Always return None.

    Delta encoding is a reversible transform, not a container format, so
    *data* carries no signature that could be detected.

    Args:
        data: Bytes-like object or array to inspect.
    """
# Bit shuffling: reorganize bits to group similar bit positions together,
# improving compression of typed data.
def bitshuffle_encode(data, *, itemsize=1, blocksize=0, out=None):
    """Bit-shuffle *data* so that equal bit positions are stored together.

    Args:
        data: Bytes-like object or array to shuffle.
        itemsize: Item width in bytes (default 1). Common values:
            1 (uint8), 2 (uint16), 4 (uint32/float32), 8 (uint64/float64).
        blocksize: Shuffle block size in bytes; 0 (default) selects a
            size automatically.
        out: Optional pre-allocated output buffer.

    Returns:
        Bit-shuffled data (bytes, bytearray, or array, matching input).
    """
def bitshuffle_decode(data, *, itemsize=1, blocksize=0, out=None):
    """Undo ``bitshuffle_encode``.

    Args:
        data: Bit-shuffled bytes-like object or array.
        itemsize: Item width in bytes; must match the value used to encode.
        blocksize: Block size in bytes; must match the value used to encode.
        out: Optional pre-allocated output buffer.

    Returns:
        Reconstructed data (bytes, bytearray, or array, matching input).
    """
def bitshuffle_check(data):
    """Look for a bitshuffle signature in *data*.

    Args:
        data: Bytes-like object to inspect.

    Returns:
        True when a bitshuffle signature is detected, otherwise None.
    """
# Byte shuffling: reorder bytes to group similar byte positions together,
# useful for multi-byte data types.
def byteshuffle_encode(data, *, axis=-1, dist=1, delta=False, reorder=False, out=None):
    """Byte-shuffle *data*, optionally preceded by delta encoding.

    Args:
        data: Input array to shuffle.
        axis: Axis along which to shuffle (default: last).
        dist: Distance of the shuffling pattern (default: 1).
        delta: When True, delta encode before shuffling (default False).
        reorder: When True, reorder dimensions for better locality
            (default False).
        out: Optional pre-allocated output array.

    Returns:
        Byte-shuffled array.
    """
def byteshuffle_decode(data, *, axis=-1, dist=1, delta=False, reorder=False, out=None):
    """Undo ``byteshuffle_encode``.

    All keyword arguments must match the values used during encoding.

    Args:
        data: Byte-shuffled array.
        axis: Axis along which shuffling was applied (default: last).
        dist: Distance used for shuffling (default: 1).
        delta: When True, reverse delta encoding after unshuffling.
        reorder: When True, reverse the dimension reordering.
        out: Optional pre-allocated output array.

    Returns:
        Reconstructed array.
    """
def byteshuffle_check(data):
    """Always return None.

    Byte shuffling is a transform, not a container format; *data* carries
    no detectable signature.

    Args:
        data: Bytes-like object to inspect.
    """
# Integer packing: pack integer arrays by removing unused high-order bits
# to reduce storage requirements.
def packints_encode(data, *, out=None):
    """Pack an unsigned integer array into a reduced bit width.

    Args:
        data: Integer array to pack (uint8, uint16, uint32, or uint64).
        out: Optional pre-allocated output array.

    Returns:
        Packed integer data with unused high-order bits removed.
    """
def packints_decode(data, dtype=None, *, out=None):
    """Unpack integers produced by ``packints_encode``.

    Args:
        data: Packed integer data.
        dtype: Target dtype for the unpacked array (required).
        out: Optional pre-allocated output array.

    Returns:
        Unpacked integer array.
    """
def packints_check(data):
    """Always return None.

    Integer packing is a transform, not a container format; there is no
    signature to detect in *data*.

    Args:
        data: Bytes-like object to inspect.
    """
# PackBits: simple run-length encoding compression used in TIFF and other
# formats.
def packbits_encode(data, *, out=None):
    """Compress *data* with the PackBits run-length encoding.

    Args:
        data: Bytes-like object to encode.
        out: Optional pre-allocated output buffer.

    Returns:
        PackBits encoded bytes or bytearray.
    """
def packbits_decode(data, *, out=None):
    """Decompress PackBits encoded *data*.

    Args:
        data: PackBits encoded bytes-like object.
        out: Optional pre-allocated output buffer.

    Returns:
        Decoded bytes or bytearray.
    """
def packbits_check(data):
    """Always return None.

    PackBits streams have no reliable magic number, so detection is not
    possible.

    Args:
        data: Bytes-like object to inspect.
    """
# XOR transform: apply XOR to remove correlation between adjacent values.
def xor_encode(data, *, out=None):
    """XOR encode *data* to decorrelate adjacent values.

    Args:
        data: Integer array to encode.
        out: Optional pre-allocated output array.

    Returns:
        XOR encoded array.
    """
def xor_decode(data, *, out=None):
    """Undo ``xor_encode``.

    Args:
        data: XOR encoded array.
        out: Optional pre-allocated output array.

    Returns:
        Decoded array.
    """
def xor_check(data):
    """Always return None.

    The XOR transform is not a container format; *data* carries no
    detectable signature.

    Args:
        data: Bytes-like object to inspect.
    """
# Bit order: reverse the bit order within bytes for compatibility with
# different endianness or protocols.
def bitorder_encode(data, *, out=None):
    """Reverse the bit order inside every byte of *data*.

    Args:
        data: Bytes-like object or array.
        out: Optional pre-allocated output buffer.

    Returns:
        Data with the bits of each byte reversed.
    """
def bitorder_decode(data, *, out=None):
    """Restore the original bit order (the transform is its own inverse).

    Args:
        data: Bit-reversed bytes-like object or array.
        out: Optional pre-allocated output buffer.

    Returns:
        Data with the original bit order.
    """
def bitorder_check(data):
    """Always return None.

    Bit order reversal is a transform, not a container format; there is no
    signature to detect in *data*.

    Args:
        data: Bytes-like object to inspect.
    """
# Quantization: reduce the precision of floating-point data by quantizing
# to fewer levels.
def quantize_encode(data, *, levels=None, out=None):
    """Quantize floating-point *data* to a limited number of levels (lossy).

    Args:
        data: Floating-point array to quantize.
        levels: Number of quantization levels; None selects the default
            of 256.
        out: Optional pre-allocated output array.

    Returns:
        Quantized data (typically an integer dtype).
    """
def quantize_decode(data, *, levels=None, out=None):
    """Map quantized *data* back to floating-point values.

    The *levels* argument must match the value used during encoding; the
    round trip is lossy.

    Args:
        data: Quantized data.
        levels: Number of quantization levels used for encoding; None
            selects the default of 256.
        out: Optional pre-allocated output array.

    Returns:
        Dequantized floating-point data.
    """
def quantize_check(data):
    """Always return None.

    Quantization is a transform, not a container format; *data* carries no
    detectable signature.

    Args:
        data: Bytes-like object to inspect.
    """
import imagecodecs
import numpy as np

# Example: delta + bitshuffle preprocessing before zlib compression.
# Simulate 16-bit sensor data
sensor_data = np.random.randint(0, 65536, (1024, 1024), dtype=np.uint16)
# Apply delta encoding to remove gradients
delta_encoded = imagecodecs.delta_encode(sensor_data, axis=1)  # Row-wise differences
# Apply bit shuffling optimized for 16-bit data
bit_shuffled = imagecodecs.bitshuffle_encode(
    delta_encoded,
    itemsize=2,  # 16-bit = 2 bytes
    blocksize=8192  # 8KB blocks
)
# Compress the preprocessed data
compressed = imagecodecs.zlib_encode(bit_shuffled.tobytes(), level=9)
# Compare with direct compression
direct_compressed = imagecodecs.zlib_encode(sensor_data.tobytes(), level=9)
print(f"Original size: {sensor_data.nbytes} bytes")
print(f"Direct compression: {len(direct_compressed)} bytes ({len(direct_compressed)/sensor_data.nbytes:.2%})")
print(f"Preprocessed compression: {len(compressed)} bytes ({len(compressed)/sensor_data.nbytes:.2%})")
print(f"Improvement: {len(direct_compressed) / len(compressed):.1f}x")
# Decompress and decode: apply the inverse transforms in reverse order
# (unzip, un-bitshuffle, un-delta)
decompressed_bytes = imagecodecs.zlib_decode(compressed)
decompressed_array = np.frombuffer(decompressed_bytes, dtype=np.uint16).reshape(sensor_data.shape)
bit_unshuffled = imagecodecs.bitshuffle_decode(decompressed_array, itemsize=2, blocksize=8192)
reconstructed = imagecodecs.delta_decode(bit_unshuffled, axis=1)
assert np.array_equal(sensor_data, reconstructed)
import imagecodecs
import numpy as np

# Example: floating-point predictor + byte shuffle + BLOSC for time series.
# Simulate time-series scientific measurements
time_points, sensors = 10000, 128
measurements = np.cumsum(np.random.normal(0, 0.1, (time_points, sensors)), axis=0).astype(np.float32)
# Apply floating-point predictor along time axis
predicted = imagecodecs.floatpred_encode(measurements, axis=0)
# Apply byte shuffling for better compression
shuffled = imagecodecs.byteshuffle_encode(predicted, axis=1, delta=False)
# Compress with high-performance algorithm
compressed = imagecodecs.blosc_encode(
    shuffled.tobytes(),
    level=5,
    compressor='zstd',
    shuffle=1,  # Additional byte shuffle at BLOSC level
    typesize=4,  # float32 = 4 bytes
    numthreads=4
)
print(f"Original: {measurements.nbytes} bytes")
print(f"Compressed: {len(compressed)} bytes ({len(compressed)/measurements.nbytes:.2%})")
# Decompress and reconstruct in reverse order
decompressed_bytes = imagecodecs.blosc_decode(compressed, numthreads=4)
decompressed_array = np.frombuffer(decompressed_bytes, dtype=np.float32).reshape(measurements.shape)
unshuffled = imagecodecs.byteshuffle_decode(decompressed_array, axis=1, delta=False)
reconstructed = imagecodecs.floatpred_decode(unshuffled, axis=0)
# Verify reconstruction; allclose (not array_equal) guards against
# floating-point round-trip differences
assert np.allclose(measurements, reconstructed, rtol=1e-7, atol=1e-7)
import imagecodecs
import numpy as np

# Example: integer packing + XOR + PackBits for sparse integer data.
# Simulate sparse integer data (many small values)
data = np.random.choice([0, 1, 2, 3, 4, 255, 65535], size=(1000, 1000),
                        p=[0.4, 0.2, 0.15, 0.1, 0.1, 0.04, 0.01]).astype(np.uint16)
# Pack integers to remove unused high bits
packed = imagecodecs.packints_encode(data)
print(f"Original dtype: {data.dtype}, packed dtype: {packed.dtype}")
# Apply XOR encoding to remove correlation
xor_encoded = imagecodecs.xor_encode(packed)
# Apply run-length encoding for sparse data
packbits_compressed = imagecodecs.packbits_encode(xor_encoded.tobytes())
print(f"Original: {data.nbytes} bytes")
print(f"After packing: {packed.nbytes} bytes")
print(f"After PackBits: {len(packbits_compressed)} bytes")
print(f"Total compression: {data.nbytes / len(packbits_compressed):.1f}x")
# Reconstruct by reversing each transform in the opposite order
packbits_decompressed = imagecodecs.packbits_decode(packbits_compressed)
packed_array = np.frombuffer(packbits_decompressed, dtype=packed.dtype).reshape(packed.shape)
xor_decoded = imagecodecs.xor_decode(packed_array)
unpacked = imagecodecs.packints_decode(xor_decoded, dtype=data.dtype)
assert np.array_equal(data, unpacked)
import imagecodecs
import numpy as np

# Example: multi-axis delta + byte shuffle + LZMA for 3D volumes.
# 3D medical or scientific dataset
depth, height, width = 64, 512, 512
volume = np.random.randint(0, 4096, (depth, height, width), dtype=np.uint16)
# Apply delta encoding along different axes
z_delta = imagecodecs.delta_encode(volume, axis=0)  # Slice-to-slice differences
xy_delta = imagecodecs.delta_encode(z_delta, axis=2)  # Column differences
# Byte shuffle optimized for 3D data
shuffled = imagecodecs.byteshuffle_encode(xy_delta, axis=1, reorder=True)
# Compress with algorithm suitable for 3D data
compressed = imagecodecs.lzma_encode(shuffled.tobytes(), level=6)
print(f"3D volume: {volume.shape}")
print(f"Original: {volume.nbytes} bytes")
print(f"Compressed: {len(compressed)} bytes ({len(compressed)/volume.nbytes:.2%})")
# Reconstruct: undo the transforms in reverse order (axis=2 delta was
# applied last, so it is decoded first)
decompressed_bytes = imagecodecs.lzma_decode(compressed)
decompressed_array = np.frombuffer(decompressed_bytes, dtype=volume.dtype).reshape(volume.shape)
unshuffled = imagecodecs.byteshuffle_decode(decompressed_array, axis=1, reorder=True)
xy_reconstructed = imagecodecs.delta_decode(unshuffled, axis=2)
z_reconstructed = imagecodecs.delta_decode(xy_reconstructed, axis=0)
assert np.array_equal(volume, z_reconstructed)
import imagecodecs
import numpy as np

# Example: lossy quantization before zlib compression of float data.
# High-precision floating-point data
data = np.random.normal(0, 1, (256, 256)).astype(np.float64)
# Quantize to reduce precision
quantized = imagecodecs.quantize_encode(data, levels=1024)  # 10-bit quantization
print(f"Original dtype: {data.dtype}, quantized dtype: {quantized.dtype}")
# Compress quantized data (integers compress better)
compressed = imagecodecs.zlib_encode(quantized.tobytes(), level=9)
# Compare with direct float compression
direct_compressed = imagecodecs.zlib_encode(data.tobytes(), level=9)
print(f"Original: {data.nbytes} bytes")
print(f"Direct compression: {len(direct_compressed)} bytes")
print(f"Quantized compression: {len(compressed)} bytes")
print(f"Improvement: {len(direct_compressed) / len(compressed):.1f}x")
# Reconstruct (lossy: quantization error is expected)
decompressed_bytes = imagecodecs.zlib_decode(compressed)
quantized_restored = np.frombuffer(decompressed_bytes, dtype=quantized.dtype).reshape(data.shape)
dequantized = imagecodecs.quantize_decode(quantized_restored, levels=1024)
# Measure quantization error
max_error = np.max(np.abs(data - dequantized))
mse = np.mean((data - dequantized) ** 2)
print(f"Max quantization error: {max_error:.6f}")
print(f"MSE: {mse:.6f}")
class BITSHUFFLE:
    # True when the bitshuffle codec is available in this build.
    available: bool
    # Common item sizes, in bytes, for typed data
    ITEMSIZE_UINT8 = 1
    ITEMSIZE_UINT16 = 2
    ITEMSIZE_UINT32 = 4
    ITEMSIZE_UINT64 = 8
    ITEMSIZE_FLOAT32 = 4
    ITEMSIZE_FLOAT64 = 8
class DELTA:
available: bool = True # Pure Python implementation always available
# Common distance values
DISTANCE_ADJACENT = 1 # Adjacent elements
DISTANCE_ROW = None # Width of 2D array (context-dependent)
DISTANCE_PLANE = None # Area of 2D slice in 3D arrayAll array processing functions use the base ImcdError exception class:
class ImcdError(Exception):
    """Base IMCD codec exception."""

# Specific aliases for array processing; all are the same class, so
# callers may catch ImcdError to handle any of them.
DeltaError = ImcdError
BitshuffleError = Exception  # Uses standard bitshuffle exceptions
ByteshuffleError = ImcdError
PackintsError = ImcdError
PackbitsError = ImcdError
XorError = ImcdError
BitorderError = ImcdError
QuantizeError = ImcdError
Install with Tessl CLI
npx tessl i tessl/pypi-imagecodecs