Extremely thin and easy-to-install Python bindings to de/compression algorithms in Rust
—
Quality: Pending — a best-practices review ("Does it follow best practices?") has not yet been completed.
Impact: Pending — no eval scenarios have been run.
Advanced compression modules with specialized features beyond standard compress/decompress operations.
from cramjam import snappy, lz4, xz

Fast compression with support for both framed and raw formats.
def compress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy compression using framed encoding.

    Args:
        data: Input data to compress
        output_len: Optional expected output length
    Returns:
        Buffer: Compressed data with framing
    """

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy decompression using framed encoding.

    Args:
        data: Framed compressed data to decompress
        output_len: Optional expected output length
    Returns:
        Buffer: Decompressed data
    """

def compress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy compression without framed encoding.

    Args:
        data: Input data to compress
        output_len: Optional expected output length
    Returns:
        Buffer: Raw compressed data (no framing headers)
    """

def decompress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy decompression without framed encoding.

    Args:
        data: Raw compressed data to decompress
        output_len: Optional expected output length
    Returns:
        Buffer: Decompressed data
    """

# Framed format
def compress_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Compress into output buffer using framed format."""

def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Decompress from framed format into output buffer."""

# Raw format
def compress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Compress into output buffer using raw format."""

def decompress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Decompress from raw format into output buffer."""

def compress_raw_max_len(data: BufferProtocol) -> int:
    """Get expected max compressed length for snappy raw compression.

    This is the size of buffer that should be passed to compress_raw_into.

    Args:
        data: Input data to estimate compressed size for
    Returns:
        int: Maximum possible compressed size
    """
def decompress_raw_len(data: BufferProtocol) -> int:
    """Get decompressed length for the given raw compressed data.

    This is the size of buffer that should be passed to decompress_raw_into.

    Args:
        data: Raw compressed data
    Returns:
        int: Exact decompressed data size
    """

class Compressor:
    """Snappy compressor for streaming compression (framed format)."""

    def __init__(self) -> None:
        """Initialize streaming compressor."""

    def compress(self, input: bytes) -> int:
        """Compress input into the current compressor's stream."""

    def flush(self) -> Buffer:
        """Flush and return current compressed stream."""

    def finish(self) -> Buffer:
        """Consume compressor state and return final compressed stream."""
class Decompressor:
"""Snappy streaming decompressor."""import cramjam
data = b"Snappy compression test" * 1000
# Framed format (standard, includes headers)
framed_compressed = cramjam.snappy.compress(data)
framed_decompressed = cramjam.snappy.decompress(framed_compressed)
# Raw format (no headers, smaller output)
raw_compressed = cramjam.snappy.compress_raw(data)
raw_decompressed = cramjam.snappy.decompress_raw(raw_compressed)
# Efficient raw format with pre-calculated sizes
max_compressed_size = cramjam.snappy.compress_raw_max_len(data)
output_buffer = cramjam.Buffer()
output_buffer.set_len(max_compressed_size)
actual_size = cramjam.snappy.compress_raw_into(data, output_buffer)
# Decompress with known size
decompressed_size = cramjam.snappy.decompress_raw_len(raw_compressed)
decomp_buffer = cramjam.Buffer()
decomp_buffer.set_len(decompressed_size)
cramjam.snappy.decompress_raw_into(raw_compressed, decomp_buffer)Ultra-fast compression with block operations and advanced parameters.
def compress(data: BufferProtocol, level: Optional[int] = None, output_len: Optional[int] = None) -> Buffer:
    """LZ4 frame compression.

    Args:
        data: Input data to compress
        level: Compression level (optional)
        output_len: Optional expected output length (currently ignored)
    Note:
        output_len is ignored; underlying algorithm does not support reading to slice
    """

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """LZ4 frame decompression.

    Args:
        data: LZ4 frame compressed data
        output_len: Optional expected output length (currently ignored)
    Note:
        output_len is ignored; underlying algorithm does not support reading to slice
    """

def compress_into(input: BufferProtocol, output: BufferProtocol, level: Optional[int] = None) -> int:
    """Compress into output buffer using LZ4 frame format."""

def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Decompress LZ4 frame into output buffer."""

def compress_block(data: BufferProtocol, output_len: Optional[int] = None, mode: Optional[str] = None,
                   acceleration: Optional[int] = None, compression: Optional[int] = None,
                   store_size: Optional[bool] = None) -> Buffer:
    """LZ4 block compression with advanced parameters.

    Args:
        data: Input data to compress
        output_len: Optional expected output length
        mode: Compression mode (optional)
        acceleration: Acceleration parameter for faster compression (optional)
        compression: Compression parameter for better ratio (optional)
        store_size: Whether to store size in header for decompression (optional)
    Returns:
        Buffer: Compressed block data
    """
def decompress_block(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """LZ4 block decompression.

    Args:
        data: Compressed block data
        output_len: Optional upper bound length of decompressed data.
            If not provided, assumes store_size=True was used during compression
    Returns:
        Buffer: Decompressed data
    """

def compress_block_into(data: BufferProtocol, output: BufferProtocol, mode: Optional[str] = None,
                        acceleration: Optional[int] = None, store_size: Optional[bool] = None) -> int:
    """LZ4 block compression into pre-allocated buffer.

    Args:
        data: Input data to compress
        output: Pre-allocated output buffer
        mode: Compression mode (optional)
        acceleration: Acceleration parameter (optional)
        store_size: Whether to store size in header (optional)
    Returns:
        int: Number of bytes written
    """
def decompress_block_into(input: BufferProtocol, output: BufferProtocol, output_len: Optional[int] = None) -> int:
    """LZ4 block decompression into pre-allocated buffer.

    Args:
        input: Compressed block data
        output: Pre-allocated output buffer
        output_len: Optional output length hint
    Returns:
        int: Number of bytes written
    """

def compress_block_bound(src: BufferProtocol) -> int:
    """Determine guaranteed buffer size for block compression.

    Args:
        src: Source data to compress
    Returns:
        int: Buffer size guaranteed to hold compression result
    Raises:
        Error: If data is too long to be compressed by LZ4
    """

class Compressor:
    """LZ4 streaming compressor with advanced options."""

    def __init__(self, level: Optional[int] = None, content_checksum: Optional[bool] = None,
                 block_linked: Optional[bool] = None) -> None:
        """Initialize LZ4 compressor.

        Args:
            level: Compression level (optional)
            content_checksum: Enable content checksum (optional)
            block_linked: Enable block linking for better compression (optional)
        """

    def compress(self, input: bytes) -> int:
        """Add data to compression stream."""

    def flush(self) -> Buffer:
        """Flush and return current compressed stream."""

    def finish(self) -> Buffer:
        """Finish compression and return final stream."""
class Decompressor:
"""LZ4 streaming decompressor."""
def __init__(self, *args, **kwargs) -> None:
"""Initialize decompressor with flexible arguments."""
def decompress(self, data: bytes) -> Buffer:
"""Decompress data chunk."""import cramjam
data = b"LZ4 ultra-fast compression" * 2000
# Standard frame compression
compressed = cramjam.lz4.compress(data, level=1) # Fast compression
decompressed = cramjam.lz4.decompress(compressed)
# Block compression with size storage
block_compressed = cramjam.lz4.compress_block(data, store_size=True)
block_decompressed = cramjam.lz4.decompress_block(block_compressed) # No output_len needed
# Block compression with acceleration
fast_compressed = cramjam.lz4.compress_block(data, acceleration=10, store_size=True)
# Pre-allocated buffer with bound calculation
bound_size = cramjam.lz4.compress_block_bound(data)
output = cramjam.Buffer()
output.set_len(bound_size)
actual_size = cramjam.lz4.compress_block_into(data, output, acceleration=5)
# Advanced streaming with options
compressor = cramjam.lz4.Compressor(level=5, content_checksum=True, block_linked=True)
compressor.compress(b"First chunk")
compressor.compress(b"Second chunk")
result = compressor.finish()High-ratio compression with comprehensive configuration options.
# Compression formats
class Format(Enum):
    AUTO = ...   # Auto-detect format
    XZ = ...     # XZ format
    ALONE = ...  # Legacy LZMA alone format
    RAW = ...    # Raw LZMA data

# Checksum types
class Check(Enum):
    NONE = ...    # No checksum
    Crc32 = ...   # CRC32 checksum
    Crc64 = ...   # CRC64 checksum
    Sha256 = ...  # SHA256 checksum

# Available filters
class Filter(Enum):
    Lzma1 = ...     # LZMA1 algorithm
    Lzma2 = ...     # LZMA2 algorithm (default)
    X86 = ...       # x86 BCJ filter
    PowerPC = ...   # PowerPC BCJ filter
    Ia64 = ...      # IA-64 BCJ filter
    Arm = ...       # ARM BCJ filter
    ArmThumb = ...  # ARM-Thumb BCJ filter
    Sparc = ...     # SPARC BCJ filter

# Match finder algorithms
class MatchFinder(Enum):
    HashChain3 = ...   # Hash chain with 3-byte hashing
    HashChain4 = ...   # Hash chain with 4-byte hashing
    BinaryTree2 = ...  # Binary tree with 2-byte hashing
    BinaryTree3 = ...  # Binary tree with 3-byte hashing
    BinaryTree4 = ...  # Binary tree with 4-byte hashing

# Compression modes
class Mode(Enum):
    Fast = ...    # Fast compression mode
    Normal = ...  # Normal compression mode

class Options:
    """Configuration options for XZ compression."""

    def __init__(self) -> None:
        """Initialize options object."""

    def set_preset(self, preset: int) -> Options:
        """Set compression preset (0-9).

        Returns: Self for method chaining
        """

    def set_dict_size(self, dict_size: int) -> Options:
        """Set dictionary size in bytes."""

    def set_lc(self, lc: int) -> Options:
        """Set literal context bits (0-4)."""

    def set_lp(self, lp: int) -> Options:
        """Set literal position bits (0-4)."""

    def set_pb(self, pb: int) -> Options:
        """Set position bits (0-4)."""

    def set_mode(self, mode: Mode) -> Options:
        """Set compression mode."""

    def set_nice_len(self, nice_len: int) -> Options:
        """Set nice length parameter (3-273)."""

    def set_mf(self, mf: MatchFinder) -> Options:
        """Set match finder algorithm."""

    def set_depth(self, depth: int) -> Options:
        """Set search depth (0-1000)."""
class FilterChainItem:
    """Individual filter in compression chain."""

    def __init__(self, filter: Filter, options: Optional[Options] = None) -> None:
        """Initialize filter chain item.

        Args:
            filter: Filter type to use
            options: Optional configuration for this filter
        """

class FilterChain:
    """Chain of filters for advanced compression pipeline."""

    def __init__(self) -> None:
        """Initialize empty filter chain."""

    def append_filter(self, filter_chain_item: FilterChainItem) -> None:
        """Add filter to the chain.

        Args:
            filter_chain_item: Configured filter to append
        """

def compress(data: BufferProtocol, preset: Optional[int] = None, format: Optional[Format] = None,
             check: Optional[Check] = None, filters: Optional[FilterChain] = None,
             options: Optional[Options] = None, output_len: Optional[int] = None) -> Buffer:
    """LZMA compression with comprehensive options.

    Args:
        data: Input data to compress
        preset: Compression preset (0-9, default uses library default)
        format: Compression format (default: XZ)
        check: Checksum type (default: Crc64 for XZ format)
        filters: Custom filter chain (optional)
        options: Fine-grained compression options (optional)
        output_len: Optional expected output length
    Returns:
        Buffer: Compressed data
    """

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """LZMA decompression (auto-detects format).

    Args:
        data: Compressed data to decompress
        output_len: Optional expected output length
    Returns:
        Buffer: Decompressed data
    """
def compress_into(input: BufferProtocol, output: BufferProtocol, preset: Optional[int] = None,
                  format: Optional[Format] = None, check: Optional[Check] = None,
                  filters: Optional[FilterChain] = None, options: Optional[Options] = None) -> int:
    """LZMA compression directly into output buffer."""
def decompress_into(data: BufferProtocol, output: BufferProtocol) -> int:
    """LZMA decompression directly into output buffer."""

import cramjam

data = b"XZ compression with advanced options" * 1000

# Simple compression with preset
compressed = cramjam.xz.compress(data, preset=6)
decompressed = cramjam.xz.decompress(compressed)

# Custom format and checksum
compressed_custom = cramjam.xz.compress(
    data,
    format=cramjam.xz.Format.XZ,
    check=cramjam.xz.Check.Sha256
)

# Advanced options configuration
options = (cramjam.xz.Options()
           .set_preset(5)
           .set_dict_size(1024 * 1024)  # 1MB dictionary
           .set_mode(cramjam.xz.Mode.Normal)
           .set_mf(cramjam.xz.MatchFinder.BinaryTree4)
           .set_depth(100))
compressed_advanced = cramjam.xz.compress(data, options=options)

# Custom filter chain with BCJ filter for x86 binaries
filter_chain = cramjam.xz.FilterChain()
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.X86))
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.Lzma2, options))
compressed_bcj = cramjam.xz.compress(
    data,
    filters=filter_chain,
    format=cramjam.xz.Format.XZ,
    check=cramjam.xz.Check.Crc64
)

# Legacy LZMA alone format
compressed_alone = cramjam.xz.compress(
    data,
    format=cramjam.xz.Format.ALONE,
    preset=9  # Maximum compression
)

# Choose algorithm based on requirements
import cramjam

def compress_data(data, priority='balanced'):
    """Compress data based on priority."""
    if priority == 'speed':
        # Ultra-fast compression
        return cramjam.lz4.compress(data, level=1)
    elif priority == 'size':
        # Maximum compression ratio
        return cramjam.xz.compress(data, preset=9)
    elif priority == 'balanced':
        # Good speed/size balance
        return cramjam.zstd.compress(data, level=6)
    elif priority == 'compatibility':
        # Maximum compatibility
        return cramjam.gzip.compress(data, level=6)

import cramjam
def compress_large_file(input_path, output_path, algorithm='zstd'):
    """Compress large file with memory efficiency."""
    # Use streaming for large files
    if algorithm == 'lz4':
        compressor = cramjam.lz4.Compressor(
            level=5,
            content_checksum=True,
            block_linked=True
        )
    elif algorithm == 'zstd':
        compressor = cramjam.zstd.Compressor(level=6)
    else:
        compressor = cramjam.gzip.Compressor(level=6)
    with open(input_path, 'rb') as infile, open(output_path, 'wb') as outfile:
        while chunk := infile.read(1024 * 1024):  # 1MB chunks
            compressor.compress(chunk)
            # Write intermediate results to avoid memory buildup
            compressed_chunk = compressor.flush()
            if compressed_chunk:
                outfile.write(bytes(compressed_chunk))
        # Write final data
        final_data = compressor.finish()
        outfile.write(bytes(final_data))

import cramjam
# Snappy: Raw format for minimal overhead
def fast_compress_raw(data):
"""Ultra-fast compression with minimal headers."""
return cramjam.snappy.compress_raw(data)
# LZ4: Block compression with acceleration
def compress_with_speed(data, speed_factor=10):
"""LZ4 compression optimized for speed."""
return cramjam.lz4.compress_block(
data,
acceleration=speed_factor,
store_size=True
)
# XZ: Optimized for executable files
def compress_executable(binary_data):
"""XZ compression optimized for x86 executables."""
options = cramjam.xz.Options().set_preset(6).set_dict_size(2**20)
filter_chain = cramjam.xz.FilterChain()
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.X86))
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.Lzma2, options))
return cramjam.xz.compress(
binary_data,
filters=filter_chain,
check=cramjam.xz.Check.Sha256
)Install with Tessl CLI
npx tessl i tessl/pypi-cramjam