CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-cramjam

Extremely thin and easy-to-install Python bindings to de/compression algorithms in Rust

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/advanced-compression.md

Advanced Compression

Advanced compression modules with specialized features beyond standard compress/decompress operations.

Imports

from cramjam import snappy, lz4, xz

Snappy Module

Fast compression with support for both framed and raw formats.

Standard Framed Operations

def compress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy compression using framed encoding.
    
    Args:
        data: Input data to compress
        output_len: Optional expected output length
        
    Returns:
        Buffer: Compressed data with framing
    """

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy decompression using framed encoding.
    
    Args:
        data: Framed compressed data to decompress
        output_len: Optional expected output length
        
    Returns:
        Buffer: Decompressed data
    """

Raw Format Operations

def compress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy compression without framed encoding.
    
    Args:
        data: Input data to compress
        output_len: Optional expected output length
        
    Returns:
        Buffer: Raw compressed data (no framing headers)
    """

def decompress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """Snappy decompression without framed encoding.
    
    Args:
        data: Raw compressed data to decompress  
        output_len: Optional expected output length
        
    Returns:
        Buffer: Decompressed data
    """

Direct Buffer Operations

# Framed format
def compress_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Compress into output buffer using framed format."""

def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Decompress from framed format into output buffer."""

# Raw format  
def compress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Compress into output buffer using raw format."""

def decompress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Decompress from raw format into output buffer."""

Utility Functions

def compress_raw_max_len(data: BufferProtocol) -> int:
    """Get expected max compressed length for snappy raw compression.
    
    This is the size of buffer that should be passed to compress_raw_into.
    
    Args:
        data: Input data to estimate compressed size for
        
    Returns:
        int: Maximum possible compressed size
    """

def decompress_raw_len(data: BufferProtocol) -> int:
    """Get decompressed length for the given raw compressed data.
    
    This is the size of buffer that should be passed to decompress_raw_into.
    
    Args:
        data: Raw compressed data
        
    Returns:
        int: Exact decompressed data size  
    """

Streaming Classes

class Compressor:
    """Snappy compressor for streaming compression (framed format)."""
    
    def __init__(self) -> None:
        """Initialize streaming compressor."""
    
    def compress(self, input: bytes) -> int:
        """Compress input into the current compressor's stream."""
        
    def flush(self) -> Buffer:
        """Flush and return current compressed stream."""
        
    def finish(self) -> Buffer:
        """Consume compressor state and return final compressed stream."""

class Decompressor:
    """Snappy streaming decompressor."""

Snappy Usage Examples

import cramjam

data = b"Snappy compression test" * 1000

# Framed format (standard, includes headers)
framed_compressed = cramjam.snappy.compress(data)
framed_decompressed = cramjam.snappy.decompress(framed_compressed)

# Raw format (no headers, smaller output)
raw_compressed = cramjam.snappy.compress_raw(data)
raw_decompressed = cramjam.snappy.decompress_raw(raw_compressed)

# Efficient raw format with pre-calculated sizes
max_compressed_size = cramjam.snappy.compress_raw_max_len(data)
output_buffer = cramjam.Buffer()
# set_len pre-sizes the Buffer so compress_raw_into can write in place
output_buffer.set_len(max_compressed_size)
# actual_size may be smaller than max_compressed_size; only the first
# actual_size bytes of output_buffer are valid compressed data
actual_size = cramjam.snappy.compress_raw_into(data, output_buffer)

# Decompress with known size
# decompress_raw_len returns the exact decompressed size, so the
# buffer below needs no trimming afterwards
decompressed_size = cramjam.snappy.decompress_raw_len(raw_compressed)
decomp_buffer = cramjam.Buffer()
decomp_buffer.set_len(decompressed_size)
cramjam.snappy.decompress_raw_into(raw_compressed, decomp_buffer)

LZ4 Module

Ultra-fast compression with block operations and advanced parameters.

Standard Frame Operations

def compress(data: BufferProtocol, level: Optional[int] = None, output_len: Optional[int] = None) -> Buffer:
    """LZ4 frame compression.
    
    Args:
        data: Input data to compress
        level: Compression level (optional)
        output_len: Optional expected output length (currently ignored)
        
    Note: output_len is ignored; underlying algorithm does not support reading to slice
    """

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """LZ4 frame decompression.
    
    Args:
        data: LZ4 frame compressed data
        output_len: Optional expected output length (currently ignored)
        
    Note: output_len is ignored; underlying algorithm does not support reading to slice
    """

def compress_into(input: BufferProtocol, output: BufferProtocol, level: Optional[int] = None) -> int:
    """Compress into output buffer using LZ4 frame format."""

def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:
    """Decompress LZ4 frame into output buffer."""

Block Operations

def compress_block(data: BufferProtocol, output_len: Optional[int] = None, mode: Optional[str] = None, 
                  acceleration: Optional[int] = None, compression: Optional[int] = None, 
                  store_size: Optional[bool] = None) -> Buffer:
    """LZ4 block compression with advanced parameters.
    
    Args:
        data: Input data to compress
        output_len: Optional expected output length
        mode: Compression mode (optional)
        acceleration: Acceleration parameter for faster compression (optional)
        compression: Compression parameter for better ratio (optional) 
        store_size: Whether to store size in header for decompression (optional)
        
    Returns:
        Buffer: Compressed block data
    """

def decompress_block(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """LZ4 block decompression.
    
    Args:
        data: Compressed block data
        output_len: Optional upper bound length of decompressed data.
                   If not provided, assumes store_size=True was used during compression
                   
    Returns:
        Buffer: Decompressed data
    """

def compress_block_into(data: BufferProtocol, output: BufferProtocol, mode: Optional[str] = None,
                       acceleration: Optional[int] = None, store_size: Optional[bool] = None) -> int:
    """LZ4 block compression into pre-allocated buffer.
    
    Args:
        data: Input data to compress
        output: Pre-allocated output buffer
        mode: Compression mode (optional)
        acceleration: Acceleration parameter (optional)
        store_size: Whether to store size in header (optional)
        
    Returns:
        int: Number of bytes written
    """

def decompress_block_into(input: BufferProtocol, output: BufferProtocol, output_len: Optional[int] = None) -> int:
    """LZ4 block decompression into pre-allocated buffer.
    
    Args:
        input: Compressed block data
        output: Pre-allocated output buffer
        output_len: Optional output length hint
        
    Returns:
        int: Number of bytes written
    """

Utility Functions

def compress_block_bound(src: BufferProtocol) -> int:
    """Determine guaranteed buffer size for block compression.
    
    Args:
        src: Source data to compress
        
    Returns:
        int: Buffer size guaranteed to hold compression result
        
    Raises:
        Error: If data is too long to be compressed by LZ4
    """

Enhanced Streaming Classes

class Compressor:
    """LZ4 streaming compressor with advanced options."""
    
    def __init__(self, level: Optional[int] = None, content_checksum: Optional[bool] = None,
                 block_linked: Optional[bool] = None) -> None:
        """Initialize LZ4 compressor.
        
        Args:
            level: Compression level (optional)
            content_checksum: Enable content checksum (optional)
            block_linked: Enable block linking for better compression (optional)
        """
    
    def compress(self, input: bytes) -> int:
        """Add data to compression stream."""
        
    def flush(self) -> Buffer:
        """Flush and return current compressed stream."""
        
    def finish(self) -> Buffer:
        """Finish compression and return final stream."""

class Decompressor:
    """LZ4 streaming decompressor."""
    
    def __init__(self, *args, **kwargs) -> None:
        """Initialize decompressor with flexible arguments."""
    
    def decompress(self, data: bytes) -> Buffer:
        """Decompress data chunk."""

LZ4 Usage Examples

import cramjam

data = b"LZ4 ultra-fast compression" * 2000

# Standard frame compression
compressed = cramjam.lz4.compress(data, level=1)  # Fast compression
decompressed = cramjam.lz4.decompress(compressed)

# Block compression with size storage
block_compressed = cramjam.lz4.compress_block(data, store_size=True)
block_decompressed = cramjam.lz4.decompress_block(block_compressed)  # No output_len needed

# Block compression with acceleration
fast_compressed = cramjam.lz4.compress_block(data, acceleration=10, store_size=True)

# Pre-allocated buffer with bound calculation
# compress_block_bound returns a guaranteed-sufficient size; the actual
# compressed output (actual_size below) is usually smaller
bound_size = cramjam.lz4.compress_block_bound(data)
output = cramjam.Buffer()
output.set_len(bound_size)
actual_size = cramjam.lz4.compress_block_into(data, output, acceleration=5)

# Advanced streaming with options
compressor = cramjam.lz4.Compressor(level=5, content_checksum=True, block_linked=True)
compressor.compress(b"First chunk")
compressor.compress(b"Second chunk")
# finish() consumes the compressor; it cannot be reused afterwards
result = compressor.finish()

XZ/LZMA Module

High-ratio compression with comprehensive configuration options.

Enums and Configuration

# Compression formats
class Format(Enum):
    AUTO = ...     # Auto-detect format
    XZ = ...       # XZ format 
    ALONE = ...    # Legacy LZMA alone format
    RAW = ...      # Raw LZMA data

# Checksum types  
class Check(Enum):
    NONE = ...     # No checksum
    Crc32 = ...    # CRC32 checksum
    Crc64 = ...    # CRC64 checksum  
    Sha256 = ...   # SHA256 checksum

# Available filters
class Filter(Enum):
    Lzma1 = ...    # LZMA1 algorithm
    Lzma2 = ...    # LZMA2 algorithm (default)
    X86 = ...      # x86 BCJ filter
    PowerPC = ...  # PowerPC BCJ filter
    Ia64 = ...     # IA-64 BCJ filter
    Arm = ...      # ARM BCJ filter
    ArmThumb = ... # ARM-Thumb BCJ filter
    Sparc = ...    # SPARC BCJ filter

# Match finder algorithms
class MatchFinder(Enum):
    HashChain3 = ...    # Hash chain with 3-byte hashing
    HashChain4 = ...    # Hash chain with 4-byte hashing  
    BinaryTree2 = ...   # Binary tree with 2-byte hashing
    BinaryTree3 = ...   # Binary tree with 3-byte hashing
    BinaryTree4 = ...   # Binary tree with 4-byte hashing

# Compression modes
class Mode(Enum):
    Fast = ...     # Fast compression mode
    Normal = ...   # Normal compression mode

Configuration Classes

class Options:
    """Configuration options for XZ compression."""
    
    def __init__(self) -> None:
        """Initialize options object."""
    
    def set_preset(self, preset: int) -> Options:
        """Set compression preset (0-9).
        
        Returns: Self for method chaining
        """
    
    def set_dict_size(self, dict_size: int) -> Options:
        """Set dictionary size in bytes."""
    
    def set_lc(self, lc: int) -> Options:
        """Set literal context bits (0-4)."""
    
    def set_lp(self, lp: int) -> Options:
        """Set literal position bits (0-4)."""
    
    def set_pb(self, pb: int) -> Options:
        """Set position bits (0-4)."""
    
    def set_mode(self, mode: Mode) -> Options:
        """Set compression mode."""
    
    def set_nice_len(self, nice_len: int) -> Options:
        """Set nice length parameter (3-273)."""
    
    def set_mf(self, mf: MatchFinder) -> Options:
        """Set match finder algorithm."""
    
    def set_depth(self, depth: int) -> Options:
        """Set search depth (0-1000)."""

class FilterChainItem:
    """Individual filter in compression chain."""
    
    def __init__(self, filter: Filter, options: Optional[Options] = None) -> None:
        """Initialize filter chain item.
        
        Args:
            filter: Filter type to use
            options: Optional configuration for this filter
        """

class FilterChain:
    """Chain of filters for advanced compression pipeline."""
    
    def __init__(self) -> None:
        """Initialize empty filter chain."""
    
    def append_filter(self, filter_chain_item: FilterChainItem) -> None:
        """Add filter to the chain.
        
        Args:
            filter_chain_item: Configured filter to append
        """

Compression Functions

def compress(data: BufferProtocol, preset: Optional[int] = None, format: Optional[Format] = None,
             check: Optional[Check] = None, filters: Optional[FilterChain] = None, 
             options: Optional[Options] = None, output_len: Optional[int] = None) -> Buffer:
    """LZMA compression with comprehensive options.
    
    Args:
        data: Input data to compress
        preset: Compression preset (0-9, default uses library default)
        format: Compression format (default: XZ)  
        check: Checksum type (default: Crc64 for XZ format)
        filters: Custom filter chain (optional)
        options: Fine-grained compression options (optional)
        output_len: Optional expected output length
        
    Returns:
        Buffer: Compressed data
    """

def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
    """LZMA decompression (auto-detects format).
    
    Args:
        data: Compressed data to decompress
        output_len: Optional expected output length
        
    Returns:
        Buffer: Decompressed data
    """

def compress_into(input: BufferProtocol, output: BufferProtocol, preset: Optional[int] = None,
                 format: Optional[Format] = None, check: Optional[Check] = None,
                 filters: Optional[FilterChain] = None, options: Optional[Options] = None) -> int:
    """LZMA compression directly into output buffer."""

def decompress_into(data: BufferProtocol, output: BufferProtocol) -> int:
    """LZMA decompression directly into output buffer."""

XZ Usage Examples

import cramjam

data = b"XZ compression with advanced options" * 1000

# Simple compression with preset
compressed = cramjam.xz.compress(data, preset=6)
decompressed = cramjam.xz.decompress(compressed)

# Custom format and checksum
compressed_custom = cramjam.xz.compress(
    data, 
    format=cramjam.xz.Format.XZ,
    check=cramjam.xz.Check.Sha256
)

# Advanced options configuration
# Options setters return self, enabling this fluent chain
options = (cramjam.xz.Options()
    .set_preset(5)
    .set_dict_size(1024 * 1024)  # 1MB dictionary
    .set_mode(cramjam.xz.Mode.Normal)
    .set_mf(cramjam.xz.MatchFinder.BinaryTree4)
    .set_depth(100))

compressed_advanced = cramjam.xz.compress(data, options=options)

# Custom filter chain with BCJ filter for x86 binaries
# NOTE(review): filter order matters — the BCJ (X86) filter is appended
# before the LZMA2 stage, matching liblzma's convention; confirm against
# the cramjam docs before reordering.
filter_chain = cramjam.xz.FilterChain()
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.X86))
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.Lzma2, options))

compressed_bcj = cramjam.xz.compress(
    data,
    filters=filter_chain,
    format=cramjam.xz.Format.XZ,
    check=cramjam.xz.Check.Crc64
)

# Legacy LZMA alone format
compressed_alone = cramjam.xz.compress(
    data,
    format=cramjam.xz.Format.ALONE,
    preset=9  # Maximum compression
)

Advanced Patterns and Best Practices

Algorithm Selection Criteria

# Choose algorithm based on requirements
import cramjam

def compress_data(data, priority='balanced'):
    """Compress *data* with an algorithm chosen by *priority*.

    Args:
        data: Bytes-like input to compress.
        priority: Selection strategy — 'speed' (LZ4, fastest),
            'size' (XZ preset 9, best ratio), 'balanced' (zstd,
            the default), or 'compatibility' (gzip, most portable).

    Returns:
        The compressed data as returned by the chosen cramjam module.

    Raises:
        ValueError: If *priority* is not one of the supported modes.
    """
    if priority == 'speed':
        # Ultra-fast compression
        return cramjam.lz4.compress(data, level=1)

    if priority == 'size':
        # Maximum compression ratio
        return cramjam.xz.compress(data, preset=9)

    if priority == 'balanced':
        # Good speed/size balance
        return cramjam.zstd.compress(data, level=6)

    if priority == 'compatibility':
        # Maximum compatibility
        return cramjam.gzip.compress(data, level=6)

    # Previously an unrecognized priority silently fell through and
    # returned None; fail loudly instead so callers catch typos early.
    raise ValueError(f"unknown priority: {priority!r}")

Memory-Efficient Processing

import cramjam

def compress_large_file(input_path, output_path, algorithm='zstd'):
    """Compress large file with memory efficiency.

    Streams the input through a cramjam streaming Compressor in 1 MB
    chunks so the whole file is never resident in memory at once.

    Args:
        input_path: Path of the file to read (opened in binary mode).
        output_path: Path the compressed stream is written to.
        algorithm: 'lz4', 'zstd' (default), or any other value, which
            falls through to gzip.
    """
    
    # Use streaming for large files
    if algorithm == 'lz4':
        compressor = cramjam.lz4.Compressor(
            level=5, 
            content_checksum=True,
            block_linked=True
        )
    elif algorithm == 'zstd':
        compressor = cramjam.zstd.Compressor(level=6)
    else:
        compressor = cramjam.gzip.Compressor(level=6)
    
    with open(input_path, 'rb') as infile, open(output_path, 'wb') as outfile:
        # Walrus loop stops at EOF, when read() returns b''
        while chunk := infile.read(1024 * 1024):  # 1MB chunks
            compressor.compress(chunk)
            # Write intermediate results to avoid memory buildup
            compressed_chunk = compressor.flush()
            # NOTE(review): assumes an empty cramjam Buffer is falsy so
            # empty flushes are skipped — confirm Buffer.__bool__/__len__
            if compressed_chunk:
                outfile.write(bytes(compressed_chunk))
        
        # Write final data
        # finish() consumes the compressor state and emits any trailer
        final_data = compressor.finish()
        outfile.write(bytes(final_data))

Format-Specific Optimizations

import cramjam

# Snappy: Raw format for minimal overhead  
def fast_compress_raw(data):
    """Snappy-compress *data* in raw (unframed) mode.

    Raw mode skips the stream framing headers, giving the smallest
    possible output at the cost of self-describing framing.
    """
    compressed = cramjam.snappy.compress_raw(data)
    return compressed

# LZ4: Block compression with acceleration
def compress_with_speed(data, speed_factor=10):
    """LZ4 block compression tuned for throughput.

    A higher *speed_factor* (LZ4's ``acceleration`` parameter) trades
    compression ratio for speed. ``store_size=True`` embeds the
    uncompressed length so decompression needs no ``output_len`` hint.
    """
    block_options = {"acceleration": speed_factor, "store_size": True}
    return cramjam.lz4.compress_block(data, **block_options)

# XZ: Optimized for executable files
def compress_executable(binary_data):
    """XZ-compress x86 machine code via a BCJ + LZMA2 filter chain.

    The X86 BCJ filter preprocesses relative jump/call targets so the
    LZMA2 stage (preset 6, 1 MiB dictionary) finds more redundancy in
    executable sections. Output carries a SHA-256 integrity check.
    """
    xz = cramjam.xz

    lzma2_options = xz.Options().set_preset(6).set_dict_size(2**20)

    chain = xz.FilterChain()
    for item in (
        xz.FilterChainItem(xz.Filter.X86),
        xz.FilterChainItem(xz.Filter.Lzma2, lzma2_options),
    ):
        chain.append_filter(item)

    return xz.compress(binary_data, filters=chain, check=xz.Check.Sha256)

Install with Tessl CLI

npx tessl i tessl/pypi-cramjam

docs

advanced-compression.md

core-utilities.md

index.md

standard-compression.md

tile.json