CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-cramjam

Extremely thin and easy-to-install Python bindings to de/compression algorithms in Rust

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/core-utilities.md

Core Utilities

Core classes and utilities for buffer management and error handling in cramjam.

Imports

from cramjam import Buffer, File, CompressionError, DecompressionError, BufferProtocol

Buffer Class

class Buffer:
    """Buffer class implementing both readable and writable buffer protocols."""
    
    def __init__(self, data: BufferProtocol | None = None, copy: bool | None = True) -> None:
        """Initialize buffer.
        
        Args:
            data: Anything implementing the buffer protocol (optional)
            copy: Whether to make a copy of the provided data (default: True)
        """

The Buffer class provides an in-memory buffer with a file-like interface for efficient data operations.

Basic Usage

import cramjam

# Create empty buffer
buffer = cramjam.Buffer()

# Create buffer from data
buffer = cramjam.Buffer(b"Hello World")

# Create buffer without copying (references original data)
data = bytearray(b"Original data")
buffer = cramjam.Buffer(data, copy=False)

Read Operations

def read(self, n_bytes: int | None = -1) -> bytes:
    """Read from buffer at current position.
    
    Args:
        n_bytes: Number of bytes to read, -1 for all remaining
        
    Returns:
        bytes: Data read from buffer
    """

def readinto(self, output: BufferProtocol) -> int:
    """Read from buffer into another buffer object.
    
    Args:
        output: Buffer protocol object to read data into
        
    Returns:
        int: Number of bytes read
    """

Write Operations

def write(self, input: BufferProtocol) -> int:
    """Write bytes to the buffer.
    
    Args:
        input: Data implementing Buffer Protocol to write
        
    Returns:
        int: Number of bytes written
    """

Position Operations

def seek(self, position: int, whence: int | None = 0) -> int:
    """Seek to position within the buffer.
    
    Args:
        position: Target position
        whence: 0 (from start), 1 (from current), 2 (from end)
        
    Returns:
        int: New position
    """

def tell(self) -> int:
    """Get current position of the buffer."""

def seekable(self) -> bool:
    """Check if buffer is seekable (always True for compatibility)."""

Size Operations

def len(self) -> int:
    """Get length of the underlying buffer."""

def set_len(self, size: int) -> None:
    """Set buffer length. Truncates if smaller, null-fills if larger."""

def truncate(self) -> None:
    """Truncate the buffer."""

# Magic methods for convenience
def __len__(self) -> int:
    """Get buffer length."""

def __bool__(self) -> bool:
    """Check if buffer has content."""

Memory Management

def get_view_reference(self) -> None | Any:
    """Get PyObject this Buffer references as view.
    
    Returns:
        None if Buffer owns its data, PyObject reference otherwise
    """

def get_view_reference_count(self) -> None | int:
    """Get reference count of PyObject this Buffer references.
    
    Returns:
        None if Buffer owns its data, reference count otherwise
    """

Buffer Usage Examples

import cramjam

# Create and manipulate buffer
buffer = cramjam.Buffer()
buffer.write(b"Hello ")
buffer.write(b"World!")
buffer.seek(0)
data = buffer.read()  # b"Hello World!"

# Use as compression target
source = b"Data to compress" * 1000
output_buffer = cramjam.Buffer()
cramjam.gzip.compress_into(source, output_buffer)

# Read compressed data
output_buffer.seek(0) 
compressed_data = output_buffer.read()

File Class

class File:
    """File-like object owned on Rust side."""
    
    def __init__(self, path: str, read: bool | None = None, write: bool | None = None, 
                 truncate: bool | None = None, append: bool | None = None) -> None:
        """Open file with specified modes.
        
        Args:
            path: File path string
            read: Enable read mode (optional)
            write: Enable write mode (optional) 
            truncate: Enable truncate mode (optional)
            append: Enable append mode (optional)
        """

File Operations

The File class provides the same read, write, position, and size operations as Buffer, but operates on actual files:

# Read operations
def read(self, n_bytes: int | None = None) -> bytes:
    """Read from file at current position."""

def readinto(self, output: BufferProtocol) -> int:
    """Read from file into buffer object."""

# Write operations  
def write(self, input: BufferProtocol) -> int:
    """Write bytes to file."""

# Position operations
def seek(self, position: int, whence: int | None = 0) -> int:
    """Seek to position within file."""

def tell(self) -> int:
    """Get current file position."""

def seekable(self) -> bool:
    """Check if file is seekable (always True)."""

# Size operations
def len(self) -> int:
    """Get file length in bytes."""

def set_len(self, size: int) -> None:
    """Set file length. Truncates if smaller, null-fills if larger."""

def truncate(self) -> None:
    """Truncate the file."""

File Usage Examples

import cramjam

# Open file for reading and writing
file_obj = cramjam.File("data.bin", read=True, write=True)

# Write compressed data directly to file
source_data = b"Large dataset" * 10000
cramjam.zstd.compress_into(source_data, file_obj)

# Read back and decompress
file_obj.seek(0)
compressed_data = file_obj.read()
decompressed = cramjam.zstd.decompress(compressed_data)

# Append mode for logs
log_file = cramjam.File("compressed.log", write=True, append=True)
log_entry = b"Log entry data"
compressed_entry = cramjam.gzip.compress(log_entry)
log_file.write(compressed_entry)

Exception Classes

CompressionError

class CompressionError(Exception):
    """Cramjam-specific exception for failed compression operations."""

Raised when compression operations fail due to:

  • Invalid input data
  • Insufficient output buffer space
  • Algorithm-specific limitations

Example:

import cramjam

try:
    # Attempt compression
    result = cramjam.brotli.compress(invalid_data)
except cramjam.CompressionError as e:
    print(f"Compression failed: {e}")

DecompressionError

class DecompressionError(Exception):
    """Cramjam-specific exception for failed decompression operations."""

Raised when decompression operations fail due to:

  • Corrupted compressed data
  • Wrong decompression algorithm
  • Truncated input

Example:

import cramjam

try:
    # Attempt decompression
    result = cramjam.gzip.decompress(corrupted_data)
except cramjam.DecompressionError as e:
    print(f"Decompression failed: {e}")

BufferProtocol Type

BufferProtocol = Any  # Type alias for buffer protocol objects

Type alias representing objects that implement the Python buffer protocol:

  • bytes - Immutable byte strings
  • bytearray - Mutable byte arrays
  • memoryview - Memory view objects
  • Custom objects that implement the buffer protocol (for example, via the __buffer__ method on Python 3.12+)

All cramjam functions accept BufferProtocol objects as input, providing flexibility in data handling while maintaining performance through the buffer protocol's zero-copy semantics where possible.

Memory Management Best Practices

Performance Tips

  1. Use bytearray for inputs when possible - avoids double allocation on Rust side
  2. Pre-allocate buffers for *_into functions to avoid repeated allocations
  3. Use copy=False in Buffer constructor when safe to reference original data
  4. Monitor reference counts with Buffer.get_view_reference() and Buffer.get_view_reference_count() when working with large datasets

Memory-Efficient Patterns

import cramjam

# Efficient: Pre-allocated buffer pattern
source = bytearray(b"Large data" * 100000)  # bytearray is faster
output = cramjam.Buffer()  # Pre-allocated output
bytes_written = cramjam.zstd.compress_into(source, output)

# Memory view pattern for zero-copy operations  
large_data = bytearray(1024 * 1024)  # 1MB buffer
view = memoryview(large_data)[1000:2000]  # Slice without copying
compressed = cramjam.lz4.compress(view)

# Reference pattern (be careful about data lifetime)
original_data = bytearray(b"Persistent data")
buffer = cramjam.Buffer(original_data, copy=False)  # References original
# Ensure original_data stays alive while buffer is in use

Install with Tessl CLI

npx tessl i tessl/pypi-cramjam

docs

advanced-compression.md

core-utilities.md

index.md

standard-compression.md

tile.json