Extremely thin and easy-to-install Python bindings to de/compression algorithms in Rust
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Core classes and utilities for buffer management and error handling in cramjam.
from cramjam import Buffer, File, CompressionError, DecompressionError, BufferProtocol

class Buffer:
"""Buffer class implementing both readable and writable buffer protocols."""
def __init__(self, data: BufferProtocol | None = None, copy: bool | None = True) -> None:
"""Initialize buffer.
Args:
data: Anything implementing the buffer protocol (optional)
copy: Whether to make a copy of the provided data (default: True)
"""

The Buffer class provides a memory buffer with a file-like interface for efficient data operations.
import cramjam
# Create empty buffer
buffer = cramjam.Buffer()
# Create buffer from data
buffer = cramjam.Buffer(b"Hello World")
# Create buffer without copying (references original data)
data = bytearray(b"Original data")
buffer = cramjam.Buffer(data, copy=False)

def read(self, n_bytes: int | None = -1) -> bytes:
"""Read from buffer at current position.
Args:
n_bytes: Number of bytes to read, -1 for all remaining
Returns:
bytes: Data read from buffer
"""
def readinto(self, output: BufferProtocol) -> int:
"""Read from buffer into another buffer object.
Args:
output: Buffer protocol object to read data into
Returns:
int: Number of bytes read
"""

def write(self, input: BufferProtocol) -> int:
"""Write bytes to the buffer.
Args:
input: Data implementing Buffer Protocol to write
Returns:
int: Number of bytes written
"""

def seek(self, position: int, whence: int | None = 0) -> int:
"""Seek to position within the buffer.
Args:
position: Target position
whence: 0 (from start), 1 (from current), 2 (from end)
Returns:
int: New position
"""
def tell(self) -> int:
"""Get current position of the buffer."""
def seekable(self) -> bool:
"""Check if buffer is seekable (always True for compatibility)."""

def len(self) -> int:
"""Get length of the underlying buffer."""
def set_len(self, size: int) -> None:
"""Set buffer length. Truncates if smaller, null-fills if larger."""
def truncate(self) -> None:
"""Truncate the buffer."""
# Magic methods for convenience
def __len__(self) -> int:
"""Get buffer length."""
def __bool__(self) -> bool:
"""Check if buffer has content."""

def get_view_reference(self) -> None | Any:
"""Get PyObject this Buffer references as view.
Returns:
None if Buffer owns its data, PyObject reference otherwise
"""
def get_view_reference_count(self) -> None | int:
"""Get reference count of PyObject this Buffer references.
Returns:
None if Buffer owns its data, reference count otherwise
"""

import cramjam
# Create and manipulate buffer
buffer = cramjam.Buffer()
buffer.write(b"Hello ")
buffer.write(b"World!")
buffer.seek(0)
data = buffer.read() # b"Hello World!"
# Use as compression target
source = b"Data to compress" * 1000
output_buffer = cramjam.Buffer()
cramjam.gzip.compress_into(source, output_buffer)
# Read compressed data
output_buffer.seek(0)
compressed_data = output_buffer.read()

class File:
"""File-like object owned on Rust side."""
def __init__(self, path: str, read: bool | None = None, write: bool | None = None,
truncate: bool | None = None, append: bool | None = None) -> None:
"""Open file with specified modes.
Args:
path: File path string
read: Enable read mode (optional)
write: Enable write mode (optional)
truncate: Enable truncate mode (optional)
append: Enable append mode (optional)
"""

The File class provides the same interface as Buffer but operates on actual files:
# Read operations
def read(self, n_bytes: int | None = None) -> bytes:
"""Read from file at current position."""
def readinto(self, output: BufferProtocol) -> int:
"""Read from file into buffer object."""
# Write operations
def write(self, input: BufferProtocol) -> int:
"""Write bytes to file."""
# Position operations
def seek(self, position: int, whence: int | None = 0) -> int:
"""Seek to position within file."""
def tell(self) -> int:
"""Get current file position."""
def seekable(self) -> bool:
"""Check if file is seekable (always True)."""
# Size operations
def len(self) -> int:
"""Get file length in bytes."""
def set_len(self, size: int) -> None:
"""Set file length. Truncates if smaller, null-fills if larger."""
def truncate(self) -> None:
"""Truncate the file."""

import cramjam
# Open file for reading and writing
file_obj = cramjam.File("data.bin", read=True, write=True)
# Write compressed data directly to file
source_data = b"Large dataset" * 10000
cramjam.zstd.compress_into(source_data, file_obj)
# Read back and decompress
file_obj.seek(0)
compressed_data = file_obj.read()
decompressed = cramjam.zstd.decompress(compressed_data)
# Append mode for logs
log_file = cramjam.File("compressed.log", write=True, append=True)
log_entry = b"Log entry data"
compressed_entry = cramjam.gzip.compress(log_entry)
log_file.write(compressed_entry)

class CompressionError(Exception):
"""Cramjam-specific exception for failed compression operations."""

Raised when compression operations fail. Example:
import cramjam
try:
# Attempt compression
result = cramjam.brotli.compress(invalid_data)
except cramjam.CompressionError as e:
print(f"Compression failed: {e}")

class DecompressionError(Exception):
"""Cramjam-specific exception for failed decompression operations."""

Raised when decompression operations fail. Example:
import cramjam
try:
# Attempt decompression
result = cramjam.gzip.decompress(corrupted_data)
except cramjam.DecompressionError as e:
print(f"Decompression failed: {e}")

BufferProtocol = Any # Type alias for buffer protocol objects

Type alias representing objects that implement the Python buffer protocol:
- bytes - Immutable byte strings
- bytearray - Mutable byte arrays
- memoryview - Memory view objects
- Objects providing a __buffer__ method

All cramjam functions accept BufferProtocol objects as input, providing flexibility in data handling while maintaining performance through the buffer protocol's zero-copy semantics where possible.
- Use *_into functions to avoid repeated allocations

import cramjam
# Efficient: Pre-allocated buffer pattern
source = bytearray(b"Large data" * 100000) # bytearray is faster
output = cramjam.Buffer() # Pre-allocated output
bytes_written = cramjam.zstd.compress_into(source, output)
# Memory view pattern for zero-copy operations
large_data = bytearray(1024 * 1024) # 1MB buffer
view = memoryview(large_data)[1000:2000] # Slice without copying
compressed = cramjam.lz4.compress(view)
# Reference pattern (be careful about data lifetime)
original_data = bytearray(b"Persistent data")
buffer = cramjam.Buffer(original_data, copy=False) # References original
# Ensure original_data stays alive while buffer is in use

Install with Tessl CLI
npx tessl i tessl/pypi-cramjam