Zstandard bindings for Python providing high-performance compression and decompression operations
Advanced buffer management for zero-copy operations, efficient batch processing, and high-performance data handling in compression and decompression workflows.
Individual buffer segments that provide efficient access to portions of larger buffers without copying data.
class BufferSegment:
@property
def offset(self) -> int:
"""Offset of this segment within the parent buffer."""
def __len__(self) -> int:
"""Get segment length in bytes."""
def tobytes(self) -> bytes:
"""
Convert segment to bytes.
Returns:
bytes: Copy of segment data
"""Usage Example:
import zstandard as zstd
# Buffer segments are typically returned by compression operations
compressor = zstd.ZstdCompressor()
result = compressor.multi_compress_to_buffer([b"data1", b"data2", b"data3"])
# Access individual segments
for i, segment in enumerate(result):
print(f"Segment {i}: offset={segment.offset}, length={len(segment)}")
data = segment.tobytes()
process_data(data)

Collections of buffer segments that provide efficient iteration and access patterns.
class BufferSegments:
def __len__(self) -> int:
"""Get number of segments in collection."""
def __getitem__(self, i: int) -> BufferSegment:
"""
Get segment by index.
Parameters:
- i: int, segment index
Returns:
BufferSegment: Segment at index
"""Usage Example:
import zstandard as zstd
# BufferSegments collections are returned by some operations
compressor = zstd.ZstdCompressor()
result = compressor.multi_compress_to_buffer([b"data1", b"data2"])
# Iterate over segments
for segment in result:
data = segment.tobytes()
print(f"Segment data: {len(data)} bytes")
# Access by index
first_segment = result[0]
second_segment = result[1]

Buffers that contain multiple segments, providing both the raw data and segment boundary information.
class BufferWithSegments:
@property
def size(self) -> int:
"""Total buffer size in bytes."""
def __init__(self, data: bytes, segments: bytes):
"""
Create buffer with segment information.
Parameters:
- data: bytes, raw buffer data
- segments: bytes, segment boundary information
"""
def __len__(self) -> int:
"""Get number of segments."""
def __getitem__(self, i: int) -> BufferSegment:
"""
Get segment by index.
Parameters:
- i: int, segment index
Returns:
BufferSegment: Segment at index
"""
def segments(self):
"""Get segments iterator."""
def tobytes(self) -> bytes:
"""
Convert entire buffer to bytes.
Returns:
bytes: Complete buffer data
"""Usage Example:
import zstandard as zstd
# Create buffer with segments manually (advanced usage)
data = b"concatenated data from multiple sources"
# segments contains boundary information (format is internal)
segments = b"..." # segment boundary data
buffer = zstd.BufferWithSegments(data, segments)
print(f"Buffer size: {buffer.size} bytes")
print(f"Number of segments: {len(buffer)}")
# Access segments
for i in range(len(buffer)):
segment = buffer[i]
segment_data = segment.tobytes()
print(f"Segment {i}: {len(segment_data)} bytes")
# Get all data
all_data = buffer.tobytes()

Collections of multiple buffers with segments, used for batch operations and efficient data management.
class BufferWithSegmentsCollection:
def __init__(self, *args):
"""
Create collection of buffers with segments.
Parameters:
- *args: BufferWithSegments objects
"""
def __len__(self) -> int:
"""Get number of buffers in collection."""
def __getitem__(self, i: int) -> BufferSegment:
"""
Get segment by global index across all buffers.
Parameters:
- i: int, global segment index
Returns:
BufferSegment: Segment at index
"""
def size(self) -> int:
"""
Get total size of all buffers.
Returns:
int: Total size in bytes
"""Usage Example:
import zstandard as zstd
# Collections are typically returned by multi-threaded operations
compressor = zstd.ZstdCompressor()
data_items = [b"item1", b"item2", b"item3", b"item4"]
# Multi-compress returns a collection
collection = compressor.multi_compress_to_buffer(data_items, threads=2)
print(f"Collection size: {collection.size()} bytes")
print(f"Number of items: {len(collection)}")
# Access compressed items
for i in range(len(collection)):
segment = collection[i]
compressed_data = segment.tobytes()
print(f"Item {i}: {len(compressed_data)} bytes compressed")Efficient batch compression that returns results in buffer collections for optimal memory usage.
class ZstdCompressor:
def multi_compress_to_buffer(
self,
data,
threads: int = 0
) -> BufferWithSegmentsCollection:
"""
Compress multiple data items to buffer collection.
Parameters:
- data: list[bytes], BufferWithSegments, or BufferWithSegmentsCollection
- threads: int, number of threads (0 = auto)
Returns:
BufferWithSegmentsCollection: Compressed data in buffer collection
"""Usage Example:
import zstandard as zstd
compressor = zstd.ZstdCompressor(level=5)
# Prepare data for batch compression
documents = [
b'{"id": 1, "text": "First document"}',
b'{"id": 2, "text": "Second document"}',
b'{"id": 3, "text": "Third document"}',
b'{"id": 4, "text": "Fourth document"}'
]
# Compress in parallel
result = compressor.multi_compress_to_buffer(documents, threads=4)
# Process results efficiently
total_original = sum(len(doc) for doc in documents)
total_compressed = result.size()
print(f"Compressed {total_original} bytes to {total_compressed} bytes")
print(f"Compression ratio: {total_original/total_compressed:.2f}:1")
# Extract individual compressed documents
compressed_docs = []
for i in range(len(result)):
segment = result[i]
compressed_docs.append(segment.tobytes())

Efficient batch decompression using buffer collections for high-throughput processing.
class ZstdDecompressor:
def multi_decompress_to_buffer(
self,
frames,
decompressed_sizes: bytes = b"",
threads: int = 0
) -> BufferWithSegmentsCollection:
"""
Decompress multiple frames to buffer collection.
Parameters:
- frames: list[bytes], BufferWithSegments, or BufferWithSegmentsCollection
- decompressed_sizes: bytes, expected decompressed sizes (optional optimization)
- threads: int, number of threads (0 = auto)
Returns:
BufferWithSegmentsCollection: Decompressed data in buffer collection
"""Usage Example:
import zstandard as zstd
decompressor = zstd.ZstdDecompressor()
# Compressed frames from previous example
compressed_frames = compressed_docs
# Decompress in parallel
result = decompressor.multi_decompress_to_buffer(compressed_frames, threads=4)
print(f"Decompressed {len(compressed_frames)} frames")
print(f"Total decompressed size: {result.size()} bytes")
# Extract decompressed data
decompressed_docs = []
for i in range(len(result)):
segment = result[i]
decompressed_docs.append(segment.tobytes())
# Verify round-trip
for i, (original, decompressed) in enumerate(zip(documents, decompressed_docs)):
assert original == decompressed, f"Mismatch in document {i}"

Advanced usage patterns that minimize memory copying for maximum performance.
Usage Example:
import zstandard as zstd
def process_large_dataset(data_items):
"""Process large dataset with minimal memory copying."""
compressor = zstd.ZstdCompressor(level=3)
# Compress in batches to manage memory
batch_size = 1000
all_results = []
for i in range(0, len(data_items), batch_size):
batch = data_items[i:i+batch_size]
# Multi-compress returns BufferWithSegmentsCollection
compressed_batch = compressor.multi_compress_to_buffer(batch, threads=4)
# Process segments without copying unless necessary
for j in range(len(compressed_batch)):
segment = compressed_batch[j]
# Only copy if we need to persist the data
if need_to_store(j):
data = segment.tobytes()
store_data(i + j, data)
else:
# Use segment directly for temporary operations
process_segment_in_place(segment)
return all_results
def stream_compress_with_buffers(input_stream, output_stream):
"""Stream compression using buffers for efficiency."""
compressor = zstd.ZstdCompressor()
# Read chunks and compress in batches
chunks = []
chunk_size = 64 * 1024 # 64KB chunks
while True:
chunk = input_stream.read(chunk_size)
if not chunk:
break
chunks.append(chunk)
# Process in batches of 100 chunks
if len(chunks) >= 100:
result = compressor.multi_compress_to_buffer(chunks, threads=2)
# Write compressed data
for i in range(len(result)):
segment = result[i]
output_stream.write(segment.tobytes())
chunks = []
# Process remaining chunks
if chunks:
result = compressor.multi_compress_to_buffer(chunks, threads=2)
for i in range(len(result)):
segment = result[i]
output_stream.write(segment.tobytes())

Buffer operations provide efficient memory usage patterns for high-performance applications.
Memory Usage Example:
import zstandard as zstd
def analyze_buffer_memory():
"""Analyze memory usage of buffer operations."""
compressor = zstd.ZstdCompressor()
# Large dataset
data = [b"x" * 1024 for _ in range(1000)] # 1000 x 1KB items
print(f"Original data: {sum(len(item) for item in data)} bytes")
print(f"Compressor memory: {compressor.memory_size()} bytes")
# Compress to buffer collection
result = compressor.multi_compress_to_buffer(data, threads=4)
print(f"Compressed size: {result.size()} bytes")
print(f"Number of segments: {len(result)}")
# Efficient iteration without copying
for i, segment in enumerate(result):
# segment.tobytes() copies data - avoid if possible
size = len(segment) # No copy required
offset = segment.offset # No copy required
if i < 5: # Show first few
print(f"Segment {i}: size={size}, offset={offset}")tobytes() only when you need a copy of the dataInstall with Tessl CLI
npx tessl i tessl/pypi-zstandard