# Advanced Compression

Sophisticated compression capabilities including customizable parameters, streaming interfaces, dictionary support, and multi-threading for high-performance applications.

## Capabilities
### ZstdCompressor

Main compression class providing full control over compression parameters and advanced compression modes.

```python { .api }
class ZstdCompressor:
    def __init__(
        self,
        level: int = 3,
        dict_data: ZstdCompressionDict = None,
        compression_params: ZstdCompressionParameters = None,
        write_checksum: bool = None,
        write_content_size: bool = None,
        write_dict_id: bool = None,
        threads: int = 0
    ):
        """
        Create a compression context.

        Parameters:
        - level: int, compression level (1-22, default 3)
        - dict_data: ZstdCompressionDict, compression dictionary
        - compression_params: ZstdCompressionParameters, detailed parameters
        - write_checksum: bool, include integrity checksum
        - write_content_size: bool, write original size in frame header
        - write_dict_id: bool, write dictionary ID in frame header
        - threads: int, number of threads for compression (0 = auto)
        """

    def memory_size(self) -> int:
        """Get memory usage of compression context in bytes."""

    def compress(self, data: bytes) -> bytes:
        """
        Compress data in one operation.

        Parameters:
        - data: bytes-like object to compress

        Returns:
        bytes: Compressed data
        """

    def compressobj(self, size: int = -1) -> ZstdCompressionObj:
        """
        Create a compression object for streaming operations.

        Parameters:
        - size: int, hint about total size of data to compress

        Returns:
        ZstdCompressionObj: Streaming compression object
        """

    def chunker(self, size: int = -1, chunk_size: int = -1) -> ZstdCompressionChunker:
        """
        Create a compression chunker for processing data in chunks.

        Parameters:
        - size: int, hint about total size of data
        - chunk_size: int, preferred chunk size

        Returns:
        ZstdCompressionChunker: Chunking compression interface
        """
```

**Usage Example:**

```python
import zstandard as zstd

# Basic compressor
compressor = zstd.ZstdCompressor(level=10)
compressed = compressor.compress(b"Data to compress")

# High-performance compressor with threading
compressor = zstd.ZstdCompressor(level=3, threads=4)
compressed = compressor.compress(large_data)

# Memory usage monitoring
print(f"Compressor memory usage: {compressor.memory_size()} bytes")
```
### Streaming Compression

Stream-based compression for handling large data without loading everything into memory.

```python { .api }
class ZstdCompressor:
    def stream_writer(
        self,
        writer,
        size: int = -1,
        write_size: int = -1,
        write_return_read: bool = False,
        closefd: bool = True
    ) -> ZstdCompressionWriter:
        """
        Create a streaming compression writer.

        Parameters:
        - writer: file-like object to write compressed data to
        - size: int, hint about total size of data
        - write_size: int, preferred write size
        - write_return_read: bool, return read count instead of write count
        - closefd: bool, whether to close writer when done

        Returns:
        ZstdCompressionWriter: Streaming compression writer
        """

    def stream_reader(
        self,
        source,
        size: int = -1,
        read_size: int = -1,
        closefd: bool = True
    ) -> ZstdCompressionReader:
        """
        Create a streaming compression reader.

        Parameters:
        - source: file-like object or bytes to read from
        - size: int, hint about total size of data
        - read_size: int, preferred read size
        - closefd: bool, whether to close source when done

        Returns:
        ZstdCompressionReader: Streaming compression reader
        """

    def copy_stream(
        self,
        ifh,
        ofh,
        size: int = -1,
        read_size: int = -1,
        write_size: int = -1
    ) -> tuple[int, int]:
        """
        Copy and compress data between streams.

        Parameters:
        - ifh: input file-like object
        - ofh: output file-like object
        - size: int, hint about total size
        - read_size: int, read buffer size
        - write_size: int, write buffer size

        Returns:
        tuple[int, int]: (bytes_read, bytes_written)
        """
```

**Usage Examples:**

```python
import zstandard as zstd
import io

compressor = zstd.ZstdCompressor(level=5)

# Stream writer - compress data as you write
output = io.BytesIO()
with compressor.stream_writer(output) as writer:
    writer.write(b"First chunk of data")
    writer.write(b"Second chunk of data")
    writer.write(b"Final chunk")

compressed_data = output.getvalue()

# Stream reader - compress data as you read
data = b"Large amount of data to compress"
reader = compressor.stream_reader(io.BytesIO(data))
compressed_chunks = []
while True:
    chunk = reader.read(8192)
    if not chunk:
        break
    compressed_chunks.append(chunk)

# Copy between streams with compression
with open('input.txt', 'rb') as input_file, \
     open('output.zst', 'wb') as output_file:
    bytes_read, bytes_written = compressor.copy_stream(input_file, output_file)
    print(f"Read {bytes_read} bytes, wrote {bytes_written} bytes")
```
### Iterative Compression

Compress data in chunks and yield compressed output incrementally, useful for processing large data streams.

```python { .api }
class ZstdCompressor:
    def read_to_iter(
        self,
        reader,
        size: int = -1,
        read_size: int = -1,
        write_size: int = -1
    ) -> Generator[bytes, None, None]:
        """
        Compress data from reader and yield compressed chunks.

        Parameters:
        - reader: file-like object or bytes to read from
        - size: int, hint about total size of data
        - read_size: int, read buffer size
        - write_size: int, write buffer size

        Yields:
        bytes: Compressed data chunks
        """
```

**Usage Example:**

```python
import zstandard as zstd
import io

compressor = zstd.ZstdCompressor(level=5)

# Process large data iteratively
large_data = b"Very large data content that needs streaming compression..."
reader = io.BytesIO(large_data)

# Compress and process chunks as they're produced
compressed_chunks = []
for chunk in compressor.read_to_iter(reader):
    compressed_chunks.append(chunk)
    # Process each chunk immediately to save memory
    process_compressed_chunk(chunk)

# Combine all chunks if needed
final_compressed = b''.join(compressed_chunks)
```
### Multi-Threading Compression

Parallel compression for improved performance on multi-core systems.

```python { .api }
class ZstdCompressor:
    def multi_compress_to_buffer(
        self,
        data,
        threads: int = 0
    ) -> BufferWithSegmentsCollection:
        """
        Compress multiple data items in parallel.

        Parameters:
        - data: BufferWithSegments, BufferWithSegmentsCollection, or list of bytes
        - threads: int, number of threads (0 = auto, -1 = no threading)

        Returns:
        BufferWithSegmentsCollection: Collection of compressed segments
        """
```

**Usage Example:**

```python
import zstandard as zstd

compressor = zstd.ZstdCompressor(level=3)

# Prepare multiple data items
data_items = [
    b"First piece of data to compress",
    b"Second piece of data to compress",
    b"Third piece of data to compress"
]

# Compress in parallel
result = compressor.multi_compress_to_buffer(data_items, threads=4)

# Access compressed segments
for i in range(len(result)):
    segment = result[i]
    print(f"Segment {i}: {len(segment)} bytes")
    compressed_data = segment.tobytes()
```
### Frame Progression Monitoring

Monitor compression progress and statistics during multi-threaded operations.

```python { .api }
class ZstdCompressor:
    def frame_progression(self) -> tuple[int, int, int]:
        """
        Get compression progress information.

        Returns:
        tuple[int, int, int]: (bytes_read, bytes_written, bytes_flushed)
        """
```

**Usage Example:**

```python
import zstandard as zstd

compressor = zstd.ZstdCompressor(level=5, threads=4)

# Start compression
data = b"Large data to monitor compression progress"
compressed = compressor.compress(data)

# Get progression statistics
bytes_read, bytes_written, bytes_flushed = compressor.frame_progression()
print(f"Read: {bytes_read}, Written: {bytes_written}, Flushed: {bytes_flushed}")
```
### Compression Parameters

Fine-grained control over compression behavior through detailed parameter configuration.

```python { .api }
class ZstdCompressionParameters:
    def __init__(
        self,
        format: int = FORMAT_ZSTD1,
        compression_level: int = 3,
        window_log: int = 0,
        hash_log: int = 0,
        chain_log: int = 0,
        search_log: int = 0,
        min_match: int = 0,
        target_length: int = 0,
        strategy: int = 0,
        write_content_size: int = -1,
        write_checksum: int = -1,
        write_dict_id: int = -1,
        job_size: int = 0,
        overlap_log: int = 0,
        force_max_window: int = 0,
        enable_ldm: int = 0,
        ldm_hash_log: int = 0,
        ldm_min_match: int = 0,
        ldm_bucket_size_log: int = 0,
        ldm_hash_rate_log: int = 0,
        threads: int = 0
    ):
        """
        Create detailed compression parameters.

        Parameters:
        - format: int, compression format (FORMAT_ZSTD1, FORMAT_ZSTD1_MAGICLESS)
        - compression_level: int, compression level (1-22)
        - window_log: int, window size as power of 2 (10-31)
        - hash_log: int, hash table size as power of 2 (6-26)
        - chain_log: int, chain table size as power of 2 (6-28)
        - search_log: int, search length as power of 2 (1-26)
        - min_match: int, minimum match length (3-7)
        - target_length: int, target match length (0-999)
        - strategy: int, compression strategy (STRATEGY_*)
        - write_content_size: int, write content size (-1=auto, 0=no, 1=yes)
        - write_checksum: int, write checksum (-1=auto, 0=no, 1=yes)
        - write_dict_id: int, write dictionary ID (-1=auto, 0=no, 1=yes)
        - job_size: int, job size for threading
        - overlap_log: int, overlap size as power of 2
        - force_max_window: int, force maximum window size
        - enable_ldm: int, enable long distance matching
        - ldm_hash_log: int, LDM hash table size as power of 2
        - ldm_min_match: int, LDM minimum match length
        - ldm_bucket_size_log: int, LDM bucket size as power of 2
        - ldm_hash_rate_log: int, LDM hash rate as power of 2
        - threads: int, number of threads
        """

    @staticmethod
    def from_level(
        level: int,
        source_size: int = 0,
        dict_size: int = 0,
        **kwargs
    ) -> ZstdCompressionParameters:
        """
        Create parameters from compression level with optional hints.

        Parameters:
        - level: int, compression level (1-22)
        - source_size: int, hint about source data size
        - dict_size: int, dictionary size if using dictionary
        - **kwargs: additional parameter overrides

        Returns:
        ZstdCompressionParameters: Configured parameters
        """

    def estimated_compression_context_size(self) -> int:
        """Estimate memory usage for these parameters in bytes."""

class CompressionParameters(ZstdCompressionParameters):
    """Compatibility alias for ZstdCompressionParameters."""
```

**Usage Example:**

```python
import zstandard as zstd

# Create parameters from level with custom tweaks
params = zstd.ZstdCompressionParameters.from_level(
    level=9,
    source_size=1024*1024,  # 1MB hint
    strategy=zstd.STRATEGY_BTULTRA2,
    enable_ldm=1
)

# Use custom parameters
compressor = zstd.ZstdCompressor(compression_params=params)
compressed = compressor.compress(data)

# Check memory usage
memory_usage = params.estimated_compression_context_size()
print(f"Estimated memory usage: {memory_usage} bytes")
```
### Streaming Objects

Low-level streaming compression objects for fine-grained control over compression process.

```python { .api }
class ZstdCompressionObj:
    def compress(self, data: bytes) -> bytes:
        """
        Compress data chunk.

        Parameters:
        - data: bytes to compress

        Returns:
        bytes: Compressed data (may be empty)
        """

    def flush(self, flush_mode: int = COMPRESSOBJ_FLUSH_FINISH) -> bytes:
        """
        Flush compression buffer.

        Parameters:
        - flush_mode: int, flush mode (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK)

        Returns:
        bytes: Final compressed data
        """

class ZstdCompressionChunker:
    def compress(self, data: bytes):
        """Compress data and yield chunks."""

    def flush(self):
        """Flush any remaining data."""

    def finish(self):
        """Finish compression and yield final chunks."""
```

**Usage Example:**

```python
import zstandard as zstd

compressor = zstd.ZstdCompressor(level=5)

# Streaming object
obj = compressor.compressobj()
compressed_chunks = []

# Compress data in chunks
compressed_chunks.append(obj.compress(b"First chunk"))
compressed_chunks.append(obj.compress(b"Second chunk"))
compressed_chunks.append(obj.flush())  # Final data

# Chunker interface
chunker = compressor.chunker()
for chunk in chunker.compress(b"Data to compress"):
    process_compressed_chunk(chunk)

for chunk in chunker.finish():
    process_final_chunk(chunk)
```