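# Advanced Compression

The `snappy`, `lz4`, and `xz` modules provide specialized features beyond the standard `compress`/`decompress` operations, including raw and block formats, buffer-size helpers, streaming classes, and configurable filter chains.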
3
4
## Imports
5
6
```python { .api }
7
from cramjam import snappy, lz4, xz
8
```
9
10
## Snappy Module

Fast compression available in two formats: framed (used by `compress`/`decompress`) and raw (the `*_raw` functions, which omit the framing headers).
13
14
### Standard Framed Operations
15
16
```python { .api }
17
def compress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
18
"""Snappy compression using framed encoding.
19
20
Args:
21
data: Input data to compress
22
output_len: Optional expected output length
23
24
Returns:
25
Buffer: Compressed data with framing
26
"""
27
28
def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
29
"""Snappy decompression using framed encoding.
30
31
Args:
32
data: Framed compressed data to decompress
33
output_len: Optional expected output length
34
35
Returns:
36
Buffer: Decompressed data
37
"""
38
```
39
40
### Raw Format Operations
41
42
```python { .api }
43
def compress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
44
"""Snappy compression without framed encoding.
45
46
Args:
47
data: Input data to compress
48
output_len: Optional expected output length
49
50
Returns:
51
Buffer: Raw compressed data (no framing headers)
52
"""
53
54
def decompress_raw(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
55
"""Snappy decompression without framed encoding.
56
57
Args:
58
data: Raw compressed data to decompress
59
output_len: Optional expected output length
60
61
Returns:
62
Buffer: Decompressed data
63
"""
64
```
65
66
### Direct Buffer Operations
67
68
```python { .api }
69
# Framed format
70
def compress_into(input: BufferProtocol, output: BufferProtocol) -> int:
71
"""Compress into output buffer using framed format."""
72
73
def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:
74
"""Decompress from framed format into output buffer."""
75
76
# Raw format
77
def compress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:
78
"""Compress into output buffer using raw format."""
79
80
def decompress_raw_into(input: BufferProtocol, output: BufferProtocol) -> int:
81
"""Decompress from raw format into output buffer."""
82
```
83
84
### Utility Functions
85
86
```python { .api }
87
def compress_raw_max_len(data: BufferProtocol) -> int:
88
"""Get expected max compressed length for snappy raw compression.
89
90
This is the size of buffer that should be passed to compress_raw_into.
91
92
Args:
93
data: Input data to estimate compressed size for
94
95
Returns:
96
int: Maximum possible compressed size
97
"""
98
99
def decompress_raw_len(data: BufferProtocol) -> int:
100
"""Get decompressed length for the given raw compressed data.
101
102
This is the size of buffer that should be passed to decompress_raw_into.
103
104
Args:
105
data: Raw compressed data
106
107
Returns:
108
int: Exact decompressed data size
109
"""
110
```
111
112
### Streaming Classes
113
114
```python { .api }
115
class Compressor:
116
"""Snappy compressor for streaming compression (framed format)."""
117
118
def __init__(self) -> None:
119
"""Initialize streaming compressor."""
120
121
def compress(self, input: bytes) -> int:
122
"""Compress input into the current compressor's stream."""
123
124
def flush(self) -> Buffer:
125
"""Flush and return current compressed stream."""
126
127
def finish(self) -> Buffer:
128
"""Consume compressor state and return final compressed stream."""
129
130
class Decompressor:
131
"""Snappy streaming decompressor."""
132
```
133
134
### Snappy Usage Examples
135
136
```python { .api }
137
import cramjam
138
139
data = b"Snappy compression test" * 1000
140
141
# Framed format (standard, includes headers)
142
framed_compressed = cramjam.snappy.compress(data)
143
framed_decompressed = cramjam.snappy.decompress(framed_compressed)
144
145
# Raw format (no headers, smaller output)
146
raw_compressed = cramjam.snappy.compress_raw(data)
147
raw_decompressed = cramjam.snappy.decompress_raw(raw_compressed)
148
149
# Efficient raw format with pre-calculated sizes
150
max_compressed_size = cramjam.snappy.compress_raw_max_len(data)
151
output_buffer = cramjam.Buffer()
152
output_buffer.set_len(max_compressed_size)
153
actual_size = cramjam.snappy.compress_raw_into(data, output_buffer)
154
155
# Decompress with known size
156
decompressed_size = cramjam.snappy.decompress_raw_len(raw_compressed)
157
decomp_buffer = cramjam.Buffer()
158
decomp_buffer.set_len(decompressed_size)
159
cramjam.snappy.decompress_raw_into(raw_compressed, decomp_buffer)
160
```
161
162
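## LZ4 Module

Ultra-fast compression offering both the standard frame format and low-level block operations with tunable parameters (mode, acceleration, compression, and an optional stored-size header).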
165
166
### Standard Frame Operations
167
168
```python { .api }
169
def compress(data: BufferProtocol, level: Optional[int] = None, output_len: Optional[int] = None) -> Buffer:
170
"""LZ4 frame compression.
171
172
Args:
173
data: Input data to compress
174
level: Compression level (optional)
175
output_len: Optional expected output length (currently ignored)
176
177
Note: output_len is ignored; underlying algorithm does not support reading to slice
178
"""
179
180
def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
181
"""LZ4 frame decompression.
182
183
Args:
184
data: LZ4 frame compressed data
185
output_len: Optional expected output length (currently ignored)
186
187
Note: output_len is ignored; underlying algorithm does not support reading to slice
188
"""
189
190
def compress_into(input: BufferProtocol, output: BufferProtocol, level: Optional[int] = None) -> int:
191
"""Compress into output buffer using LZ4 frame format."""
192
193
def decompress_into(input: BufferProtocol, output: BufferProtocol) -> int:
194
"""Decompress LZ4 frame into output buffer."""
195
```
196
197
### Block Operations
198
199
```python { .api }
200
def compress_block(data: BufferProtocol, output_len: Optional[int] = None, mode: Optional[str] = None,
201
acceleration: Optional[int] = None, compression: Optional[int] = None,
202
store_size: Optional[bool] = None) -> Buffer:
203
"""LZ4 block compression with advanced parameters.
204
205
Args:
206
data: Input data to compress
207
output_len: Optional expected output length
208
mode: Compression mode (optional)
209
acceleration: Acceleration parameter for faster compression (optional)
210
compression: Compression parameter for better ratio (optional)
211
store_size: Whether to store size in header for decompression (optional)
212
213
Returns:
214
Buffer: Compressed block data
215
"""
216
217
def decompress_block(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
218
"""LZ4 block decompression.
219
220
Args:
221
data: Compressed block data
222
output_len: Optional upper bound length of decompressed data.
223
If not provided, assumes store_size=True was used during compression
224
225
Returns:
226
Buffer: Decompressed data
227
"""
228
229
def compress_block_into(data: BufferProtocol, output: BufferProtocol, mode: Optional[str] = None,
230
acceleration: Optional[int] = None, store_size: Optional[bool] = None) -> int:
231
"""LZ4 block compression into pre-allocated buffer.
232
233
Args:
234
data: Input data to compress
235
output: Pre-allocated output buffer
236
mode: Compression mode (optional)
237
acceleration: Acceleration parameter (optional)
238
store_size: Whether to store size in header (optional)
239
240
Returns:
241
int: Number of bytes written
242
"""
243
244
def decompress_block_into(input: BufferProtocol, output: BufferProtocol, output_len: Optional[int] = None) -> int:
245
"""LZ4 block decompression into pre-allocated buffer.
246
247
Args:
248
input: Compressed block data
249
output: Pre-allocated output buffer
250
output_len: Optional output length hint
251
252
Returns:
253
int: Number of bytes written
254
"""
255
```
256
257
### Utility Functions
258
259
```python { .api }
260
def compress_block_bound(src: BufferProtocol) -> int:
261
"""Determine guaranteed buffer size for block compression.
262
263
Args:
264
src: Source data to compress
265
266
Returns:
267
int: Buffer size guaranteed to hold compression result
268
269
Raises:
270
Error: If data is too long to be compressed by LZ4
271
"""
272
```
273
274
### Enhanced Streaming Classes
275
276
```python { .api }
277
class Compressor:
278
"""LZ4 streaming compressor with advanced options."""
279
280
def __init__(self, level: Optional[int] = None, content_checksum: Optional[bool] = None,
281
block_linked: Optional[bool] = None) -> None:
282
"""Initialize LZ4 compressor.
283
284
Args:
285
level: Compression level (optional)
286
content_checksum: Enable content checksum (optional)
287
block_linked: Enable block linking for better compression (optional)
288
"""
289
290
def compress(self, input: bytes) -> int:
291
"""Add data to compression stream."""
292
293
def flush(self) -> Buffer:
294
"""Flush and return current compressed stream."""
295
296
def finish(self) -> Buffer:
297
"""Finish compression and return final stream."""
298
299
class Decompressor:
300
"""LZ4 streaming decompressor."""
301
302
def __init__(self, *args, **kwargs) -> None:
303
"""Initialize decompressor with flexible arguments."""
304
305
def decompress(self, data: bytes) -> Buffer:
306
"""Decompress data chunk."""
307
```
308
309
### LZ4 Usage Examples
310
311
```python { .api }
312
import cramjam
313
314
data = b"LZ4 ultra-fast compression" * 2000
315
316
# Standard frame compression
317
compressed = cramjam.lz4.compress(data, level=1) # Fast compression
318
decompressed = cramjam.lz4.decompress(compressed)
319
320
# Block compression with size storage
321
block_compressed = cramjam.lz4.compress_block(data, store_size=True)
322
block_decompressed = cramjam.lz4.decompress_block(block_compressed) # No output_len needed
323
324
# Block compression with acceleration
325
fast_compressed = cramjam.lz4.compress_block(data, acceleration=10, store_size=True)
326
327
# Pre-allocated buffer with bound calculation
328
bound_size = cramjam.lz4.compress_block_bound(data)
329
output = cramjam.Buffer()
330
output.set_len(bound_size)
331
actual_size = cramjam.lz4.compress_block_into(data, output, acceleration=5)
332
333
# Advanced streaming with options
334
compressor = cramjam.lz4.Compressor(level=5, content_checksum=True, block_linked=True)
335
compressor.compress(b"First chunk")
336
compressor.compress(b"Second chunk")
337
result = compressor.finish()
338
```
339
340
## XZ/LZMA Module

High-ratio LZMA-based compression, configurable through presets, container formats, integrity checks, filter chains, and fine-grained encoder options.
343
344
### Enums and Configuration
345
346
```python { .api }
347
# Compression formats
348
class Format(Enum):
349
AUTO = ... # Auto-detect format
350
XZ = ... # XZ format
351
ALONE = ... # Legacy LZMA alone format
352
RAW = ... # Raw LZMA data
353
354
# Checksum types
355
class Check(Enum):
356
NONE = ... # No checksum
357
Crc32 = ... # CRC32 checksum
358
Crc64 = ... # CRC64 checksum
359
Sha256 = ... # SHA256 checksum
360
361
# Available filters
362
class Filter(Enum):
363
Lzma1 = ... # LZMA1 algorithm
364
Lzma2 = ... # LZMA2 algorithm (default)
365
X86 = ... # x86 BCJ filter
366
PowerPC = ... # PowerPC BCJ filter
367
Ia64 = ... # IA-64 BCJ filter
368
Arm = ... # ARM BCJ filter
369
ArmThumb = ... # ARM-Thumb BCJ filter
370
Sparc = ... # SPARC BCJ filter
371
372
# Match finder algorithms
373
class MatchFinder(Enum):
374
HashChain3 = ... # Hash chain with 3-byte hashing
375
HashChain4 = ... # Hash chain with 4-byte hashing
376
BinaryTree2 = ... # Binary tree with 2-byte hashing
377
BinaryTree3 = ... # Binary tree with 3-byte hashing
378
BinaryTree4 = ... # Binary tree with 4-byte hashing
379
380
# Compression modes
381
class Mode(Enum):
382
Fast = ... # Fast compression mode
383
Normal = ... # Normal compression mode
384
```
385
386
### Configuration Classes
387
388
```python { .api }
389
class Options:
390
"""Configuration options for XZ compression."""
391
392
def __init__(self) -> None:
393
"""Initialize options object."""
394
395
def set_preset(self, preset: int) -> Options:
396
"""Set compression preset (0-9).
397
398
Returns: Self for method chaining
399
"""
400
401
def set_dict_size(self, dict_size: int) -> Options:
402
"""Set dictionary size in bytes."""
403
404
def set_lc(self, lc: int) -> Options:
405
"""Set literal context bits (0-4)."""
406
407
def set_lp(self, lp: int) -> Options:
408
"""Set literal position bits (0-4)."""
409
410
def set_pb(self, pb: int) -> Options:
411
"""Set position bits (0-4)."""
412
413
def set_mode(self, mode: Mode) -> Options:
414
"""Set compression mode."""
415
416
def set_nice_len(self, nice_len: int) -> Options:
417
"""Set nice length parameter (3-273)."""
418
419
def set_mf(self, mf: MatchFinder) -> Options:
420
"""Set match finder algorithm."""
421
422
def set_depth(self, depth: int) -> Options:
423
"""Set search depth (0-1000)."""
424
425
class FilterChainItem:
426
"""Individual filter in compression chain."""
427
428
def __init__(self, filter: Filter, options: Optional[Options] = None) -> None:
429
"""Initialize filter chain item.
430
431
Args:
432
filter: Filter type to use
433
options: Optional configuration for this filter
434
"""
435
436
class FilterChain:
437
"""Chain of filters for advanced compression pipeline."""
438
439
def __init__(self) -> None:
440
"""Initialize empty filter chain."""
441
442
def append_filter(self, filter_chain_item: FilterChainItem) -> None:
443
"""Add filter to the chain.
444
445
Args:
446
filter_chain_item: Configured filter to append
447
"""
448
```
449
450
### Compression Functions
451
452
```python { .api }
453
def compress(data: BufferProtocol, preset: Optional[int] = None, format: Optional[Format] = None,
454
check: Optional[Check] = None, filters: Optional[FilterChain] = None,
455
options: Optional[Options] = None, output_len: Optional[int] = None) -> Buffer:
456
"""LZMA compression with comprehensive options.
457
458
Args:
459
data: Input data to compress
460
preset: Compression preset (0-9, default uses library default)
461
format: Compression format (default: XZ)
462
check: Checksum type (default: Crc64 for XZ format)
463
filters: Custom filter chain (optional)
464
options: Fine-grained compression options (optional)
465
output_len: Optional expected output length
466
467
Returns:
468
Buffer: Compressed data
469
"""
470
471
def decompress(data: BufferProtocol, output_len: Optional[int] = None) -> Buffer:
472
"""LZMA decompression (auto-detects format).
473
474
Args:
475
data: Compressed data to decompress
476
output_len: Optional expected output length
477
478
Returns:
479
Buffer: Decompressed data
480
"""
481
482
def compress_into(input: BufferProtocol, output: BufferProtocol, preset: Optional[int] = None,
483
format: Optional[Format] = None, check: Optional[Check] = None,
484
filters: Optional[FilterChain] = None, options: Optional[Options] = None) -> int:
485
"""LZMA compression directly into output buffer."""
486
487
def decompress_into(data: BufferProtocol, output: BufferProtocol) -> int:
488
"""LZMA decompression directly into output buffer."""
489
```
490
491
### XZ Usage Examples
492
493
```python { .api }
494
import cramjam
495
496
data = b"XZ compression with advanced options" * 1000
497
498
# Simple compression with preset
499
compressed = cramjam.xz.compress(data, preset=6)
500
decompressed = cramjam.xz.decompress(compressed)
501
502
# Custom format and checksum
503
compressed_custom = cramjam.xz.compress(
504
data,
505
format=cramjam.xz.Format.XZ,
506
check=cramjam.xz.Check.Sha256
507
)
508
509
# Advanced options configuration
510
options = (cramjam.xz.Options()
511
.set_preset(5)
512
.set_dict_size(1024 * 1024) # 1MB dictionary
513
.set_mode(cramjam.xz.Mode.Normal)
514
.set_mf(cramjam.xz.MatchFinder.BinaryTree4)
515
.set_depth(100))
516
517
compressed_advanced = cramjam.xz.compress(data, options=options)
518
519
# Custom filter chain with BCJ filter for x86 binaries
520
filter_chain = cramjam.xz.FilterChain()
521
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.X86))
522
filter_chain.append_filter(cramjam.xz.FilterChainItem(cramjam.xz.Filter.Lzma2, options))
523
524
compressed_bcj = cramjam.xz.compress(
525
data,
526
filters=filter_chain,
527
format=cramjam.xz.Format.XZ,
528
check=cramjam.xz.Check.Crc64
529
)
530
531
# Legacy LZMA alone format
532
compressed_alone = cramjam.xz.compress(
533
data,
534
format=cramjam.xz.Format.ALONE,
535
preset=9 # Maximum compression
536
)
537
```
538
539
## Advanced Patterns and Best Practices
540
541
### Algorithm Selection Criteria
542
543
```python { .api }
544
# Choose algorithm based on requirements
545
import cramjam
546
547
def compress_data(data, priority='balanced'):
    """Compress ``data`` with the cramjam codec that best fits ``priority``.

    Args:
        data: Input to compress; passed unchanged to the selected codec.
        priority: Selection criterion, one of:
            ``'speed'`` (LZ4 frame, level 1),
            ``'size'`` (XZ, preset 9),
            ``'balanced'`` (Zstandard, level 6; the default), or
            ``'compatibility'`` (gzip, level 6).

    Returns:
        Whatever the selected codec's ``compress`` function returns.

    Raises:
        ValueError: If ``priority`` is not one of the supported values.
    """
    if priority == 'speed':
        # Ultra-fast compression
        return cramjam.lz4.compress(data, level=1)

    if priority == 'size':
        # Maximum compression ratio
        return cramjam.xz.compress(data, preset=9)

    if priority == 'balanced':
        # Good speed/size balance
        return cramjam.zstd.compress(data, level=6)

    if priority == 'compatibility':
        # Maximum compatibility
        return cramjam.gzip.compress(data, level=6)

    # Fail loudly instead of falling off the end and returning None, which
    # would only surface later as a confusing error in the caller.
    raise ValueError(
        f"unknown priority {priority!r}; expected one of "
        "'speed', 'size', 'balanced', 'compatibility'"
    )
565
```
566
567
### Memory-Efficient Processing
568
569
```python { .api }
570
import cramjam
571
572
def compress_large_file(input_path, output_path, algorithm='zstd'):
    """Stream-compress the file at ``input_path`` into ``output_path``.

    The input is read in 1 MiB pieces and fed to a streaming compressor;
    the compressor is flushed after every piece so compressed output is
    written out as it is produced.

    Args:
        input_path: Path of the file to read (opened in binary mode).
        output_path: Path of the file to write (opened in binary mode).
        algorithm: ``'lz4'`` or ``'zstd'``; any other value selects gzip.
    """
    read_size = 1024 * 1024  # 1MB chunks

    # Pick the streaming compressor for the requested algorithm
    if algorithm == 'lz4':
        stream = cramjam.lz4.Compressor(
            level=5, content_checksum=True, block_linked=True
        )
    elif algorithm == 'zstd':
        stream = cramjam.zstd.Compressor(level=6)
    else:
        stream = cramjam.gzip.Compressor(level=6)

    with open(input_path, 'rb') as source, open(output_path, 'wb') as sink:
        while True:
            piece = source.read(read_size)
            if not piece:
                break
            stream.compress(piece)
            # Drain the compressor after each piece to avoid memory buildup
            pending = stream.flush()
            if pending:
                sink.write(bytes(pending))

        # Emit whatever the compressor still holds once the input is exhausted
        sink.write(bytes(stream.finish()))
598
```
599
600
### Format-Specific Optimizations
601
602
```python { .api }
603
import cramjam
604
605
# Snappy: Raw format for minimal overhead
606
def fast_compress_raw(data):
    """Compress ``data`` with Snappy's raw (unframed) format."""
    raw_output = cramjam.snappy.compress_raw(data)
    return raw_output
609
610
# LZ4: Block compression with acceleration
611
def compress_with_speed(data, speed_factor=10):
    """Compress ``data`` as an LZ4 block, tuned for speed.

    Args:
        data: Input to compress.
        speed_factor: Value passed as the ``acceleration`` argument.

    Returns:
        The result of ``cramjam.lz4.compress_block`` called with
        ``store_size=True``.
    """
    block_settings = {'acceleration': speed_factor, 'store_size': True}
    return cramjam.lz4.compress_block(data, **block_settings)
618
619
# XZ: Optimized for executable files
620
def compress_executable(binary_data):
    """Compress ``binary_data`` with XZ using an x86 BCJ + LZMA2 filter chain.

    The LZMA2 stage uses preset 6 with a 2**20-byte dictionary, and the
    output is protected with a SHA-256 check.
    """
    xz = cramjam.xz

    # LZMA2 stage settings: preset 6, 2**20-byte dictionary
    lzma2_options = xz.Options()
    lzma2_options = lzma2_options.set_preset(6)
    lzma2_options = lzma2_options.set_dict_size(1 << 20)

    # The x86 filter runs first, then LZMA2 with the options above
    chain = xz.FilterChain()
    x86_stage = xz.FilterChainItem(xz.Filter.X86)
    chain.append_filter(x86_stage)
    lzma2_stage = xz.FilterChainItem(xz.Filter.Lzma2, lzma2_options)
    chain.append_filter(lzma2_stage)

    return xz.compress(binary_data, filters=chain, check=xz.Check.Sha256)
633
```