# Advanced Decompression

Sophisticated decompression capabilities including streaming interfaces, frame analysis, batch processing, and dictionary support for high-performance data decompression.

## Capabilities

### ZstdDecompressor

Main decompression class providing full control over decompression parameters and advanced decompression modes.

```python { .api }
class ZstdDecompressor:
    def __init__(
        self,
        dict_data: ZstdCompressionDict = None,
        max_window_size: int = 0,
        format: int = FORMAT_ZSTD1
    ):
        """
        Create a decompression context.

        Parameters:
        - dict_data: ZstdCompressionDict, decompression dictionary
        - max_window_size: int, maximum window size (0 = unlimited)
        - format: int, expected format (FORMAT_ZSTD1, FORMAT_ZSTD1_MAGICLESS)
        """

    def memory_size(self) -> int:
        """Get memory usage of decompression context in bytes."""

    def decompress(
        self,
        data: bytes,
        max_output_size: int = 0,
        read_across_frames: bool = False,
        allow_extra_data: bool = False
    ) -> bytes:
        """
        Decompress data in one operation.

        Parameters:
        - data: bytes-like object containing compressed data
        - max_output_size: int, maximum output size (0 = unlimited)
        - read_across_frames: bool, read multiple frames
        - allow_extra_data: bool, allow trailing data after frame

        Returns:
        bytes: Decompressed data
        """

    def decompressobj(
        self,
        write_size: int = -1,
        read_across_frames: bool = False
    ) -> ZstdDecompressionObj:
        """
        Create a decompression object for streaming operations.

        Parameters:
        - write_size: int, preferred write size
        - read_across_frames: bool, process multiple frames

        Returns:
        ZstdDecompressionObj: Streaming decompression object
        """
```

**Usage Example:**

```python
import zstandard as zstd

# Basic decompressor
decompressor = zstd.ZstdDecompressor()
decompressed = decompressor.decompress(compressed_data)

# Decompressor with safety limits
decompressor = zstd.ZstdDecompressor(max_window_size=1<<20)  # 1MB window limit
decompressed = decompressor.decompress(
    compressed_data,
    max_output_size=10*1024*1024  # 10MB output limit
)

# Memory usage monitoring
print(f"Decompressor memory usage: {decompressor.memory_size()} bytes")
```

### Streaming Decompression

Stream-based decompression for handling large compressed data without loading everything into memory.

```python { .api }
class ZstdDecompressor:
    def stream_reader(
        self,
        source,
        read_size: int = -1,
        read_across_frames: bool = False,
        closefd: bool = False
    ) -> ZstdDecompressionReader:
        """
        Create a streaming decompression reader.

        Parameters:
        - source: file-like object or bytes to read from
        - read_size: int, preferred read size
        - read_across_frames: bool, read multiple frames
        - closefd: bool, whether to close source when done

        Returns:
        ZstdDecompressionReader: Streaming decompression reader
        """

    def stream_writer(
        self,
        writer,
        write_size: int = -1,
        write_return_read: bool = False,
        closefd: bool = True
    ) -> ZstdDecompressionWriter:
        """
        Create a streaming decompression writer.

        Parameters:
        - writer: file-like object to write decompressed data to
        - write_size: int, preferred write size
        - write_return_read: bool, return read count instead of write count
        - closefd: bool, whether to close writer when done

        Returns:
        ZstdDecompressionWriter: Streaming decompression writer
        """

    def copy_stream(
        self,
        ifh,
        ofh,
        read_size: int = -1,
        write_size: int = -1
    ) -> tuple[int, int]:
        """
        Copy and decompress data between streams.

        Parameters:
        - ifh: input file-like object with compressed data
        - ofh: output file-like object for decompressed data
        - read_size: int, read buffer size
        - write_size: int, write buffer size

        Returns:
        tuple[int, int]: (bytes_read, bytes_written)
        """

    def read_to_iter(
        self,
        reader,
        read_size: int = -1,
        write_size: int = -1,
        skip_bytes: int = 0
    ):
        """
        Create iterator that yields decompressed chunks.

        Parameters:
        - reader: file-like object or bytes to read from
        - read_size: int, read buffer size
        - write_size: int, output chunk size
        - skip_bytes: int, bytes to skip at start

        Yields:
        bytes: Decompressed data chunks
        """
```

**Usage Examples:**

```python
import zstandard as zstd
import io

decompressor = zstd.ZstdDecompressor()

# Stream reader - decompress data as you read
compressed_data = b"..."  # compressed data
reader = decompressor.stream_reader(io.BytesIO(compressed_data))
decompressed_chunks = []
while True:
    chunk = reader.read(8192)
    if not chunk:
        break
    decompressed_chunks.append(chunk)

# Stream writer - decompress data as you write.
# closefd=False keeps the BytesIO open after the with block; the default
# closefd=True would close it and make getvalue() raise ValueError.
output = io.BytesIO()
with decompressor.stream_writer(output, closefd=False) as writer:
    writer.write(compressed_chunk1)
    writer.write(compressed_chunk2)

decompressed_data = output.getvalue()

# Copy between streams with decompression
with open('compressed.zst', 'rb') as input_file, \
        open('decompressed.txt', 'wb') as output_file:
    bytes_read, bytes_written = decompressor.copy_stream(input_file, output_file)
    print(f"Read {bytes_read} bytes, wrote {bytes_written} bytes")

# Iterator interface
for chunk in decompressor.read_to_iter(io.BytesIO(compressed_data)):
    process_decompressed_chunk(chunk)
```

### Multi-Threading Decompression

Parallel decompression for improved performance when processing multiple compressed items.

```python { .api }
class ZstdDecompressor:
    def multi_decompress_to_buffer(
        self,
        frames,
        decompressed_sizes: bytes = b"",
        threads: int = 0
    ) -> BufferWithSegmentsCollection:
        """
        Decompress multiple frames in parallel.

        Parameters:
        - frames: BufferWithSegments, BufferWithSegmentsCollection, or list of bytes
        - decompressed_sizes: bytes, expected sizes of decompressed data
        - threads: int, number of threads (0 = auto, -1 = no threading)

        Returns:
        BufferWithSegmentsCollection: Collection of decompressed segments
        """
```

**Usage Example:**

```python
import zstandard as zstd

decompressor = zstd.ZstdDecompressor()

# Prepare multiple compressed frames
compressed_frames = [
    compressed_data1,
    compressed_data2,
    compressed_data3
]

# Decompress in parallel
result = decompressor.multi_decompress_to_buffer(compressed_frames, threads=4)

# Access decompressed segments
for i in range(len(result)):
    segment = result[i]
    print(f"Segment {i}: {len(segment)} bytes")
    decompressed_data = segment.tobytes()
```

### Dictionary Chain Decompression

Special decompression mode for processing dictionary-compressed frame chains.

```python { .api }
class ZstdDecompressor:
    def decompress_content_dict_chain(self, frames: list[bytes]) -> bytes:
        """
        Decompress a chain of frames where each frame uses the previous as dictionary.

        Parameters:
        - frames: list of bytes, frames in dependency order

        Returns:
        bytes: Final decompressed data
        """
```

**Usage Example:**

```python
import zstandard as zstd

decompressor = zstd.ZstdDecompressor()

# Frames where each uses previous as dictionary
frame_chain = [
    base_frame,
    dependent_frame1,
    dependent_frame2
]

# Decompress the chain
final_data = decompressor.decompress_content_dict_chain(frame_chain)
```

### Streaming Objects

Low-level streaming decompression objects for fine-grained control over the decompression process.

```python { .api }
class ZstdDecompressionObj:
    def decompress(self, data: bytes) -> bytes:
        """
        Decompress data chunk.

        Parameters:
        - data: bytes to decompress

        Returns:
        bytes: Decompressed data (may be empty)
        """

    def flush(self, length: int = -1) -> bytes:
        """
        Flush decompression buffer.

        Parameters:
        - length: int, maximum bytes to return

        Returns:
        bytes: Remaining decompressed data
        """

    @property
    def unused_data(self) -> bytes:
        """Unused input data after frame end."""

    @property
    def unconsumed_tail(self) -> bytes:
        """Input data not yet processed."""

    @property
    def eof(self) -> bool:
        """Whether end of frame has been reached."""
```

**Usage Example:**

```python
import zstandard as zstd

decompressor = zstd.ZstdDecompressor()
obj = decompressor.decompressobj()

decompressed_chunks = []
compressed_data = b"..."  # compressed data

# Process data in chunks
chunk_size = 8192
for i in range(0, len(compressed_data), chunk_size):
    chunk = compressed_data[i:i+chunk_size]
    decompressed = obj.decompress(chunk)
    if decompressed:
        decompressed_chunks.append(decompressed)

    # Check if frame is complete
    if obj.eof:
        break

# Get any remaining data
remaining = obj.flush()
if remaining:
    decompressed_chunks.append(remaining)

# Check for unused data
if obj.unused_data:
    print(f"Unused data: {len(obj.unused_data)} bytes")
```

369
370
### File-Like Interfaces
371
372
Stream readers and writers that provide full file-like interfaces for decompression.
373
374
```python { .api }
375
class ZstdDecompressionReader:
376
def read(self, size: int = -1) -> bytes:
377
"""Read decompressed data."""
378
379
def read1(self, size: int = -1) -> bytes:
380
"""Read at most one buffer worth of data."""
381
382
def readinto(self, b) -> int:
383
"""Read data into pre-allocated buffer."""
384
385
def readinto1(self, b) -> int:
386
"""Read data into buffer, at most one read operation."""
387
388
def readline(self, size: int = -1) -> bytes:
389
"""Read line from decompressed data."""
390
391
def readlines(self, hint: int = -1) -> list[bytes]:
392
"""Read lines from decompressed data."""
393
394
def seek(self, pos: int, whence: int = 0) -> int:
395
"""Seek within decompressed data (limited support)."""
396
397
def tell(self) -> int:
398
"""Get current position."""
399
400
def close(self):
401
"""Close reader and underlying source."""
402
403
class ZstdDecompressionWriter:
404
def write(self, data: bytes) -> int:
405
"""Write compressed data for decompression."""
406
407
def flush(self):
408
"""Flush any buffered data."""
409
410
def close(self):
411
"""Close writer and underlying destination."""
412
```
413
414
**Usage Example:**
415
416
```python
417
import zstandard as zstd
418
419
decompressor = zstd.ZstdDecompressor()
420
421
# Use as file-like reader
422
with open('data.zst', 'rb') as f:
423
reader = decompressor.stream_reader(f)
424
425
# Read line by line
426
for line in reader:
427
process_line(line)
428
429
# Random access (if supported)
430
reader.seek(0)
431
first_chunk = reader.read(1024)
432
433
# Use as file-like writer
434
with open('output.txt', 'wb') as f:
435
writer = decompressor.stream_writer(f)
436
437
# Write compressed data for decompression
438
writer.write(compressed_chunk1)
439
writer.write(compressed_chunk2)
440
writer.flush()
441
```