0
# I/O System and Callbacks
1
2
py7zr provides a flexible I/O abstraction layer and callback system that enables custom extraction destinations, progress monitoring, and pluggable storage backends. The I/O system supports file-based, memory-based, and custom implementations, while callbacks provide real-time progress reporting during archive operations.
3
4
## Capabilities
5
6
### I/O Abstraction Layer
7
8
Abstract base class that defines the I/O interface used for archive operations.
9
10
```python { .api }
11
class Py7zIO:
    """
    Abstract base class for py7zr I/O operations.

    Provides the interface for the reading, writing, and seeking operations
    used during archive extraction and creation. Custom backends should
    implement every method listed here.
    """

    def write(self, s):
        """
        Write bytes to the I/O stream.

        Parameters:
        - s: bytes, data to write

        Returns:
        int: number of bytes written
        """

    def read(self, size=None):
        """
        Read bytes from the I/O stream.

        Parameters:
        - size: int, number of bytes to read (None for all)

        Returns:
        bytes: data read from stream
        """

    def seek(self, offset, whence=0):
        """
        Change stream position.

        Parameters:
        - offset: int, offset in bytes
        - whence: int, reference point (0=start, 1=current, 2=end)

        Returns:
        int: new absolute position
        """

    def flush(self):
        """
        Flush any buffered write data.
        """

    def size(self):
        """
        Get total size of the stream.

        Returns:
        int: stream size in bytes
        """
65
```
66
67
### Concrete I/O Implementations
68
69
Ready-to-use I/O implementations for common scenarios.
70
71
```python { .api }
72
class HashIO(Py7zIO):
    """
    I/O wrapper that computes hash while writing.

    Useful for verifying file integrity during extraction.

    Parameters:
    - filename: name of the archive member this stream receives
    """

    def __init__(self, filename): ...
79
80
class Py7zBytesIO(Py7zIO):
    """
    Memory-based I/O with size limits.

    Stores data in memory with optional size constraints.

    Parameters:
    - filename: name of the archive member this stream receives
    - limit: optional size constraint (None for no constraint)
    """

    def __init__(self, filename, limit=None): ...
87
88
class NullIO(Py7zIO):
    """
    Null device I/O that discards all writes.

    Useful for testing or when only checking archive contents.
    """

    def __init__(self): ...
95
96
class MemIO(Py7zIO):
    """
    Memory-based I/O with factory pattern.

    Combines memory storage with factory-based creation.

    Parameters:
    - fname: name of the archive member this stream receives
    - factory: factory used to create the underlying stream
    """

    def __init__(self, fname, factory): ...
103
104
class Buffer:
    """
    Utility buffer for byte operations.

    Parameters:
    - size: int, buffer size (defaults to 16)
    """

    def __init__(self, size=16): ...
109
```
110
111
#### Usage Examples
112
113
```python
114
import py7zr
from py7zr import HashIO, Py7zBytesIO, NullIO


# Extract to memory with size limit
class MemoryFactory(py7zr.WriterFactory):
    """Create one size-limited in-memory stream per archive member."""

    def __init__(self):
        # Keep every created stream reachable; otherwise the extracted data
        # is lost as soon as extraction finishes.
        self.products = {}

    def create(self, filename):
        stream = Py7zBytesIO(filename, limit=1024*1024)  # 1MB limit
        self.products[filename] = stream
        return stream


memory_factory = MemoryFactory()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=memory_factory)

# The extracted contents are now available by member name
for name, stream in memory_factory.products.items():
    print(f"{name}: {stream.size()} bytes")


# Extract with hash verification
class HashFactory(py7zr.WriterFactory):
    """Create one hashing stream per archive member."""

    def __init__(self):
        # Keep the HashIO objects so their results can be inspected later.
        self.products = {}

    def create(self, filename):
        stream = HashIO(filename)
        self.products[filename] = stream
        return stream


hash_factory = HashFactory()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=hash_factory)


# Test extraction without writing files
class TestFactory(py7zr.WriterFactory):
    """Discard all extracted data; nothing needs to be kept."""

    def create(self, filename):
        return NullIO()


with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=TestFactory())
140
```
141
142
### Factory Pattern
143
144
Factory classes for creating I/O instances during archive operations.
145
146
```python { .api }
147
class WriterFactory:
    """
    Abstract factory for creating Py7zIO writers.

    Enables custom I/O backend selection during extraction.
    """

    def create(self, filename):
        """
        Create I/O writer for specified filename.

        Parameters:
        - filename: str, target filename

        Returns:
        Py7zIO: I/O instance for the file
        """
163
164
class HashIOFactory(WriterFactory):
    """Factory for creating HashIO instances."""

    def create(self, filename): ...
167
168
class BytesIOFactory(WriterFactory):
    """Factory for creating Py7zBytesIO instances with size limits."""

    def __init__(self, limit=None): ...

    def create(self, filename): ...
172
173
class NullIOFactory(WriterFactory):
    """Factory for creating NullIO instances."""

    def create(self, filename): ...
176
```
177
178
#### Custom Factory Example
179
180
```python
181
import os

import py7zr
from py7zr import WriterFactory, Py7zIO


class CustomFileIO(Py7zIO):
    """Write one archive member to disk and apply permissions when closed."""

    def __init__(self, path, perms):
        self.path = path
        self.perms = perms
        # 'w+b' (rather than 'wb') so that read() can return what was written.
        self.file = open(path, 'w+b')

    def write(self, data):
        return self.file.write(data)

    def read(self, size=None):
        return self.file.read(size)

    def seek(self, offset, whence=0):
        return self.file.seek(offset, whence)

    def flush(self):
        self.file.flush()

    def size(self):
        # Flush first so buffered writes are counted.
        self.file.flush()
        return os.fstat(self.file.fileno()).st_size

    def close(self):
        """Close the file and apply the requested permissions (idempotent)."""
        if not self.file.closed:
            self.file.close()
            os.chmod(self.path, self.perms)


class CustomFileFactory(WriterFactory):
    """Custom factory that creates files with specific permissions."""

    def __init__(self, base_path, permissions=0o644):
        self.base_path = base_path
        self.permissions = permissions
        # close() is not part of the Py7zIO interface, so do not rely on the
        # library calling it: track the writers and close them ourselves.
        self.products = []

    def create(self, filename):
        base = os.path.realpath(self.base_path)
        full_path = os.path.realpath(os.path.join(base, filename))
        # Member names come from the archive; refuse any that would land
        # outside base_path (absolute paths, '..' components).
        if os.path.commonpath([base, full_path]) != base:
            raise ValueError(f"Refusing to extract outside {base}: {filename}")
        os.makedirs(os.path.dirname(full_path), exist_ok=True)

        writer = CustomFileIO(full_path, self.permissions)
        self.products.append(writer)
        return writer

    def close(self):
        """Close every file created so far and apply its permissions."""
        for writer in self.products:
            writer.close()


# Use custom factory
factory = CustomFileFactory('/tmp/extracted', permissions=0o755)
try:
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        archive.extractall(factory=factory)
finally:
    # Always release file handles, even if extraction fails part-way.
    factory.close()
215
```
216
217
### Callback System
218
219
Progress reporting and event handling during archive operations.
220
221
```python { .api }
222
class Callback:
    """
    Abstract base class for operation callbacks.

    Provides hooks for monitoring and controlling archive operations.
    Custom callbacks should implement every hook listed here.
    """

    def report_start_preparation(self):
        """
        Called at the start of operation preparation phase.
        """

    def report_start(self, processing_file_path, processing_bytes):
        """
        Called when starting to process a file.

        Parameters:
        - processing_file_path: str, path of file being processed
        - processing_bytes: int, total bytes to process
        """

    def report_update(self, decompressed_bytes):
        """
        Called periodically during processing with progress info.

        Parameters:
        - decompressed_bytes: int, bytes processed so far
        """

    def report_end(self, processing_file_path, wrote_bytes):
        """
        Called when file processing is complete.

        Parameters:
        - processing_file_path: str, path of processed file
        - wrote_bytes: int, total bytes written
        """

    def report_warning(self, message):
        """
        Called when a warning occurs during processing.

        Parameters:
        - message: str, warning message
        """

    def report_postprocess(self):
        """
        Called during post-processing phase.
        """
272
```
273
274
### Concrete Callback Implementations
275
276
Pre-built callback implementations for common use cases.
277
278
```python { .api }
279
class ExtractCallback(Callback):
    """
    Default callback implementation for extraction operations.

    Provides basic progress reporting to stdout.
    """
285
286
class ArchiveCallback(Callback):
    """
    Default callback implementation for archive creation operations.

    Provides basic progress reporting to stdout.
    """
292
```
293
294
#### Custom Callback Examples
295
296
```python
297
import py7zr
from py7zr import Callback


class ProgressCallback(Callback):
    """Custom callback with progress bar."""

    def __init__(self):
        self.current_file = None
        self.total_bytes = 0
        self.processed_bytes = 0

    def report_start_preparation(self):
        print("Preparing archive operation...")

    def report_start(self, processing_file_path, processing_bytes):
        # Reset the per-file counters for the new file.
        self.current_file = processing_file_path
        self.total_bytes = processing_bytes
        self.processed_bytes = 0
        print(f"Processing: {processing_file_path}")

    def report_update(self, decompressed_bytes):
        self.processed_bytes += decompressed_bytes
        # Skip the percentage for empty files to avoid dividing by zero.
        if self.total_bytes > 0:
            percent = (self.processed_bytes / self.total_bytes) * 100
            print(f"Progress: {percent:.1f}% ({self.processed_bytes}/{self.total_bytes} bytes)")

    def report_end(self, processing_file_path, wrote_bytes):
        print(f"Completed: {processing_file_path} ({wrote_bytes} bytes)")

    def report_warning(self, message):
        print(f"Warning: {message}")

    def report_postprocess(self):
        # Nothing to report here, but every Callback hook is implemented so
        # the class is complete even if the base class declares them abstract.
        pass


# Use custom callback
callback = ProgressCallback()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(callback=callback)
333
```
334
335
```python
336
import py7zr
from py7zr import Callback


class LoggingCallback(Callback):
    """Callback that logs to file."""

    def __init__(self, log_file):
        self.log_file = log_file

    def _log(self, line):
        # Append one line per event; reopening keeps the log usable even if
        # the process dies mid-extraction.
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(f"{line}\n")

    def report_start_preparation(self):
        pass

    def report_start(self, processing_file_path, processing_bytes):
        self._log(f"START: {processing_file_path} ({processing_bytes} bytes)")

    def report_update(self, decompressed_bytes):
        pass

    def report_end(self, processing_file_path, wrote_bytes):
        self._log(f"END: {processing_file_path} ({wrote_bytes} bytes)")

    def report_warning(self, message):
        self._log(f"WARNING: {message}")

    def report_postprocess(self):
        pass


# Use logging callback
callback = LoggingCallback('extraction.log')
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(callback=callback)
358
```
359
360
### Advanced I/O Patterns
361
362
Complex usage patterns combining I/O and callbacks.
363
364
#### Streaming Extraction to Network
365
366
```python
367
import io
import socket

import py7zr
from py7zr import WriterFactory, Py7zIO


class NetworkIO(Py7zIO):
    """Stream extracted files over network."""

    def __init__(self, filename, socket_conn):
        self.filename = filename
        self.socket = socket_conn
        self.bytes_sent = 0
        self._header_sent = False

    def write(self, data):
        # Send the filename header exactly once. A dedicated flag is used
        # because bytes_sent stays 0 after an empty first write.
        if not self._header_sent:
            header = f"FILE:{self.filename}\n".encode()
            self.socket.sendall(header)
            self._header_sent = True

        # sendall() retries until everything is transmitted; send() may
        # transmit only part of the buffer.
        self.socket.sendall(data)
        self.bytes_sent += len(data)
        return len(data)

    def read(self, size=None):
        raise io.UnsupportedOperation("NetworkIO is write-only")

    def seek(self, offset, whence=0):
        # A socket cannot be repositioned; treat seek as a no-op and report
        # how much has been sent so far.
        return self.bytes_sent

    def flush(self):
        # Nothing is buffered locally: write() hands data straight to the socket.
        pass

    def size(self):
        return self.bytes_sent


class NetworkFactory(WriterFactory):
    """Create a NetworkIO per archive member, all sharing one connection."""

    def __init__(self, socket_conn):
        self.socket = socket_conn

    def create(self, filename):
        return NetworkIO(filename, self.socket)


# Extract over network
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect(('remote_host', 8080))
    factory = NetworkFactory(s)

    # Extraction must run while the socket is still open.
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        archive.extractall(factory=factory)
403
```
404
405
#### Conditional Extraction with Callbacks
406
407
```python
408
from py7zr import Callback


class SelectiveCallback(Callback):
    """Callback that can skip files based on criteria."""

    def __init__(self, max_file_size=1024*1024):
        self.max_file_size = max_file_size
        self.skip_current = False

    def report_start_preparation(self):
        pass

    def report_start(self, processing_file_path, processing_bytes):
        # Record the decision on the instance as well as returning it, so
        # surrounding code can inspect skip_current.
        if processing_bytes > self.max_file_size:
            print(f"Skipping large file: {processing_file_path} ({processing_bytes} bytes)")
            self.skip_current = True
            return False  # Skip this file

        self.skip_current = False
        return True  # Process this file

    def report_update(self, decompressed_bytes):
        pass

    def report_end(self, processing_file_path, wrote_bytes):
        pass

    def report_warning(self, message):
        pass

    def report_postprocess(self):
        pass

# Note: Actual file skipping requires integration with extraction logic.
# The Callback hooks document no return value, so returning False here only
# records the intent; it does not stop the file from being extracted.
425
```
426
427
## Integration Examples
428
429
### With Progress Bars (tqdm)
430
431
```python
432
import py7zr
from py7zr import Callback
from tqdm import tqdm


class TqdmCallback(Callback):
    """Show one tqdm progress bar per extracted file."""

    def __init__(self):
        self.pbar = None

    def report_start_preparation(self):
        pass

    def report_start(self, processing_file_path, processing_bytes):
        self.pbar = tqdm(total=processing_bytes,
                         desc=f"Extracting {processing_file_path}",
                         unit='B', unit_scale=True)

    def report_update(self, decompressed_bytes):
        # Compare against None: a tqdm bar with total == 0 is falsy, so a
        # plain truthiness test would ignore bars created for empty files.
        if self.pbar is not None:
            self.pbar.update(decompressed_bytes)

    def report_end(self, processing_file_path, wrote_bytes):
        if self.pbar is not None:
            self.pbar.close()
            # Drop the finished bar so later updates cannot touch it.
            self.pbar = None

    def report_warning(self, message):
        # tqdm.write prints without corrupting an active progress bar.
        tqdm.write(f"Warning: {message}")

    def report_postprocess(self):
        pass


# Extract with progress bar
callback = TqdmCallback()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(callback=callback)
457
```
458
459
### With Cloud Storage
460
461
```python
462
from io import BytesIO

import boto3
import py7zr
from py7zr import WriterFactory, Py7zIO


class S3IO(Py7zIO):
    """Upload extracted files directly to S3."""

    def __init__(self, filename, s3_client, bucket, prefix=""):
        self.filename = filename
        self.s3_client = s3_client
        self.bucket = bucket
        # Strip a trailing slash so 'dir/' and 'dir' give the same key.
        self.key = f"{prefix.rstrip('/')}/{filename}" if prefix else filename
        self.buffer = BytesIO()
        self._uploaded = False

    def write(self, data):
        return self.buffer.write(data)

    def read(self, size=None):
        return self.buffer.read(size)

    def seek(self, offset, whence=0):
        return self.buffer.seek(offset, whence)

    def flush(self):
        self.buffer.flush()

    def size(self):
        return self.buffer.getbuffer().nbytes

    def close(self):
        """Upload the buffered contents to S3 (only once)."""
        if self._uploaded:
            return
        self.buffer.seek(0)
        self.s3_client.upload_fileobj(self.buffer, self.bucket, self.key)
        self._uploaded = True


class S3Factory(WriterFactory):
    """Create one S3IO per archive member and upload them on close()."""

    def __init__(self, bucket, prefix=""):
        self.s3_client = boto3.client('s3')
        self.bucket = bucket
        self.prefix = prefix
        # close() is not part of the Py7zIO interface, so do not rely on the
        # library calling it: track the writers and upload them ourselves.
        self.products = []

    def create(self, filename):
        writer = S3IO(filename, self.s3_client, self.bucket, self.prefix)
        self.products.append(writer)
        return writer

    def close(self):
        """Upload every extracted file."""
        for writer in self.products:
            writer.close()


# Extract directly to S3
factory = S3Factory('my-bucket', 'extracted-files')
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=factory)

# Upload only after extraction succeeded, so partial files never reach S3.
factory.close()
496
```