0
# Exception Handling
1
2
py7zr provides comprehensive error handling with specific exception types for different failure modes. The exception hierarchy enables precise error handling for various scenarios including invalid archives, compression errors, missing passwords, and security violations.
3
4
## Capabilities
5
6
### Exception Hierarchy
7
8
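py7zr exceptions inherit from standard Python exceptions. Most archive-related errors derive from `ArchiveError`, but `PasswordRequired` and `AbsolutePathError` derive directly from `Exception`, so an `except ArchiveError` clause does not catch them.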
9
10
```python { .api }
11
class ArchiveError(Exception):
    """
    Base class for all archive-related errors.

    Parent class for most py7zr-specific exceptions. PasswordRequired and
    AbsolutePathError derive directly from Exception instead, so
    ``except ArchiveError`` does not catch those two.
    """


class Bad7zFile(ArchiveError):
    """
    Raised when archive file is invalid or corrupted.

    Indicates the file is not a valid 7z archive or has structural damage.
    """


class DecompressionError(ArchiveError):
    """
    Raised when decompression operation fails.

    Indicates failure during decompression process, possibly due to
    corrupted data or unsupported compression method.
    """


class PasswordRequired(Exception):
    """
    Raised when password is needed for encrypted archive.

    Indicates the archive is password-protected and no password
    was provided or the provided password is incorrect.
    """


class UnsupportedCompressionMethodError(ArchiveError):
    """
    Raised when archive uses unsupported compression method.

    Indicates the archive contains data compressed with a method
    not supported by the current py7zr installation.

    Attributes:
    - data: bytes, problematic data causing the error
    - message: str, descriptive error message
    """

    def __init__(self, data, message):
        """
        Parameters:
        - data: bytes, problematic data causing the error
        - message: str, descriptive error message
        """
        # Forward to Exception so str(exc) and exc.args carry both values.
        super().__init__(data, message)
        self.data = data
        self.message = message


class CrcError(ArchiveError):
    """
    Raised when CRC verification fails during extraction.

    Indicates data corruption detected through checksum mismatch.

    Attributes:
    - expected: int, expected CRC value
    - actual: int, calculated CRC value
    - filename: str, name of file with CRC error
    """

    def __init__(self, expected, actual, filename):
        """
        Parameters:
        - expected: int, expected CRC value
        - actual: int, calculated CRC value
        - filename: str, name of file with CRC error
        """
        # Forward to Exception so str(exc) and exc.args carry all three values.
        super().__init__(expected, actual, filename)
        # Stored as attributes because handlers read e.expected / e.actual /
        # e.filename when reporting the mismatch.
        self.expected = expected
        self.actual = actual
        self.filename = filename


class InternalError(ArchiveError):
    """
    Raised when internal py7zr error occurs.

    Indicates unexpected internal state or logic error.
    """


class AbsolutePathError(Exception):
    """
    Raised when archive contains absolute paths.

    Security exception raised when archive member paths contain
    absolute paths that could overwrite system files.
    """
83
```
84
85
### Common Error Scenarios
86
87
#### Invalid Archive Files
88
89
```python
90
import py7zr
91
92
# Attempt the extraction directly and report a malformed archive.
try:
    with py7zr.SevenZipFile('not_an_archive.txt', 'r') as szf:
        szf.extractall()
except py7zr.Bad7zFile as exc:
    # Handle invalid archive - maybe try different format
    print(f"Invalid archive file: {exc}")

# Pre-check if file is valid 7z archive
if not py7zr.is_7zfile('suspect_file.7z'):
    print("File is not a valid 7z archive")
else:
    # Safe to process
    with py7zr.SevenZipFile('suspect_file.7z', 'r') as szf:
        szf.extractall()
106
```
107
108
#### Password Protection
109
110
```python
111
import py7zr
112
113
import getpass  # stdlib; reads the password without echoing it to the terminal

# Try without a password first; prompt only when the archive demands one.
try:
    with py7zr.SevenZipFile('encrypted.7z', 'r') as archive:
        archive.extractall()
except py7zr.PasswordRequired:
    print("Archive is password protected")
    password = getpass.getpass("Enter password: ")

    try:
        with py7zr.SevenZipFile('encrypted.7z', 'r', password=password) as archive:
            archive.extractall()
    except py7zr.PasswordRequired:
        print("Invalid password")

# Check if password needed before attempting extraction.
# The probe handle is closed before the archive is opened again, so the file
# is never held open twice and no handle name is rebound inside its own
# `with` block.
with py7zr.SevenZipFile('archive.7z', 'r') as probe:
    encrypted = probe.needs_password()

password = getpass.getpass("Password required: ") if encrypted else None
with py7zr.SevenZipFile('archive.7z', 'r', password=password) as archive:
    archive.extractall()
135
```
136
137
#### Decompression Errors
138
139
```python
140
import py7zr
141
142
try:
    with py7zr.SevenZipFile('corrupted.7z', 'r') as archive:
        archive.extractall()
except py7zr.DecompressionError as e:
    print(f"Decompression failed: {e}")
    # Archive may be partially corrupted
    # Try extracting individual files
    try:
        # The handle from the failed attempt was closed when its `with` block
        # exited, so the archive has to be opened again here.
        with py7zr.SevenZipFile('corrupted.7z', 'r') as archive:
            for filename in archive.getnames():
                try:
                    # Member names go in `targets`; the first positional
                    # parameter of extract() is the destination path.
                    archive.extract(targets=[filename])
                    print(f"Successfully extracted: {filename}")
                except py7zr.DecompressionError:
                    print(f"Failed to extract: {filename}")
                finally:
                    # An archive object is exhausted after extract(); reset()
                    # rewinds it so the next member can be extracted.
                    archive.reset()
    except Exception:
        # Deliberate best-effort fallback; unlike a bare `except:` this does
        # not swallow KeyboardInterrupt or SystemExit.
        print("Archive is severely corrupted")
159
```
160
161
#### CRC Verification Failures
162
163
```python
164
import py7zr
165
166
try:
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        archive.extractall()
except py7zr.CrcError as e:
    print(f"CRC error in file '{e.filename}': expected {e.expected:08x}, got {e.actual:08x}")
    # File is corrupted, but other files might be OK
    # Could continue with other files or abort

    # Option 1: Continue with other files
    # The first handle was closed when its `with` block exited, so reopen.
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        for filename in archive.getnames():
            if filename == e.filename:
                continue  # skip the member already known to be corrupted
            try:
                # Member names go in `targets`; the first positional
                # parameter of extract() is the destination path.
                archive.extract(targets=[filename])
            except py7zr.CrcError as crc_err:
                print(f"Another CRC error: {crc_err.filename}")
            finally:
                # Rewind the exhausted archive before the next extraction.
                archive.reset()
182
```
183
184
#### Unsupported Compression Methods
185
186
```python
187
import py7zr
188
189
try:
    with py7zr.SevenZipFile('advanced_archive.7z', 'r') as szf:
        szf.extractall()
except py7zr.UnsupportedCompressionMethodError as e:
    # Report the failure, then suggest installing additional dependencies.
    advice = (
        f"Unsupported compression method: {e}",
        "This archive requires additional compression libraries",
        "Try installing optional dependencies:",
        " pip install py7zr[full]",
        " # or individually:",
        " pip install pyzstd pyppmd brotli",
    )
    for line in advice:
        print(line)
201
```
202
203
#### Security Violations
204
205
```python
206
import py7zr
207
208
try:
    with py7zr.SevenZipFile('malicious.7z', 'r') as archive:
        archive.extractall()
except py7zr.AbsolutePathError as e:
    print(f"Security violation: {e}")
    print("Archive contains absolute paths that could overwrite system files")

    # Could extract to safe directory instead
    safe_extraction_path = '/tmp/safe_extraction'
    try:
        # The first handle was closed when its `with` block exited, so the
        # archive must be reopened before extraction is attempted again.
        with py7zr.SevenZipFile('malicious.7z', 'r') as archive:
            archive.extractall(path=safe_extraction_path)
    except py7zr.AbsolutePathError:
        print("Even safe extraction failed - archive is malicious")
221
```
222
223
## Comprehensive Error Handling Patterns
224
225
### Robust Archive Processing
226
227
```python
228
import py7zr
229
import os
230
import logging
231
232
def robust_extract(archive_path, output_path, password=None):
    """
    Robustly extract 7z archive with comprehensive error handling.

    Members are extracted one at a time so that a failure in one member is
    recorded and the remaining members are still attempted.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - password: str, optional password

    Returns:
        tuple: (success: bool, extracted_files: list, errors: list)
        success is True only when every member was extracted without error.
    """
    extracted_files = []
    errors = []

    # Pre-check if file exists and is valid
    if not os.path.exists(archive_path):
        return False, [], [f"Archive file not found: {archive_path}"]

    if not py7zr.is_7zfile(archive_path):
        return False, [], [f"Invalid 7z archive: {archive_path}"]

    try:
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            # Check if password needed
            if archive.needs_password() and not password:
                return False, [], ["Password required but not provided"]

            # Get file list first
            try:
                file_list = archive.getnames()
            except py7zr.PasswordRequired:
                return False, [], ["Invalid password"]
            except Exception as e:
                return False, [], [f"Cannot read archive contents: {e}"]

            # Extract files individually for better error handling
            for filename in file_list:
                error_msg = None
                try:
                    # extract() takes the destination as `path` and the member
                    # names as `targets`; passing the name positionally would
                    # bind it to `path`.
                    archive.extract(path=output_path, targets=[filename])
                except py7zr.CrcError as e:
                    error_msg = f"CRC error in {e.filename}: expected {e.expected:08x}, got {e.actual:08x}"
                except py7zr.DecompressionError as e:
                    error_msg = f"Decompression failed for {filename}: {e}"
                except py7zr.AbsolutePathError as e:
                    error_msg = f"Security violation in {filename}: {e}"
                except Exception as e:
                    # Last-resort fallback so one bad member cannot abort the
                    # whole run; the failure is recorded, not swallowed.
                    error_msg = f"Unexpected error extracting {filename}: {e}"
                finally:
                    # The archive object is exhausted after extract(); rewind
                    # it so the next member can be read.
                    archive.reset()

                if error_msg is None:
                    extracted_files.append(filename)
                    logging.info(f"Extracted: {filename}")
                else:
                    errors.append(error_msg)
                    logging.error(error_msg)

    except py7zr.Bad7zFile as e:
        return False, extracted_files, [f"Invalid archive: {e}"]
    except py7zr.PasswordRequired:
        return False, extracted_files, ["Password required"]
    except py7zr.UnsupportedCompressionMethodError as e:
        return False, extracted_files, [f"Unsupported compression: {e}"]
    except Exception as e:
        return False, extracted_files, [f"Unexpected error: {e}"]

    success = not errors
    return success, extracted_files, errors
302
303
# Usage
304
ok, files, errors = robust_extract('archive.7z', '/tmp/output')
if not ok:
    # Report every recorded problem, one per line.
    print(f"Extraction completed with {len(errors)} errors:")
    for error in errors:
        print(f" - {error}")
else:
    print(f"Successfully extracted {len(files)} files")
311
```
312
313
### Validation and Testing
314
315
```python
316
import py7zr
317
318
def validate_archive(archive_path, password=None):
    """
    Comprehensively validate 7z archive.

    Parameters:
    - archive_path: str, path to archive file
    - password: str, optional password

    Returns:
        dict: validation results with details. Keys: 'valid', 'encrypted',
        'file_count', 'total_size', 'compression_methods', 'errors',
        'warnings'. 'valid' is set only when the integrity test did not
        report a failure.
    """
    result = {
        'valid': False,
        'encrypted': False,
        'file_count': 0,
        'total_size': 0,
        'compression_methods': [],
        'errors': [],
        'warnings': []
    }

    try:
        # Basic file format check
        if not py7zr.is_7zfile(archive_path):
            result['errors'].append("Not a valid 7z archive format")
            return result

        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            # Check encryption
            result['encrypted'] = archive.password_protected

            if archive.needs_password() and not password:
                result['errors'].append("Password required")
                return result

            # Get archive info
            try:
                archive_info = archive.archiveinfo()
                result['total_size'] = archive_info.size
                result['compression_methods'] = archive_info.method_names
            except Exception as e:
                result['warnings'].append(f"Cannot read archive info: {e}")

            # Test extraction
            try:
                outcome = archive.test()
            except py7zr.CrcError as e:
                result['errors'].append(f"CRC error: {e}")
            except py7zr.DecompressionError as e:
                result['errors'].append(f"Decompression error: {e}")
            except Exception as e:
                result['errors'].append(f"Test failed: {e}")
            else:
                # test() reports a CRC failure by returning False rather than
                # raising, and returns None when the archive carries no CRC
                # record, so the return value must be inspected.
                if outcome is False:
                    result['errors'].append("Test failed: CRC mismatch in packed data")
                else:
                    if outcome is None:
                        result['warnings'].append(
                            "Archive has no CRC record; integrity could not be verified"
                        )
                    result['valid'] = True

            # Count files
            try:
                file_list = archive.getnames()
                result['file_count'] = len(file_list)
            except Exception as e:
                result['warnings'].append(f"Cannot read file list: {e}")

    except py7zr.PasswordRequired:
        result['errors'].append("Invalid password")
    except py7zr.Bad7zFile as e:
        result['errors'].append(f"Bad archive file: {e}")
    except Exception as e:
        # Last-resort fallback: validation reports problems instead of raising.
        result['errors'].append(f"Validation error: {e}")

    return result
383
384
# Usage
385
validation = validate_archive('archive.7z')
386
print(f"Valid: {validation['valid']}")
387
print(f"Files: {validation['file_count']}")
388
print(f"Errors: {validation['errors']}")
389
```
390
391
### Recovery and Partial Extraction
392
393
```python
394
import py7zr
395
import os
396
397
def recover_partial_archive(archive_path, output_path, password=None):
    """
    Attempt to recover as much data as possible from corrupted archive.

    Each member is extracted on its own; failures are recorded and the
    remaining members are still attempted.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - password: str, optional password

    Returns:
        dict: recovery results with keys 'recovered_files' (list of names),
        'failed_files' (list of dicts with 'filename', 'error',
        'error_type'), 'total_attempts' (int) and 'success_rate' (float in
        the range 0.0-1.0). If the archive cannot be opened the counters
        stay at zero.
    """
    result = {
        'recovered_files': [],
        'failed_files': [],
        'total_attempts': 0,
        'success_rate': 0.0
    }

    try:
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            file_list = archive.getnames()
            result['total_attempts'] = len(file_list)

            for filename in file_list:
                try:
                    # Try to extract individual file. extract() takes the
                    # destination as `path` and member names as `targets`.
                    archive.extract(path=output_path, targets=[filename])
                    result['recovered_files'].append(filename)
                    print(f"✓ Recovered: {filename}")
                except (py7zr.CrcError, py7zr.DecompressionError) as e:
                    result['failed_files'].append({
                        'filename': filename,
                        'error': str(e),
                        'error_type': type(e).__name__
                    })
                    print(f"✗ Failed: {filename} ({type(e).__name__})")
                except Exception as e:
                    # Best-effort recovery: record the failure and move on.
                    result['failed_files'].append({
                        'filename': filename,
                        'error': str(e),
                        'error_type': 'UnexpectedError'
                    })
                    print(f"✗ Error: {filename} ({e})")
                finally:
                    # The archive object is exhausted after extract(); rewind
                    # it so the next member can be read.
                    archive.reset()

            if result['total_attempts'] > 0:
                result['success_rate'] = len(result['recovered_files']) / result['total_attempts']

    except Exception as e:
        # The archive could not be opened or listed; report and return the
        # (possibly partial) result rather than raising.
        print(f"Cannot access archive: {e}")

    return result
444
445
# Usage
446
recovery = recover_partial_archive('corrupted.7z', '/tmp/recovery')
447
print(f"Recovery rate: {recovery['success_rate']:.1%}")
448
print(f"Recovered {len(recovery['recovered_files'])} out of {recovery['total_attempts']} files")
449
```
450
451
## Best Practices
452
453
### Exception Handling Guidelines
454
455
1. **Prefer specific exception types**; catch generic `Exception` only as a last-resort fallback placed after the specific handlers
456
2. **Check archive validity** with `is_7zfile()` before processing
457
3. **Handle password requirements** gracefully with `needs_password()`
458
4. **Implement retry logic** for transient errors
459
5. **Log errors** for debugging and monitoring
460
6. **Provide user-friendly error messages** while preserving technical details for logs
461
7. **Consider partial recovery** for corrupted archives when possible
462
8. **Validate security** by catching `AbsolutePathError` and similar threats
463
464
### Error Recovery Strategies
465
466
```python
467
import py7zr
468
import time
469
470
def extract_with_retry(archive_path, output_path, max_retries=3, password=None, *, base_delay=1.0):
    """
    Extract with retry logic for transient errors.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - max_retries: int, maximum number of extraction attempts
    - password: str, optional password
    - base_delay: float, seconds to wait after the first failed attempt;
      the wait doubles after each further failure (keyword-only)

    Returns:
        bool: True once an attempt succeeds. False only when max_retries
        is zero or negative, because no attempt is made.

    Raises:
        py7zr.DecompressionError or IOError from the final attempt when all
        attempts fail; py7zr.Bad7zFile, py7zr.PasswordRequired and
        py7zr.AbsolutePathError immediately, without retrying.
    """
    for attempt in range(max_retries):
        try:
            with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
                archive.extractall(path=output_path)
            return True
        except (py7zr.DecompressionError, IOError) as e:
            if attempt < max_retries - 1:
                print(f"Attempt {attempt + 1} failed: {e}. Retrying...")
                time.sleep(base_delay * 2 ** attempt)  # Exponential backoff
            else:
                print(f"All {max_retries} attempts failed")
                raise
        except (py7zr.Bad7zFile, py7zr.PasswordRequired, py7zr.AbsolutePathError):
            # Don't retry for these errors
            raise

    # Reached only when the loop body never ran (max_retries <= 0).
    return False
490
```