Pure python 7-zip library providing comprehensive 7z archive format support with compression, decompression, encryption and CLI tools
—
py7zr provides comprehensive error handling with specific exception types for different failure modes. The exception hierarchy enables precise error handling for various scenarios including invalid archives, compression errors, missing passwords, and security violations.
py7zr exceptions inherit from standard Python exceptions with specific types for archive-related errors.
class ArchiveError(Exception):
    """
    Base class for all archive-related errors.

    Parent class for py7zr-specific exceptions.
    """


class Bad7zFile(ArchiveError):
    """
    Raised when archive file is invalid or corrupted.

    Indicates the file is not a valid 7z archive or has structural damage.
    """


class DecompressionError(ArchiveError):
    """
    Raised when decompression operation fails.

    Indicates failure during decompression process, possibly due to
    corrupted data or unsupported compression method.
    """


class PasswordRequired(Exception):
    """
    Raised when password is needed for encrypted archive.

    Indicates the archive is password-protected and no password
    was provided or the provided password is incorrect.

    Derives directly from Exception, so an `except ArchiveError`
    handler does not catch it.
    """


class UnsupportedCompressionMethodError(ArchiveError):
    """
    Raised when archive uses unsupported compression method.

    Indicates the archive contains data compressed with a method
    not supported by the current py7zr installation.
    """

    def __init__(self, data, message):
        """
        Parameters:
        - data: bytes, problematic data causing the error
        - message: str, descriptive error message
        """
        super().__init__(data, message)
        # Keep the arguments as attributes so handlers can inspect them.
        self.data = data
        self.message = message


class CrcError(ArchiveError):
    """
    Raised when CRC verification fails during extraction.

    Indicates data corruption detected through checksum mismatch.
    """

    def __init__(self, expected, actual, filename):
        """
        Parameters:
        - expected: int, expected CRC value
        - actual: int, calculated CRC value
        - filename: str, name of file with CRC error
        """
        super().__init__(expected, actual, filename)
        # Handlers read these fields (e.g. to format
        # "expected {e.expected:08x}, got {e.actual:08x}").
        self.expected = expected
        self.actual = actual
        self.filename = filename


class InternalError(ArchiveError):
    """
    Raised when internal py7zr error occurs.

    Indicates unexpected internal state or logic error.
    """
class AbsolutePathError(Exception):
    """
    Raised when archive contains absolute paths.

    Security exception raised when archive member paths contain
    absolute paths that could overwrite system files.

    Derives directly from Exception, so it must be caught by its own
    name (or by `except Exception`).
    """


import py7zr
# Attempt extraction and report a file that is not a 7z archive.
try:
    with py7zr.SevenZipFile('not_an_archive.txt', 'r') as zf:
        zf.extractall()
except py7zr.Bad7zFile as exc:
    print(f"Invalid archive file: {exc}")
    # Handle invalid archive - maybe try different format

# Pre-check if file is valid 7z archive
if not py7zr.is_7zfile('suspect_file.7z'):
    print("File is not a valid 7z archive")
else:
    # Safe to process
    with py7zr.SevenZipFile('suspect_file.7z', 'r') as zf:
        zf.extractall()


import py7zr
# Try without a password first; prompt only if the archive demands one.
try:
    with py7zr.SevenZipFile('encrypted.7z', 'r') as archive:
        archive.extractall()
except py7zr.PasswordRequired:
    print("Archive is password protected")
    password = input("Enter password: ")
    try:
        with py7zr.SevenZipFile('encrypted.7z', 'r', password=password) as archive:
            archive.extractall()
    except py7zr.PasswordRequired:
        print("Invalid password")

# Check if password needed before attempting extraction
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    encrypted = archive.needs_password()
    if not encrypted:
        archive.extractall()

if encrypted:
    password = input("Password required: ")
    # Re-open with password only after the probing handle above is closed,
    # so the same file is never open twice under one variable name.
    with py7zr.SevenZipFile('archive.7z', 'r', password=password) as archive:
        archive.extractall()


import py7zr
try:
    with py7zr.SevenZipFile('corrupted.7z', 'r') as archive:
        archive.extractall()
except py7zr.DecompressionError as e:
    print(f"Decompression failed: {e}")
    # Archive may be partially corrupted
    # Try extracting individual files
    try:
        # The handle above was closed when its `with` block exited, so the
        # archive has to be opened again before it can be read.
        with py7zr.SevenZipFile('corrupted.7z', 'r') as archive:
            for filename in archive.getnames():
                try:
                    # Rewind before each call; py7zr documents reset() for
                    # reading from the same handle more than once.
                    archive.reset()
                    # extract() takes the destination directory first;
                    # member names are selected through `targets`.
                    archive.extract(targets=[filename])
                    print(f"Successfully extracted: {filename}")
                except py7zr.DecompressionError:
                    print(f"Failed to extract: {filename}")
    except Exception:
        # `except Exception` (not a bare `except:`) lets KeyboardInterrupt
        # and SystemExit propagate.
        print("Archive is severely corrupted")


import py7zr
try:
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        archive.extractall()
except py7zr.CrcError as e:
    print(f"CRC error in file '{e.filename}': expected {e.expected:08x}, got {e.actual:08x}")
    # File is corrupted, but other files might be OK
    # Could continue with other files or abort
    # Option 1: Continue with other files
    # The handle above is already closed here, so open the archive again.
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        for filename in archive.getnames():
            if filename == e.filename:
                continue  # skip the member already known to be damaged
            try:
                # Rewind before each call; py7zr documents reset() for
                # reading from the same handle more than once.
                archive.reset()
                # Member names go in `targets`; the first positional
                # parameter of extract() is the destination path.
                archive.extract(targets=[filename])
            except py7zr.CrcError as crc_err:
                print(f"Another CRC error: {crc_err.filename}")


import py7zr
# Report an unsupported compression method and point at the optional extras.
try:
    with py7zr.SevenZipFile('advanced_archive.7z', 'r') as zf:
        zf.extractall()
except py7zr.UnsupportedCompressionMethodError as exc:
    print(f"Unsupported compression method: {exc}")
    print("This archive requires additional compression libraries")
    # Suggest installing additional dependencies
    for hint in (
        "Try installing optional dependencies:",
        " pip install py7zr[full]",
        " # or individually:",
        " pip install pyzstd pyppmd brotli",
    ):
        print(hint)


import py7zr
try:
    with py7zr.SevenZipFile('malicious.7z', 'r') as archive:
        archive.extractall()
except py7zr.AbsolutePathError as e:
    print(f"Security violation: {e}")
    print("Archive contains absolute paths that could overwrite system files")
    # Could extract to safe directory instead
    safe_extraction_path = '/tmp/safe_extraction'
    try:
        # The first handle was closed when its `with` block exited, so the
        # archive is opened again for the second attempt.
        with py7zr.SevenZipFile('malicious.7z', 'r') as archive:
            archive.extractall(path=safe_extraction_path)
    except py7zr.AbsolutePathError:
        print("Even safe extraction failed - archive is malicious")


import py7zr
import os
import logging
def robust_extract(archive_path, output_path, password=None):
    """
    Robustly extract 7z archive with comprehensive error handling.

    Members are extracted one at a time so that a failure in one member
    is recorded without stopping the others.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - password: str, optional password

    Returns:
    tuple: (success: bool, extracted_files: list, errors: list)
    """
    # Pre-check if file exists and is valid
    if not os.path.exists(archive_path):
        return False, [], [f"Archive file not found: {archive_path}"]
    if not py7zr.is_7zfile(archive_path):
        return False, [], [f"Invalid 7z archive: {archive_path}"]

    extracted_files = []
    errors = []

    def record(message):
        # Every per-member failure is both returned to the caller and logged.
        errors.append(message)
        logging.error(message)

    try:
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            # Check if password needed
            if archive.needs_password() and not password:
                return False, [], ["Password required but not provided"]

            # Get file list first
            try:
                file_list = archive.getnames()
            except py7zr.PasswordRequired:
                return False, [], ["Invalid password"]
            except Exception as e:
                return False, [], [f"Cannot read archive contents: {e}"]

            # Extract files individually for better error handling
            for filename in file_list:
                try:
                    # Rewind before each call; py7zr documents reset() for
                    # reading from the same handle more than once.
                    archive.reset()
                    # extract() takes the destination as `path` and the
                    # member names as `targets`. Passing the member name
                    # positionally would bind it to `path` and clash with
                    # the `path=` keyword.
                    archive.extract(path=output_path, targets=[filename])
                    extracted_files.append(filename)
                    logging.info("Extracted: %s", filename)
                except py7zr.CrcError as e:
                    record(f"CRC error in {e.filename}: expected {e.expected:08x}, got {e.actual:08x}")
                except py7zr.DecompressionError as e:
                    record(f"Decompression failed for {filename}: {e}")
                except py7zr.AbsolutePathError as e:
                    record(f"Security violation in {filename}: {e}")
                except Exception as e:
                    record(f"Unexpected error extracting {filename}: {e}")
    except py7zr.Bad7zFile as e:
        return False, extracted_files, [f"Invalid archive: {e}"]
    except py7zr.PasswordRequired:
        return False, extracted_files, ["Password required"]
    except py7zr.UnsupportedCompressionMethodError as e:
        return False, extracted_files, [f"Unsupported compression: {e}"]
    except Exception as e:
        return False, extracted_files, [f"Unexpected error: {e}"]

    return not errors, extracted_files, errors
# Usage: run the extraction and summarise the outcome.
success, files, errors = robust_extract('archive.7z', '/tmp/output')
if not success:
    print(f"Extraction completed with {len(errors)} errors:")
    for message in errors:
        print(f" - {message}")
else:
    print(f"Successfully extracted {len(files)} files")


import py7zr
def validate_archive(archive_path, password=None):
    """
    Comprehensively validate 7z archive.

    Parameters:
    - archive_path: str, path to archive file
    - password: str, optional password

    Returns:
    dict: validation results with details
        ('valid', 'encrypted', 'file_count', 'total_size',
        'compression_methods', 'errors', 'warnings')
    """
    result = {
        'valid': False,
        'encrypted': False,
        'file_count': 0,
        'total_size': 0,
        'compression_methods': [],
        'errors': [],
        'warnings': []
    }
    try:
        # Basic file format check
        if not py7zr.is_7zfile(archive_path):
            result['errors'].append("Not a valid 7z archive format")
            return result

        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            # Check encryption. Ask the archive itself, so the flag does not
            # depend on whether the caller happened to supply a password.
            needs_password = archive.needs_password()
            result['encrypted'] = needs_password
            if needs_password and not password:
                result['errors'].append("Password required")
                return result

            # Get archive info
            try:
                archive_info = archive.archiveinfo()
                result['total_size'] = archive_info.size
                result['compression_methods'] = archive_info.method_names
            except Exception as e:
                result['warnings'].append(f"Cannot read archive info: {e}")

            # Test extraction
            try:
                # py7zr documents test() as returning True on success, False
                # on a failed check and None when there is no CRC record, so
                # the return value has to be inspected.
                check = archive.test()
                if check is False:
                    result['errors'].append("CRC check failed")
                else:
                    result['valid'] = True
                    if check is None:
                        result['warnings'].append(
                            "Archive has no CRC record; integrity not verified"
                        )
            except py7zr.CrcError as e:
                result['errors'].append(f"CRC error: {e}")
            except py7zr.DecompressionError as e:
                result['errors'].append(f"Decompression error: {e}")
            except Exception as e:
                result['errors'].append(f"Test failed: {e}")

            # Count files
            try:
                result['file_count'] = len(archive.getnames())
            except Exception as e:
                result['warnings'].append(f"Cannot read file list: {e}")
    except py7zr.PasswordRequired:
        result['errors'].append("Invalid password")
    except py7zr.Bad7zFile as e:
        result['errors'].append(f"Bad archive file: {e}")
    except Exception as e:
        result['errors'].append(f"Validation error: {e}")
    return result
# Usage: validate an archive and print the headline fields of the report.
validation = validate_archive('archive.7z')
print(f"Valid: {validation['valid']}")
print(f"Files: {validation['file_count']}")
print(f"Errors: {validation['errors']}")


import py7zr
import os
def recover_partial_archive(archive_path, output_path, password=None):
    """
    Attempt to recover as much data as possible from corrupted archive.

    Each member is extracted on its own; failures are recorded and the
    loop moves on to the next member.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - password: str, optional password

    Returns:
    dict: recovery results
        ('recovered_files', 'failed_files', 'total_attempts', 'success_rate')
    """
    result = {
        'recovered_files': [],
        'failed_files': [],
        'total_attempts': 0,
        'success_rate': 0.0
    }
    try:
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            file_list = archive.getnames()
            result['total_attempts'] = len(file_list)
            for filename in file_list:
                try:
                    # Try to extract individual file.
                    # Rewind before each call; py7zr documents reset() for
                    # reading from the same handle more than once.
                    archive.reset()
                    # The member name goes in `targets`; passing it
                    # positionally would bind it to `path` and clash with
                    # the `path=` keyword.
                    archive.extract(path=output_path, targets=[filename])
                    result['recovered_files'].append(filename)
                    print(f"✓ Recovered: {filename}")
                except (py7zr.CrcError, py7zr.DecompressionError) as e:
                    result['failed_files'].append({
                        'filename': filename,
                        'error': str(e),
                        'error_type': type(e).__name__
                    })
                    print(f"✗ Failed: {filename} ({type(e).__name__})")
                except Exception as e:
                    result['failed_files'].append({
                        'filename': filename,
                        'error': str(e),
                        'error_type': 'UnexpectedError'
                    })
                    print(f"✗ Error: {filename} ({e})")
            if result['total_attempts'] > 0:
                result['success_rate'] = len(result['recovered_files']) / result['total_attempts']
    except Exception as e:
        # Best effort: an archive that cannot be opened yields the empty result.
        print(f"Cannot access archive: {e}")
    return result
# Usage: attempt recovery and report the fraction of members recovered.
recovery = recover_partial_archive('corrupted.7z', '/tmp/recovery')
print(f"Recovery rate: {recovery['success_rate']:.1%}")
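print(f"Recovered {len(recovery['recovered_files'])} out of {recovery['total_attempts']} files")

- Catch specific exception types rather than a blanket `Exception`
- Check files with `is_7zfile()` before processing
- Check for encryption with `needs_password()`
- Handle `AbsolutePathError` and similar threats

import py7zr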
import time
def extract_with_retry(archive_path, output_path, max_retries=3, password=None,
                       base_delay=1.0):
    """
    Extract with retry logic for transient errors.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - max_retries: int, maximum number of extraction attempts
    - password: str, optional password
    - base_delay: float, seconds to wait after the first failed attempt;
      the wait doubles after every further failure

    Returns:
    bool: True once an attempt succeeds; False when max_retries allows
    no attempt at all (zero or negative)

    Raises:
    - py7zr.DecompressionError / OSError: re-raised after the last attempt
    - py7zr.Bad7zFile, py7zr.PasswordRequired, py7zr.AbsolutePathError:
      re-raised immediately, without retrying
    """
    for attempt in range(max_retries):
        try:
            with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
                archive.extractall(path=output_path)
            return True
        except (py7zr.DecompressionError, OSError) as e:
            # OSError is the canonical name; IOError is an alias of it.
            if attempt >= max_retries - 1:
                print(f"All {max_retries} attempts failed")
                raise
            print(f"Attempt {attempt + 1} failed: {e}. Retrying...")
            time.sleep(base_delay * 2 ** attempt)  # Exponential backoff
        except (py7zr.Bad7zFile, py7zr.PasswordRequired, py7zr.AbsolutePathError):
            # Don't retry for these errors
            raise
    return False


## Install with Tessl CLI
npx tessl i tessl/pypi-py7zr