CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-py7zr

Pure python 7-zip library providing comprehensive 7z archive format support with compression, decompression, encryption and CLI tools

Pending
Overview
Eval results
Files

docs/exceptions.md

Exception Handling

py7zr provides comprehensive error handling with specific exception types for different failure modes. The exception hierarchy enables precise error handling for various scenarios including invalid archives, compression errors, missing passwords, and security violations.

Capabilities

Exception Hierarchy

py7zr exceptions inherit from standard Python exceptions with specific types for archive-related errors.

class ArchiveError(Exception):
    """
    Common base class for py7zr's archive-related errors.

    py7zr-specific exceptions use this class as their parent, so catching
    it also catches those subclasses.
    """

class Bad7zFile(ArchiveError):
    """
    Signals an invalid or corrupted archive file.

    Raised when the file is not a valid 7z archive or when its structure
    is damaged.
    """

class DecompressionError(ArchiveError):
    """
    Signals that a decompression operation failed.

    The failure occurs during the decompression process; possible causes
    are corrupted data or an unsupported compression method.
    """

class PasswordRequired(Exception):
    """
    Signals that an encrypted archive needs a password.

    Raised for a password-protected archive when no password was supplied
    or when the supplied password is incorrect.
    """

class UnsupportedCompressionMethodError(ArchiveError):
    """
    Raised when archive uses unsupported compression method.

    Indicates the archive contains data compressed with a method
    not supported by the current py7zr installation.

    Attributes:
    - data: bytes, problematic data causing the error
    - message: str, descriptive error message
    """
    def __init__(self, data, message):
        """
        Parameters:
        - data: bytes, problematic data causing the error
        - message: str, descriptive error message
        """
        super().__init__(data, message)
        # Keep the constructor arguments reachable by name for handlers.
        self.data = data
        self.message = message

class CrcError(ArchiveError):
    """
    Raised when CRC verification fails during extraction.

    Indicates data corruption detected through checksum mismatch.

    Attributes:
    - expected: int, expected CRC value
    - actual: int, calculated CRC value
    - filename: str, name of file with CRC error
    """
    def __init__(self, expected, actual, filename):
        """
        Parameters:
        - expected: int, expected CRC value
        - actual: int, calculated CRC value
        - filename: str, name of file with CRC error
        """
        super().__init__(expected, actual, filename)
        # Handlers read these by name (e.g. e.filename, e.expected, e.actual).
        self.expected = expected
        self.actual = actual
        self.filename = filename

class InternalError(ArchiveError):
    """
    Signals an error internal to py7zr.

    Raised on an unexpected internal state or a logic error.
    """

class AbsolutePathError(Exception):
    """
    Signals that an archive contains absolute paths.

    This is a security exception: it is raised when archive member paths
    are absolute and could therefore overwrite system files.
    """

Common Error Scenarios

Invalid Archive Files

import py7zr

# Attempt the extraction directly and react to Bad7zFile if it is raised.
try:
    with py7zr.SevenZipFile('not_an_archive.txt', 'r') as archive:
        archive.extractall()
except py7zr.Bad7zFile as bad_file:
    # Handle invalid archive - maybe try different format
    print(f"Invalid archive file: {bad_file}")

# Alternative: pre-check the file with is_7zfile() before opening it.
if not py7zr.is_7zfile('suspect_file.7z'):
    print("File is not a valid 7z archive")
else:
    # Safe to process
    with py7zr.SevenZipFile('suspect_file.7z', 'r') as archive:
        archive.extractall()

Password Protection

import py7zr

# First attempt without a password; ask for one only if PasswordRequired is raised.
try:
    with py7zr.SevenZipFile('encrypted.7z', 'r') as archive:
        archive.extractall()
except py7zr.PasswordRequired:
    print("Archive is password protected")
    password = input("Enter password: ")

    try:
        with py7zr.SevenZipFile('encrypted.7z', 'r', password=password) as archive:
            archive.extractall()
    except py7zr.PasswordRequired:
        print("Invalid password")

# Check if password needed before attempting extraction.
# The probe handle is closed before the archive is opened again, so the two
# handles never overlap and no name is rebound inside a nested `with`.
with py7zr.SevenZipFile('archive.7z', 'r') as probe:
    needs_password = probe.needs_password()

password = input("Password required: ") if needs_password else None
with py7zr.SevenZipFile('archive.7z', 'r', password=password) as archive:
    archive.extractall()

Decompression Errors

import py7zr

try:
    with py7zr.SevenZipFile('corrupted.7z', 'r') as archive:
        archive.extractall()
except py7zr.DecompressionError as e:
    print(f"Decompression failed: {e}")
    # Archive may be partially corrupted
    # Try extracting individual files. The handle above was closed when its
    # `with` block exited, so the archive has to be opened again.
    try:
        with py7zr.SevenZipFile('corrupted.7z', 'r') as archive:
            for filename in archive.getnames():
                try:
                    # reset() rewinds the archive between extractions; members
                    # are selected through `targets` (the first positional
                    # parameter of extract() is the destination path).
                    archive.reset()
                    archive.extract(targets=[filename])
                    print(f"Successfully extracted: {filename}")
                except py7zr.DecompressionError:
                    print(f"Failed to extract: {filename}")
    except Exception:
        # Best-effort fallback: any other failure means recovery is not possible.
        print("Archive is severely corrupted")

CRC Verification Failures

import py7zr

try:
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        archive.extractall()
except py7zr.CrcError as e:
    print(f"CRC error in file '{e.filename}': expected {e.expected:08x}, got {e.actual:08x}")
    # File is corrupted, but other files might be OK
    # Could continue with other files or abort

    # Option 1: Continue with other files.
    # The first handle is closed once its `with` block exits, so re-open the
    # archive before extracting the remaining members.
    bad_filename = e.filename
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        remaining = [name for name in archive.getnames() if name != bad_filename]
        for filename in remaining:
            try:
                # reset() rewinds the archive between extractions; members are
                # selected through `targets`, not the first positional argument.
                archive.reset()
                archive.extract(targets=[filename])
            except py7zr.CrcError as crc_err:
                print(f"Another CRC error: {crc_err.filename}")

Unsupported Compression Methods

import py7zr

try:
    with py7zr.SevenZipFile('advanced_archive.7z', 'r') as archive:
        archive.extractall()
except py7zr.UnsupportedCompressionMethodError as unsupported:
    # Suggest installing additional dependencies
    install_hints = (
        "Try installing optional dependencies:",
        "  pip install py7zr[full]",
        "  # or individually:",
        "  pip install pyzstd pyppmd brotli",
    )
    print(f"Unsupported compression method: {unsupported}")
    print("This archive requires additional compression libraries")
    for hint in install_hints:
        print(hint)

Security Violations

import py7zr

try:
    with py7zr.SevenZipFile('malicious.7z', 'r') as archive:
        archive.extractall()
except py7zr.AbsolutePathError as e:
    print(f"Security violation: {e}")
    print("Archive contains absolute paths that could overwrite system files")

    # Could extract to safe directory instead.
    # The first handle was closed when its `with` block exited, so the
    # archive is opened again for the second attempt.
    safe_extraction_path = '/tmp/safe_extraction'
    try:
        with py7zr.SevenZipFile('malicious.7z', 'r') as archive:
            archive.extractall(path=safe_extraction_path)
    except py7zr.AbsolutePathError:
        print("Even safe extraction failed - archive is malicious")

Comprehensive Error Handling Patterns

Robust Archive Processing

import py7zr
import os
import logging

def robust_extract(archive_path, output_path, password=None):
    """
    Robustly extract 7z archive with comprehensive error handling.

    Members are extracted one at a time so that a failure in one member is
    recorded without aborting the remaining members.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - password: str, optional password

    Returns:
    tuple: (success: bool, extracted_files: list, errors: list)
        success is True only when no error was recorded.
    """
    extracted_files = []
    errors = []

    def record_error(message):
        # Keep the returned error list and the log in sync.
        errors.append(message)
        logging.error(message)

    # Pre-check if file exists and is valid
    if not os.path.exists(archive_path):
        return False, [], [f"Archive file not found: {archive_path}"]

    if not py7zr.is_7zfile(archive_path):
        return False, [], [f"Invalid 7z archive: {archive_path}"]

    try:
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            # Check if password needed
            if archive.needs_password() and not password:
                return False, [], ["Password required but not provided"]

            # Get file list first
            try:
                file_list = archive.getnames()
            except py7zr.PasswordRequired:
                return False, [], ["Invalid password"]
            except Exception as e:
                return False, [], [f"Cannot read archive contents: {e}"]

            # Extract files individually for better error handling
            for filename in file_list:
                try:
                    # extract() takes the destination as `path` and the members
                    # as the `targets` collection; passing the member name
                    # positionally would bind it to `path`. reset() rewinds the
                    # archive so it can be read again for the next member.
                    archive.reset()
                    archive.extract(path=output_path, targets=[filename])
                except py7zr.CrcError as e:
                    record_error(
                        f"CRC error in {e.filename}: expected {e.expected:08x}, got {e.actual:08x}"
                    )
                except py7zr.DecompressionError as e:
                    record_error(f"Decompression failed for {filename}: {e}")
                except py7zr.AbsolutePathError as e:
                    record_error(f"Security violation in {filename}: {e}")
                except Exception as e:
                    record_error(f"Unexpected error extracting {filename}: {e}")
                else:
                    extracted_files.append(filename)
                    logging.info("Extracted: %s", filename)

    # Archive-level failures: report them together with any per-member errors
    # that were already recorded, instead of discarding those.
    except py7zr.Bad7zFile as e:
        return False, extracted_files, [*errors, f"Invalid archive: {e}"]
    except py7zr.PasswordRequired:
        return False, extracted_files, [*errors, "Password required"]
    except py7zr.UnsupportedCompressionMethodError as e:
        return False, extracted_files, [*errors, f"Unsupported compression: {e}"]
    except Exception as e:
        return False, extracted_files, [*errors, f"Unexpected error: {e}"]

    return not errors, extracted_files, errors

# Usage: report the failures if there were any, otherwise the extracted count.
success, files, errors = robust_extract('archive.7z', '/tmp/output')
if not success:
    print(f"Extraction completed with {len(errors)} errors:")
    for error in errors:
        print(f"  - {error}")
else:
    print(f"Successfully extracted {len(files)} files")

Validation and Testing

import py7zr

def validate_archive(archive_path, password=None):
    """
    Comprehensively validate 7z archive.

    Parameters:
    - archive_path: str, path to archive file
    - password: str, optional password

    Returns:
    dict: validation results with details. Keys: 'valid', 'encrypted',
        'file_count', 'total_size', 'compression_methods', 'errors',
        'warnings'.
    """
    result = {
        'valid': False,
        'encrypted': False,
        'file_count': 0,
        'total_size': 0,
        'compression_methods': [],
        'errors': [],
        'warnings': []
    }

    try:
        # Basic file format check
        if not py7zr.is_7zfile(archive_path):
            result['errors'].append("Not a valid 7z archive format")
            return result

        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            # Check encryption
            result['encrypted'] = archive.password_protected

            if archive.needs_password() and not password:
                result['errors'].append("Password required")
                return result

            # Get archive info
            try:
                archive_info = archive.archiveinfo()
                result['total_size'] = archive_info.size
                result['compression_methods'] = archive_info.method_names
            except Exception as e:
                result['warnings'].append(f"Cannot read archive info: {e}")

            # Test extraction
            try:
                # test() reports its verdict through the return value, not only
                # through exceptions: False means the CRC check failed, None
                # means the archive carries no CRC record to check against.
                crc_ok = archive.test()
                if crc_ok is False:
                    result['errors'].append("CRC check failed")
                else:
                    if crc_ok is None:
                        result['warnings'].append("Archive has no CRC record to verify")
                    result['valid'] = True
            except py7zr.CrcError as e:
                result['errors'].append(f"CRC error: {e}")
            except py7zr.DecompressionError as e:
                result['errors'].append(f"Decompression error: {e}")
            except Exception as e:
                result['errors'].append(f"Test failed: {e}")

            # Count files
            try:
                file_list = archive.getnames()
                result['file_count'] = len(file_list)
            except Exception as e:
                result['warnings'].append(f"Cannot read file list: {e}")

    except py7zr.PasswordRequired:
        result['errors'].append("Invalid password")
    except py7zr.Bad7zFile as e:
        result['errors'].append(f"Bad archive file: {e}")
    except Exception as e:
        result['errors'].append(f"Validation error: {e}")

    return result

# Usage: print a three-line summary of the validation result.
validation = validate_archive('archive.7z')
report = [
    f"Valid: {validation['valid']}",
    f"Files: {validation['file_count']}",
    f"Errors: {validation['errors']}",
]
print("\n".join(report))

Recovery and Partial Extraction

import py7zr
import os

def recover_partial_archive(archive_path, output_path, password=None):
    """
    Attempt to recover as much data as possible from corrupted archive.

    Each member is extracted on its own; failures are recorded and the loop
    continues with the next member.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - password: str, optional password

    Returns:
    dict: recovery results with keys 'recovered_files' (list of names),
        'failed_files' (list of dicts with 'filename', 'error',
        'error_type'), 'total_attempts' (int) and 'success_rate' (float,
        recovered / attempted; 0.0 when nothing was attempted).
    """
    result = {
        'recovered_files': [],
        'failed_files': [],
        'total_attempts': 0,
        'success_rate': 0.0
    }

    try:
        with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
            file_list = archive.getnames()
            result['total_attempts'] = len(file_list)

            for filename in file_list:
                try:
                    # Try to extract individual file. extract() takes the
                    # destination as `path` and the members as `targets`;
                    # reset() rewinds the archive between extractions.
                    archive.reset()
                    archive.extract(path=output_path, targets=[filename])
                except (py7zr.CrcError, py7zr.DecompressionError) as e:
                    result['failed_files'].append({
                        'filename': filename,
                        'error': str(e),
                        'error_type': type(e).__name__
                    })
                    print(f"✗ Failed: {filename} ({type(e).__name__})")
                except Exception as e:
                    result['failed_files'].append({
                        'filename': filename,
                        'error': str(e),
                        'error_type': 'UnexpectedError'
                    })
                    print(f"✗ Error: {filename} ({e})")
                else:
                    result['recovered_files'].append(filename)
                    print(f"✓ Recovered: {filename}")

            if result['total_attempts'] > 0:
                result['success_rate'] = len(result['recovered_files']) / result['total_attempts']

    except Exception as e:
        # Deliberately best-effort: an unreadable archive yields the
        # (possibly partial) result instead of an exception.
        print(f"Cannot access archive: {e}")

    return result

# Usage: summarize how much of the archive could be recovered.
recovery = recover_partial_archive('corrupted.7z', '/tmp/recovery')
summary = [
    f"Recovery rate: {recovery['success_rate']:.1%}",
    f"Recovered {len(recovery['recovered_files'])} out of {recovery['total_attempts']} files",
]
print("\n".join(summary))

Best Practices

Exception Handling Guidelines

  1. Prefer specific exception types; reserve a generic Exception handler for a final catch-all that logs or reports the error
  2. Check archive validity with is_7zfile() before processing
  3. Handle password requirements gracefully with needs_password()
  4. Implement retry logic for transient errors
  5. Log errors for debugging and monitoring
  6. Provide user-friendly error messages while preserving technical details for logs
  7. Consider partial recovery for corrupted archives when possible
  8. Validate security by catching AbsolutePathError and similar threats

Error Recovery Strategies

import py7zr
import time

def extract_with_retry(archive_path, output_path, max_retries=3, password=None):
    """
    Extract with retry logic for transient errors.

    Parameters:
    - archive_path: str, path to archive file
    - output_path: str, extraction destination
    - max_retries: int, maximum number of extraction attempts
    - password: str, optional password

    Returns:
    bool: True once an attempt succeeds; False when no attempt is made
        (max_retries is zero or negative).

    Raises:
    The last DecompressionError/IOError once all attempts are used up, and
    Bad7zFile, PasswordRequired or AbsolutePathError immediately.
    """
    final_attempt = max_retries - 1
    attempt = 0

    while attempt < max_retries:
        try:
            with py7zr.SevenZipFile(archive_path, 'r', password=password) as archive:
                archive.extractall(path=output_path)
        except (py7zr.DecompressionError, OSError) as failure:
            if attempt >= final_attempt:
                print(f"All {max_retries} attempts failed")
                raise
            print(f"Attempt {attempt + 1} failed: {failure}. Retrying...")
            time.sleep(2 ** attempt)  # Exponential backoff
        except (py7zr.Bad7zFile, py7zr.PasswordRequired, py7zr.AbsolutePathError):
            # Don't retry for these errors
            raise
        else:
            return True
        attempt += 1

    return False

Install with Tessl CLI

npx tessl i tessl/pypi-py7zr

docs

cli.md

core-operations.md

exceptions.md

index.md

io-callbacks.md

tile.json