CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-py7zr

Pure Python 7-zip library providing comprehensive 7z archive format support with compression, decompression, encryption, and CLI tools

Pending
Overview
Eval results
Files

docs/io-callbacks.md

I/O System and Callbacks

py7zr provides a flexible I/O abstraction layer and callback system that enables custom extraction destinations, progress monitoring, and pluggable storage backends. The I/O system supports file-based, memory-based, and custom implementations, while callbacks provide real-time progress reporting during archive operations.

Capabilities

I/O Abstraction Layer

Base abstract class defining the I/O interface for archive operations.

class Py7zIO:
    """
    Interface that every py7zr I/O backend implements.

    Declares the write, read, seek, flush and size operations that
    archive extraction and creation rely on.
    """

    def write(self, s):
        """
        Store a chunk of bytes in the stream.

        Parameters:
        - s: bytes, the chunk to store

        Returns:
        int: how many bytes were written
        """

    def read(self, size=None):
        """
        Fetch bytes from the stream.

        Parameters:
        - size: int, how many bytes to fetch (None fetches everything)

        Returns:
        bytes: the data that was read
        """

    def seek(self, offset, whence=0):
        """
        Move the stream position.

        Parameters:
        - offset: int, distance in bytes
        - whence: int, origin of the move (0=start, 1=current, 2=end)

        Returns:
        int: the resulting absolute position
        """

    def flush(self):
        """
        Push out any write data that is still buffered.
        """

    def size(self):
        """
        Report the total size of the stream.

        Returns:
        int: size of the stream in bytes
        """

Concrete I/O Implementations

Ready-to-use I/O implementations for common scenarios.

class HashIO(Py7zIO):
    """
    Writer that computes a hash of the data written through it.

    Handy for integrity checks while an archive is being extracted.
    """

    def __init__(self, filename):
        """
        Parameters:
        - filename: name tied to this stream (presumably the archive
          member being extracted; confirm against py7zr)
        """

class Py7zBytesIO(Py7zIO):
    """
    In-memory stream with an optional size cap.

    Keeps the written data in memory, optionally bounded by a limit.
    """

    def __init__(self, filename, limit=None):
        """
        Parameters:
        - filename: name tied to this stream (presumably the archive
          member being extracted; confirm against py7zr)
        - limit: optional size constraint; None means no limit is set
        """

class NullIO(Py7zIO):
    """
    Null-device stream: everything written to it is thrown away.

    Handy for tests, or when an archive only needs to be checked.
    """

    def __init__(self):
        """Takes no arguments."""

class MemIO(Py7zIO):
    """
    In-memory stream built through a factory.

    Pairs memory storage with factory-based creation.
    """

    def __init__(self, fname, factory):
        """
        Parameters:
        - fname: name tied to this stream (presumably a file name;
          confirm against py7zr)
        - factory: factory used for creation (presumably a
          WriterFactory; confirm against py7zr)
        """

class Buffer:
    """
    Helper buffer for working with bytes.
    """

    def __init__(self, size=16):
        """
        Parameters:
        - size: int, defaults to 16 (presumably the buffer capacity in
          bytes; confirm against py7zr)
        """

Usage Examples

import py7zr
from py7zr import HashIO, Py7zBytesIO, NullIO

# Extract to memory with size limit
class MemoryFactory(py7zr.WriterFactory):
    """Hand out size-capped in-memory writers and remember each one."""

    def __init__(self):
        super().__init__()
        # filename -> Py7zBytesIO. Without this mapping the extracted
        # data would be unreachable once extractall() returns.
        self.products = {}

    def create(self, filename):
        """
        Create the in-memory writer for one archive member.

        Parameters:
        - filename: str, target filename

        Returns:
        Py7zBytesIO: buffer that will receive the member's data
        """
        product = Py7zBytesIO(filename, limit=1024*1024)  # 1MB limit
        self.products[filename] = product
        return product

memory_factory = MemoryFactory()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=memory_factory)

# memory_factory.products now maps each member name to its buffer

# Extract with hash verification
class HashFactory(py7zr.WriterFactory):
    """Hand out hashing writers and keep them for later inspection."""

    def __init__(self):
        super().__init__()
        # filename -> HashIO. The writers must be kept, otherwise the
        # hashes they compute cannot be read after extraction.
        self.products = {}

    def create(self, filename):
        """
        Create the hashing writer for one archive member.

        Parameters:
        - filename: str, target filename

        Returns:
        HashIO: writer that hashes the member's data
        """
        product = HashIO(filename)
        self.products[filename] = product
        return product

hash_factory = HashFactory()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=hash_factory)

# hash_factory.products now maps each member name to its HashIO

# Test extraction without writing files
class TestFactory(py7zr.WriterFactory):
    """Factory whose writers throw away everything they receive."""

    def create(self, filename):
        # filename is unused: NullIO is constructed without arguments
        discard = NullIO()
        return discard

with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    discard_factory = TestFactory()
    archive.extractall(factory=discard_factory)

Factory Pattern

Factory classes for creating I/O instances during archive operations.

class WriterFactory:
    """
    Abstract factory that produces Py7zIO writers.

    Lets the caller choose a custom I/O backend for extraction.
    """

    def create(self, filename):
        """
        Build the I/O writer for the given filename.

        Parameters:
        - filename: str, target filename

        Returns:
        Py7zIO: the I/O instance to use for that file
        """

class HashIOFactory(WriterFactory):
    """Factory that produces HashIO instances."""

    def create(self, filename):
        """Build the writer for ``filename`` (a HashIO, per the class summary)."""

class BytesIOFactory(WriterFactory):
    """Factory that produces BytesIO instances with size limits."""

    def __init__(self, limit=None):
        """
        Parameters:
        - limit: optional size limit; None means no limit is set
        """

    def create(self, filename):
        """Build the in-memory writer for ``filename``."""

class NullIOFactory(WriterFactory):
    """Factory that produces NullIO instances."""

    def create(self, filename):
        """Build the writer for ``filename`` (a NullIO, per the class summary)."""

Custom Factory Example

import py7zr
from py7zr import WriterFactory, Py7zIO
import os

class CustomFileIO(Py7zIO):
    """File-backed writer that gives its file a fixed permission mode.

    Implements the whole Py7zIO interface (write, read, seek, flush,
    size) rather than only write, so any of those calls made during
    extraction is served by the underlying file.
    """

    def __init__(self, path, perms):
        self.path = path
        self.perms = perms
        # 'w+b' rather than 'wb' so that read() works as well.
        self.file = open(path, 'w+b')
        # Apply the mode right away. close() is not part of the Py7zIO
        # interface, so it may never be invoked by the library.
        # NOTE(review): confirm whether py7zr calls close() on writers.
        os.chmod(path, perms)

    def write(self, data):
        return self.file.write(data)

    def read(self, size=None):
        return self.file.read() if size is None else self.file.read(size)

    def seek(self, offset, whence=0):
        return self.file.seek(offset, whence)

    def flush(self):
        self.file.flush()

    def size(self):
        # Flush first so buffered writes are counted.
        self.file.flush()
        return os.fstat(self.file.fileno()).st_size

    def close(self):
        self.file.close()


class CustomFileFactory(WriterFactory):
    """Custom factory that creates files with specific permissions."""

    def __init__(self, base_path, permissions=0o644):
        self.base_path = base_path
        self.permissions = permissions

    def create(self, filename):
        """
        Create the file-backed writer for one archive member.

        Parameters:
        - filename: str, target filename (relative to base_path)

        Returns:
        CustomFileIO: writer for the file under base_path

        Raises:
        ValueError: if filename would resolve outside base_path
        """
        base = os.path.realpath(self.base_path)
        full_path = os.path.realpath(os.path.join(base, filename))
        # Member names come from the archive and are untrusted: reject
        # absolute names and '..' components that escape base_path.
        if full_path == base or os.path.commonpath([base, full_path]) != base:
            raise ValueError(f"Unsafe archive member path: {filename!r}")
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        return CustomFileIO(full_path, self.permissions)

# Use custom factory
factory = CustomFileFactory('/tmp/extracted', permissions=0o755)
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=factory)

Callback System

Progress reporting and event handling during archive operations.

class Callback:
    """
    Abstract base class that operation callbacks derive from.

    Its hooks let callers monitor and control archive operations.
    """

    def report_start_preparation(self):
        """
        Invoked when the preparation phase of an operation begins.
        """

    def report_start(self, processing_file_path, processing_bytes):
        """
        Invoked when processing of a file begins.

        Parameters:
        - processing_file_path: str, path of the file being processed
        - processing_bytes: int, total number of bytes to process
        """

    def report_update(self, decompressed_bytes):
        """
        Invoked periodically during processing to report progress.

        Parameters:
        - decompressed_bytes: int, bytes processed so far
        """

    def report_end(self, processing_file_path, wrote_bytes):
        """
        Invoked when processing of a file has finished.

        Parameters:
        - processing_file_path: str, path of the processed file
        - wrote_bytes: int, total number of bytes written
        """

    def report_warning(self, message):
        """
        Invoked when a warning is raised during processing.

        Parameters:
        - message: str, the warning text
        """

    def report_postprocess(self):
        """
        Invoked during the post-processing phase.
        """

Concrete Callback Implementations

Pre-built callback implementations for common use cases.

class ExtractCallback(Callback):
    """
    Stock callback for extraction operations.

    Reports basic progress on stdout.
    """

class ArchiveCallback(Callback):
    """
    Stock callback for archive creation operations.

    Reports basic progress on stdout.
    """

Custom Callback Examples

import py7zr
from py7zr import Callback

class ProgressCallback(Callback):
    """Custom callback that prints per-file progress.

    Every Callback hook is defined, so the class stays instantiable if
    the base class enforces its hooks as abstract methods.
    """

    def __init__(self):
        self.current_file = None
        self.total_bytes = 0
        self.processed_bytes = 0

    def report_start_preparation(self):
        print("Preparing archive operation...")

    def report_start(self, processing_file_path, processing_bytes):
        self.current_file = processing_file_path
        # int() accepts both ints and numeric strings.
        # NOTE(review): py7zr may deliver byte counts as strings; confirm.
        self.total_bytes = int(processing_bytes)
        self.processed_bytes = 0
        print(f"Processing: {processing_file_path}")

    def report_update(self, decompressed_bytes):
        # Treated as an increment since the previous update.
        self.processed_bytes += int(decompressed_bytes)
        if self.total_bytes > 0:
            percent = (self.processed_bytes / self.total_bytes) * 100
            print(f"Progress: {percent:.1f}% ({self.processed_bytes}/{self.total_bytes} bytes)")

    def report_end(self, processing_file_path, wrote_bytes):
        print(f"Completed: {processing_file_path} ({wrote_bytes} bytes)")

    def report_warning(self, message):
        print(f"Warning: {message}")

    def report_postprocess(self):
        """Nothing is reported for the post-processing phase."""

# Use custom callback
callback = ProgressCallback()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(callback=callback)
class LoggingCallback(Callback):
    """Callback that appends one line per event to a log file.

    Every Callback hook is defined, so the class stays instantiable if
    the base class enforces its hooks as abstract methods.
    """

    def __init__(self, log_file):
        self.log_file = log_file

    def _append(self, line):
        """Append ``line`` followed by a newline to the log file."""
        # "\n" is a real newline; "\\n" would write a literal backslash-n
        # and run every entry together on one line.
        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(f"{line}\n")

    def report_start_preparation(self):
        """Nothing is logged for the preparation phase."""

    def report_start(self, processing_file_path, processing_bytes):
        self._append(f"START: {processing_file_path} ({processing_bytes} bytes)")

    def report_update(self, decompressed_bytes):
        """Incremental progress is not logged."""

    def report_end(self, processing_file_path, wrote_bytes):
        self._append(f"END: {processing_file_path} ({wrote_bytes} bytes)")

    def report_warning(self, message):
        self._append(f"WARNING: {message}")

    def report_postprocess(self):
        """Nothing is logged for the post-processing phase."""

# Use logging callback
callback = LoggingCallback('extraction.log')
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(callback=callback)

Advanced I/O Patterns

Complex usage patterns combining I/O and callbacks.

Streaming Extraction to Network

import py7zr
import socket
from py7zr import WriterFactory, Py7zIO

class NetworkIO(Py7zIO):
    """Stream extracted files over network.

    Write-only: the data goes straight to the socket and is not kept,
    so read() yields nothing and seek() cannot reposition anything.
    """

    def __init__(self, filename, socket_conn):
        self.filename = filename
        self.socket = socket_conn
        self.bytes_sent = 0
        # Explicit flag: testing bytes_sent == 0 would resend the header
        # whenever the first write happens to be empty.
        self._header_sent = False

    def write(self, data):
        # Send filename header first time
        if not self._header_sent:
            # "\n" is a real newline terminator ("\\n" would send a
            # literal backslash followed by 'n').
            header = f"FILE:{self.filename}\n".encode()
            self.socket.sendall(header)
            self._header_sent = True

        # sendall() retries until the whole chunk is out; send() may
        # transmit only part of it and silently drop the rest.
        self.socket.sendall(data)
        self.bytes_sent += len(data)
        return len(data)

    def read(self, size=None):
        # Nothing is retained locally, so there is nothing to read back.
        return b""

    def seek(self, offset, whence=0):
        # A socket cannot be repositioned. The call is accepted and
        # ignored so an extractor that rewinds its writers does not fail.
        # NOTE(review): confirm whether py7zr seeks on factory writers.
        return self.bytes_sent

    def flush(self):
        """No local buffering; sendall() already handed the data over."""

    def size(self):
        return self.bytes_sent

class NetworkFactory(WriterFactory):
    """Produce NetworkIO writers that all share one socket."""

    def __init__(self, socket_conn):
        self.socket = socket_conn

    def create(self, filename):
        """
        Parameters:
        - filename: str, target filename

        Returns:
        NetworkIO: writer bound to the shared socket
        """
        writer = NetworkIO(filename, self.socket)
        return writer

# Extract over network
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect(('remote_host', 8080))

    # Every extracted member is written through the same connection
    factory = NetworkFactory(s)
    with py7zr.SevenZipFile('archive.7z', 'r') as archive:
        archive.extractall(factory=factory)

Conditional Extraction with Callbacks

class SelectiveCallback(Callback):
    """Callback that flags files to skip based on a size threshold.

    Every Callback hook is defined, so the class stays instantiable if
    the base class enforces its hooks as abstract methods.
    """

    def __init__(self, max_file_size=1024*1024):
        self.max_file_size = max_file_size
        self.skip_current = False

    def report_start_preparation(self):
        """No action for the preparation phase."""

    def report_start(self, processing_file_path, processing_bytes):
        # int() accepts both ints and numeric strings; comparing a str
        # with an int would raise TypeError.
        # NOTE(review): py7zr may deliver byte counts as strings; confirm.
        if int(processing_bytes) > self.max_file_size:
            print(f"Skipping large file: {processing_file_path} ({processing_bytes} bytes)")
            self.skip_current = True
            return False  # Skip this file
        else:
            self.skip_current = False
            return True   # Process this file

    def report_update(self, decompressed_bytes):
        """No action on progress updates."""

    def report_end(self, processing_file_path, wrote_bytes):
        """No action when a file completes."""

    def report_warning(self, message):
        """No action on warnings."""

    def report_postprocess(self):
        """No action for the post-processing phase."""

# Note: Actual file skipping requires integration with extraction logic.
# The callback only records the decision in `skip_current` and in its
# return value; something else has to act on it.

Integration Examples

With Progress Bars (tqdm)

import py7zr
from py7zr import Callback
from tqdm import tqdm

class TqdmCallback(Callback):
    """Show one tqdm progress bar per processed file.

    Every Callback hook is defined, so the class stays instantiable if
    the base class enforces its hooks as abstract methods.
    """

    def __init__(self):
        self.pbar = None

    def _close_bar(self):
        """Close the active bar, if any, and forget it."""
        # Compare with None explicitly: a tqdm bar whose total is 0 is
        # falsy, so `if self.pbar:` would leave empty-file bars open.
        if self.pbar is not None:
            self.pbar.close()
            self.pbar = None

    def report_start_preparation(self):
        """No bar is shown for the preparation phase."""

    def report_start(self, processing_file_path, processing_bytes):
        # Guard against a previous bar that never received report_end.
        self._close_bar()
        # int() accepts both ints and numeric strings.
        # NOTE(review): py7zr may deliver byte counts as strings; confirm.
        self.pbar = tqdm(total=int(processing_bytes),
                         desc=f"Extracting {processing_file_path}",
                         unit='B', unit_scale=True)

    def report_update(self, decompressed_bytes):
        if self.pbar is not None:
            self.pbar.update(int(decompressed_bytes))

    def report_end(self, processing_file_path, wrote_bytes):
        self._close_bar()

    def report_warning(self, message):
        """Warnings are not displayed."""

    def report_postprocess(self):
        """No bar is shown for the post-processing phase."""

# Extract with progress bar
callback = TqdmCallback()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(callback=callback)

With Cloud Storage

import py7zr
from py7zr import WriterFactory, Py7zIO
import boto3

class S3IO(Py7zIO):
    """Upload extracted files directly to S3.

    Data is collected in an in-memory buffer and uploaded as one object
    when close() is called.
    NOTE(review): close() is not part of the Py7zIO interface; confirm
    that py7zr calls it, otherwise call close() on each writer yourself.
    """

    def __init__(self, filename, s3_client, bucket, prefix=""):
        # Local import: the snippet's import block does not bring
        # BytesIO into scope, which made the constructor fail.
        from io import BytesIO

        self.filename = filename
        self.s3_client = s3_client
        self.bucket = bucket
        # Strip trailing slashes so a prefix such as "dir/" does not
        # produce a "dir//name" key.
        prefix = prefix.rstrip("/")
        self.key = f"{prefix}/{filename}" if prefix else filename
        self.buffer = BytesIO()

    def write(self, data):
        return self.buffer.write(data)

    def read(self, size=None):
        return self.buffer.read() if size is None else self.buffer.read(size)

    def seek(self, offset, whence=0):
        return self.buffer.seek(offset, whence)

    def flush(self):
        self.buffer.flush()

    def size(self):
        return self.buffer.getbuffer().nbytes

    def close(self):
        # Rewind so the upload starts from the first byte.
        self.buffer.seek(0)
        self.s3_client.upload_fileobj(self.buffer, self.bucket, self.key)

class S3Factory(WriterFactory):
    """Produce S3IO writers that share one client, bucket and prefix."""

    def __init__(self, bucket, prefix=""):
        self.s3_client = boto3.client('s3')
        self.bucket = bucket
        self.prefix = prefix

    def create(self, filename):
        """
        Parameters:
        - filename: str, target filename

        Returns:
        S3IO: writer bound to this factory's bucket and prefix
        """
        writer = S3IO(filename, self.s3_client, self.bucket, self.prefix)
        return writer

# Extract directly to S3
factory = S3Factory('my-bucket', 'extracted-files')
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    # Each archive member is handed to a writer created by the factory
    archive.extractall(factory=factory)

Install with Tessl CLI

npx tessl i tessl/pypi-py7zr

docs

cli.md

core-operations.md

exceptions.md

index.md

io-callbacks.md

tile.json