Pure python 7-zip library providing comprehensive 7z archive format support with compression, decompression, encryption and CLI tools
—
py7zr provides a flexible I/O abstraction layer and callback system that enables custom extraction destinations, progress monitoring, and pluggable storage backends. The I/O system supports file-based, memory-based, and custom implementations, while callbacks provide real-time progress reporting during archive operations.
Base abstract class defining the I/O interface for archive operations.
class Py7zIO:
"""
Abstract base class for py7zr I/O operations.
Provides interface for reading, writing, and seeking operations
used during archive extraction and creation.
"""
def write(self, s: bytes) -> int:
    """
    Write bytes to the I/O stream.

    Parameters:
    - s: bytes, data to write

    Returns:
    int: number of bytes written
    """
def read(self, size=None) -> bytes:
    """
    Read bytes from the I/O stream.

    Parameters:
    - size: int, number of bytes to read (None, the default, reads everything)

    Returns:
    bytes: data read from stream
    """
def seek(self, offset: int, whence: int = 0) -> int:
    """
    Change stream position.

    Parameters:
    - offset: int, offset in bytes
    - whence: int, reference point (0=start, 1=current, 2=end); defaults to 0

    Returns:
    int: new absolute position
    """
def flush(self):
    """
    Flush any buffered write data.

    Takes no arguments; no return value is documented for this operation.
    """
def size(self):
"""
Get total size of the stream.
Returns:
int: stream size in bytes
"""

Ready-to-use I/O implementations for common scenarios.
class HashIO(Py7zIO):
    """
    I/O wrapper that computes hash while writing.

    Useful for verifying file integrity during extraction.

    Parameters:
    - filename: name handed to the constructor
      (NOTE(review): the stub does not show how it is used or which hash
      algorithm applies -- confirm against the library)
    """

    def __init__(self, filename): ...
class Py7zBytesIO(Py7zIO):
    """
    Memory-based I/O with size limits.

    Stores data in memory with optional size constraints.

    Parameters:
    - filename: name handed to the constructor
    - limit: optional size constraint; defaults to None
      (NOTE(review): presumably None means "no limit" -- confirm)
    """

    def __init__(self, filename, limit=None): ...
class NullIO(Py7zIO):
    """
    Null device I/O that discards all writes.

    Useful for testing or when only checking archive contents.
    The constructor takes no arguments.
    """

    def __init__(self): ...
class MemIO(Py7zIO):
    """
    Memory-based I/O with factory pattern.

    Combines memory storage with factory-based creation.

    Parameters:
    - fname: name handed to the constructor
    - factory: factory object handed to the constructor
      (NOTE(review): presumably a WriterFactory -- confirm against the library)
    """

    def __init__(self, fname, factory): ...
class Buffer:
"""
Utility buffer for byte operations.
"""
def __init__(self, size=16): ...

import py7zr
from py7zr import HashIO, Py7zBytesIO, NullIO
# Extract to memory with size limit
class MemoryFactory(py7zr.WriterFactory):
    """Factory that hands out size-limited in-memory writers."""

    def create(self, filename):
        """Return a ``Py7zBytesIO`` for ``filename`` capped at 1024*1024 bytes."""
        size_cap = 1024 * 1024  # 1MB limit
        return Py7zBytesIO(filename, limit=size_cap)
# Open the archive read-only and let the factory create a writer for each member.
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=MemoryFactory())
# Extract with hash verification
class HashFactory(py7zr.WriterFactory):
    """Factory that wraps every extracted file in a ``HashIO`` writer."""

    def create(self, filename):
        """Build and return the ``HashIO`` writer for ``filename``."""
        hashing_writer = HashIO(filename)
        return hashing_writer
# Open the archive read-only and extract through HashIO writers.
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
    archive.extractall(factory=HashFactory())
# Test extraction without writing files
class TestFactory(py7zr.WriterFactory):
    """Factory whose writers discard everything, so nothing is written out."""

    def create(self, filename):
        """Return a fresh ``NullIO``; ``filename`` is not used."""
        discard_sink = NullIO()
        return discard_sink
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
archive.extractall(factory=TestFactory())

Factory classes for creating I/O instances during archive operations.
class WriterFactory:
    """
    Abstract factory for creating Py7zIO writers.

    Enables custom I/O backend selection during extraction: subclasses
    override ``create`` to decide which writer each extracted file gets.
    """

    def create(self, filename: str) -> "Py7zIO":
        """
        Create I/O writer for specified filename.

        Parameters:
        - filename: str, target filename

        Returns:
        Py7zIO: I/O instance for the file
        """
class HashIOFactory(WriterFactory):
    """Factory for creating HashIO instances."""

    # Takes the target filename, as declared by WriterFactory.create.
    def create(self, filename): ...
class BytesIOFactory(WriterFactory):
    """Factory for creating BytesIO instances with size limits."""

    # limit: optional size limit, None by default.
    # NOTE(review): presumably forwarded to each created instance -- confirm.
    def __init__(self, limit=None): ...

    # Takes the target filename, as declared by WriterFactory.create.
    def create(self, filename): ...
class NullIOFactory(WriterFactory):
"""Factory for creating NullIO instances."""
def create(self, filename): ...

import py7zr
from py7zr import WriterFactory, Py7zIO
import os
class CustomFileIO(Py7zIO):
    """File-backed writer that applies a permission mode once it is closed.

    Provides every operation listed on the ``Py7zIO`` interface (write, read,
    seek, flush, size) by delegating to one underlying file object.
    """

    def __init__(self, path, perms):
        """Open ``path`` for writing and remember the mode to apply on close."""
        self.path = path
        self.perms = perms
        # 'w+b' (not 'wb') so read() and seek() also work on this handle.
        self.file = open(path, 'w+b')

    def write(self, data):
        """Write ``data`` to the file and return the number of bytes written."""
        return self.file.write(data)

    def read(self, size=None):
        """Read up to ``size`` bytes (everything when ``size`` is None)."""
        return self.file.read(size)

    def seek(self, offset, whence=0):
        """Move the file position and return the new absolute position."""
        return self.file.seek(offset, whence)

    def flush(self):
        """Push buffered data down to the operating system."""
        self.file.flush()

    def size(self):
        """Return the current size of the file in bytes."""
        # Flush first so buffered writes are reflected in the reported size.
        self.file.flush()
        return os.fstat(self.file.fileno()).st_size

    def close(self):
        """Close the file, then apply the requested permissions."""
        if self.file.closed:
            return  # already closed; do not chmod a second time
        self.file.close()
        os.chmod(self.path, self.perms)


class CustomFileFactory(WriterFactory):
    """Custom factory that creates files with specific permissions."""

    def __init__(self, base_path, permissions=0o644):
        """
        Parameters:
        - base_path: directory under which extracted files are created
        - permissions: mode passed to ``os.chmod`` when a file is closed
        """
        self.base_path = base_path
        self.permissions = permissions

    def create(self, filename):
        """
        Create the writer for ``filename`` below ``base_path``.

        Parent directories are created as needed.

        Raises:
        ValueError: if ``filename`` would resolve outside ``base_path``
        """
        base = os.path.abspath(self.base_path)
        full_path = os.path.abspath(os.path.join(base, filename))
        # Member names come from the archive; refuse anything escaping base_path.
        if os.path.commonpath([base, full_path]) != base:
            raise ValueError(f"Refusing to extract outside {base!r}: {filename!r}")
        # full_path is absolute, so dirname() is never the empty string here.
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        return CustomFileIO(full_path, self.permissions)
# Use custom factory
factory = CustomFileFactory('/tmp/extracted', permissions=0o755)
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
archive.extractall(factory=factory)

Progress reporting and event handling during archive operations.
class Callback:
"""
Abstract base class for operation callbacks.
Provides hooks for monitoring and controlling archive operations.
"""
def report_start_preparation(self):
    """
    Called at the start of operation preparation phase.

    Takes no arguments.
    """
def report_start(self, processing_file_path, processing_bytes):
    """
    Called when starting to process a file.

    Parameters:
    - processing_file_path: str, path of file being processed
    - processing_bytes: int, total bytes to process
      (NOTE(review): confirm whether the library delivers this as an int or
      as a numeric string)
    """
def report_update(self, decompressed_bytes):
    """
    Called periodically during processing with progress info.

    Parameters:
    - decompressed_bytes: int, bytes processed so far
      (NOTE(review): confirm whether this is cumulative or per-call, and
      whether it arrives as an int or as a numeric string)
    """
def report_end(self, processing_file_path, wrote_bytes):
    """
    Called when file processing is complete.

    Parameters:
    - processing_file_path: str, path of processed file
    - wrote_bytes: int, total bytes written
      (NOTE(review): confirm whether the library delivers this as an int or
      as a numeric string)
    """
def report_warning(self, message: str):
    """
    Called when a warning occurs during processing.

    Parameters:
    - message: str, warning message
    """
def report_postprocess(self):
"""
Called during post-processing phase.
"""

Pre-built callback implementations for common use cases.
class ExtractCallback(Callback):
    """
    Default callback implementation for extraction operations.

    Provides basic progress reporting to stdout.

    NOTE(review): only the class docstring is shown here; which hooks are
    overridden is not visible -- confirm against the library.
    """
class ArchiveCallback(Callback):
"""
Default callback implementation for archive creation operations.
Provides basic progress reporting to stdout.
"""

import py7zr
from py7zr import Callback
class ProgressCallback(Callback):
    """Custom callback that prints per-file progress to stdout.

    Implements all six hooks declared on ``Callback`` so the class can be
    instantiated even when the base class marks them abstract.
    """

    def __init__(self):
        self.current_file = None   # path of the file currently being processed
        self.total_bytes = 0       # expected byte count for the current file
        self.processed_bytes = 0   # running sum of the values passed to report_update

    def report_start_preparation(self):
        """Announce that the operation is being prepared."""
        print("Preparing archive operation...")

    def report_start(self, processing_file_path, processing_bytes):
        """Reset the counters for a new file and announce it.

        Parameters:
        - processing_file_path: path of the file being processed
        - processing_bytes: total bytes to process; coerced with ``int()`` so
          numeric strings are accepted as well as ints
        """
        self.current_file = processing_file_path
        self.total_bytes = int(processing_bytes)
        self.processed_bytes = 0
        print(f"Processing: {processing_file_path}")

    def report_update(self, decompressed_bytes):
        """Add ``decompressed_bytes`` to the running sum and print a percentage.

        Nothing is printed while the expected total is zero, which avoids a
        division by zero for empty files.
        """
        self.processed_bytes += int(decompressed_bytes)
        if self.total_bytes > 0:
            percent = (self.processed_bytes / self.total_bytes) * 100
            print(f"Progress: {percent:.1f}% ({self.processed_bytes}/{self.total_bytes} bytes)")

    def report_end(self, processing_file_path, wrote_bytes):
        """Announce that ``processing_file_path`` is finished."""
        print(f"Completed: {processing_file_path} ({wrote_bytes} bytes)")

    def report_warning(self, message):
        """Print a warning message."""
        print(f"Warning: {message}")

    def report_postprocess(self):
        """Post-processing hook; nothing to report for this callback."""
# Use custom callback
callback = ProgressCallback()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
archive.extractall(callback=callback)

class LoggingCallback(Callback):
"""Callback that logs to file."""
def __init__(self, log_file):
    """Store the path of the log file that the report hooks append to."""
    self.log_file = log_file
def report_start(self, processing_file_path, processing_bytes):
    """Append a START record for the file about to be processed.

    Parameters:
    - processing_file_path: path of the file being processed
    - processing_bytes: total bytes to process (written as-is into the record)
    """
    # "\n" rather than the escaped "\\n": each record must end with a real
    # newline, not the two characters backslash + n. UTF-8 keeps non-ASCII
    # paths writable regardless of the platform default encoding.
    with open(self.log_file, 'a', encoding='utf-8') as f:
        f.write(f"START: {processing_file_path} ({processing_bytes} bytes)\n")
def report_end(self, processing_file_path, wrote_bytes):
    """Append an END record for a file that has been processed.

    Parameters:
    - processing_file_path: path of the processed file
    - wrote_bytes: total bytes written (written as-is into the record)
    """
    # "\n" rather than the escaped "\\n" so the record ends with a real newline;
    # explicit UTF-8 keeps non-ASCII paths writable on every platform.
    with open(self.log_file, 'a', encoding='utf-8') as f:
        f.write(f"END: {processing_file_path} ({wrote_bytes} bytes)\n")
def report_warning(self, message):
    """Append a WARNING record containing ``message``."""
    # "\n" rather than the escaped "\\n" so the record ends with a real newline;
    # explicit UTF-8 keeps non-ASCII messages writable on every platform.
    with open(self.log_file, 'a', encoding='utf-8') as f:
        f.write(f"WARNING: {message}\n")
# Use logging callback
callback = LoggingCallback('extraction.log')
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
archive.extractall(callback=callback)

Complex usage patterns combining I/O and callbacks.
import py7zr
import socket
from py7zr import WriterFactory, Py7zIO
class NetworkIO(Py7zIO):
    """Stream extracted files over network.

    A ``FILE:<name>`` header line is sent once, before the first payload.
    The object is a write-only sink: the remaining ``Py7zIO`` operations are
    provided so the interface is complete, but they do not touch the socket.
    """

    def __init__(self, filename, socket_conn):
        """
        Parameters:
        - filename: name announced in the header line
        - socket_conn: connected socket the data is sent on
        """
        self.filename = filename
        self.socket = socket_conn
        self.bytes_sent = 0        # payload bytes sent so far (header excluded)
        self._header_sent = False  # explicit flag: an empty first write must
                                   # not cause the header to be sent twice

    def write(self, data):
        """Send ``data`` over the socket and return ``len(data)``."""
        # Send filename header first time
        if not self._header_sent:
            # "\n" (not the escaped "\\n") so the header ends with a real newline.
            header = f"FILE:{self.filename}\n".encode()
            self.socket.sendall(header)
            self._header_sent = True
        # sendall() keeps sending until everything is transmitted; plain send()
        # may transmit only part of the buffer.
        self.socket.sendall(data)
        self.bytes_sent += len(data)
        return len(data)

    def read(self, size=None):
        """Nothing can be read back from the sink; always returns ``b""``."""
        return b""

    def seek(self, offset, whence=0):
        """Seeking has no effect on a stream; the requested offset is echoed."""
        return offset

    def flush(self):
        """No-op: ``write`` hands data to the socket immediately."""

    def size(self):
        """Return the number of payload bytes sent so far."""
        return self.bytes_sent
class NetworkFactory(WriterFactory):
    """Factory whose writers all share one socket connection."""

    def __init__(self, socket_conn):
        """Keep ``socket_conn`` so every writer created later can use it."""
        self.socket = socket_conn

    def create(self, filename):
        """Return a ``NetworkIO`` for ``filename`` bound to the stored socket."""
        shared_conn = self.socket
        return NetworkIO(filename, shared_conn)
# Extract over network
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.connect(('remote_host', 8080))
factory = NetworkFactory(s)
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
archive.extractall(factory=factory)

class SelectiveCallback(Callback):
"""Callback that can skip files based on criteria."""
def __init__(self, max_file_size=1024*1024):
    """Set the size threshold above which files are flagged for skipping.

    Parameters:
    - max_file_size: threshold in bytes; defaults to 1024*1024
    """
    self.max_file_size = max_file_size
    # Reflects the decision report_start made for the most recent file.
    self.skip_current = False
def report_start(self, processing_file_path, processing_bytes):
    """Flag files larger than ``max_file_size`` and record the decision.

    Parameters:
    - processing_file_path: path of the file about to be processed
    - processing_bytes: total bytes to process; coerced with ``int()`` so a
      numeric string is compared numerically instead of raising TypeError

    Returns:
    bool: False when the file exceeds the threshold, True otherwise.
    """
    size = int(processing_bytes)
    self.skip_current = size > self.max_file_size
    if self.skip_current:
        print(f"Skipping large file: {processing_file_path} ({processing_bytes} bytes)")
        return False  # Skip this file
    return True  # Process this file
# Note: Actual file skipping requires integration with extraction logic

import py7zr
from py7zr import Callback
from tqdm import tqdm
class TqdmCallback(Callback):
    """Callback that shows one tqdm progress bar per processed file.

    Implements all six hooks declared on ``Callback`` so the class can be
    instantiated even when the base class marks them abstract.
    """

    def __init__(self):
        self.pbar = None  # bar for the file currently in progress, if any

    def _close_bar(self):
        """Close and forget the active bar, if there is one."""
        # Compare against None explicitly: a tqdm bar whose total is 0 is
        # falsy, so a plain truthiness test would leave it open.
        if self.pbar is not None:
            self.pbar.close()
            self.pbar = None

    def report_start_preparation(self):
        """Preparation hook; nothing to display yet."""

    def report_start(self, processing_file_path, processing_bytes):
        """Open a new bar sized to ``processing_bytes`` for the given file."""
        self._close_bar()  # never leave a previous bar dangling
        self.pbar = tqdm(total=int(processing_bytes),
                         desc=f"Extracting {processing_file_path}",
                         unit='B', unit_scale=True)

    def report_update(self, decompressed_bytes):
        """Advance the active bar by ``decompressed_bytes``."""
        if self.pbar is not None:
            self.pbar.update(int(decompressed_bytes))

    def report_end(self, processing_file_path, wrote_bytes):
        """Close the bar belonging to the file that just finished."""
        self._close_bar()

    def report_warning(self, message):
        """Print the warning via ``tqdm.write`` so it does not garble the bar."""
        tqdm.write(f"Warning: {message}")

    def report_postprocess(self):
        """Post-processing hook; make sure no bar is left open."""
        self._close_bar()
# Extract with progress bar
callback = TqdmCallback()
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
archive.extractall(callback=callback)

import py7zr
from py7zr import WriterFactory, Py7zIO
import boto3
class S3IO(Py7zIO):
    """Upload extracted files directly to S3.

    Data is collected in an in-memory buffer and uploaded in one call when
    ``close`` is invoked. The remaining ``Py7zIO`` operations delegate to that
    buffer.
    """

    def __init__(self, filename, s3_client, bucket, prefix=""):
        """
        Parameters:
        - filename: name of the extracted file; forms the tail of the object key
        - s3_client: client object providing ``upload_fileobj``
        - bucket: destination bucket name
        - prefix: optional key prefix; trailing slashes are ignored
        """
        # Imported here because the surrounding example never imports BytesIO.
        from io import BytesIO

        self.filename = filename
        self.s3_client = s3_client
        self.bucket = bucket
        # Strip trailing slashes so "dir/" and "dir" both yield "dir/<filename>".
        trimmed = prefix.rstrip('/')
        self.key = f"{trimmed}/{filename}" if trimmed else filename
        self.buffer = BytesIO()

    def write(self, data):
        """Append ``data`` to the buffer and return the number of bytes written."""
        return self.buffer.write(data)

    def read(self, size=None):
        """Read up to ``size`` bytes from the buffer (everything when None)."""
        return self.buffer.read(size)

    def seek(self, offset, whence=0):
        """Move the buffer position and return the new absolute position."""
        return self.buffer.seek(offset, whence)

    def flush(self):
        """Flush the in-memory buffer."""
        self.buffer.flush()

    def size(self):
        """Return the number of bytes buffered so far."""
        return self.buffer.getbuffer().nbytes

    def close(self):
        """Upload the buffered content to ``bucket`` under ``key``."""
        # Rewind so the upload starts from the first byte.
        self.buffer.seek(0)
        self.s3_client.upload_fileobj(self.buffer, self.bucket, self.key)
class S3Factory(WriterFactory):
    """Factory that creates ``S3IO`` writers sharing one S3 client."""

    def __init__(self, bucket, prefix=""):
        """Create the S3 client and remember the target bucket and key prefix."""
        self.bucket = bucket
        self.prefix = prefix
        self.s3_client = boto3.client('s3')

    def create(self, filename):
        """Return an ``S3IO`` for ``filename`` in the configured bucket/prefix."""
        uploader = S3IO(filename, self.s3_client, self.bucket, self.prefix)
        return uploader
# Extract directly to S3
factory = S3Factory('my-bucket', 'extracted-files')
with py7zr.SevenZipFile('archive.7z', 'r') as archive:
archive.extractall(factory=factory)

Install with Tessl CLI
npx tessl i tessl/pypi-py7zr