Deduplicated, encrypted, authenticated and compressed backups
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Archive creation, extraction, listing, and deletion operations for managing backup snapshots in BorgBackup repositories.
import subprocess
import json

Create new archives with files and directories, supporting various options for compression, exclusion patterns, and metadata collection.
def create_archive(repo_path: str, archive_name: str, source_paths: list,
                   compression: str = None, exclude_patterns: list = None,
                   stats: bool = False, json_output: bool = False, **options) -> dict:
    """
    Create a new borg archive from the given source paths.

    Args:
        repo_path: Path to the repository.
        archive_name: Name for the new archive.
        source_paths: Paths to back up.
        compression: Compression algorithm ('none', 'lz4', 'zstd', 'zlib', 'lzma').
        exclude_patterns: Exclusion patterns; one '--exclude' per entry.
        stats: Include statistics in output.
        json_output: Request and parse borg's JSON output.
        **options: Extra switches: dry_run, progress, checkpoint_interval.

    Returns:
        Parsed creation statistics when json_output is True, else None.

    Raises:
        subprocess.CalledProcessError: If borg exits with a non-zero status.
    """
    args = ['borg', 'create']
    if compression:
        args += ['--compression', compression]
    for pattern in exclude_patterns or []:
        args += ['--exclude', pattern]
    if stats:
        args.append('--stats')
    if json_output:
        args.append('--json')
    if options.get('dry_run'):
        args.append('--dry-run')
    if options.get('progress'):
        args.append('--progress')
    checkpoint = options.get('checkpoint_interval')
    if checkpoint:
        args += ['--checkpoint-interval', str(checkpoint)]
    args.append(f'{repo_path}::{archive_name}')
    args += source_paths
    completed = subprocess.run(args, capture_output=True, text=True, check=True)
    if json_output:
        return json.loads(completed.stdout)
    return None
# Usage example:
import subprocess
import json

# Create archive with compression and JSON statistics (--json implies --stats)
proc = subprocess.run(
    ['borg', 'create', '--json', '--compression=zstd,6',
     '/backup/repo::documents-{now}', '/home/user/documents'],
    capture_output=True, text=True, check=True)
stats = json.loads(proc.stdout)
print(f"Original size: {stats['archive']['stats']['original_size']}")
print(f"Compressed size: {stats['archive']['stats']['compressed_size']}")

# Create with exclusion patterns
subprocess.run(
    ['borg', 'create', '--progress',
     '--exclude=*.tmp', '--exclude=__pycache__',
     '/backup/repo::backup-{now}', '/home/user'],
    check=True)
# List archives in a repository or list contents of specific archives.
def list_archives(repo_path: str, json_output: bool = True,
                  short: bool = False, format_str: str = None) -> list:
    """
    List the archives contained in a repository.

    Args:
        repo_path: Path to repository.
        json_output: Request and parse borg's JSON output.
        short: Show only archive names.
        format_str: Custom format string for output.

    Returns:
        List of archive-info dictionaries when json_output is True,
        otherwise the raw stdout text from borg.
    """
    args = ['borg', 'list']
    for flag, enabled in (('--json', json_output), ('--short', short)):
        if enabled:
            args.append(flag)
    if format_str:
        args += ['--format', format_str]
    args.append(repo_path)
    completed = subprocess.run(args, capture_output=True, text=True, check=True)
    if not json_output:
        return completed.stdout
    return json.loads(completed.stdout)['archives']
def list_archive_contents(repo_path: str, archive_name: str,
                          json_output: bool = True, pattern: str = None) -> list:
    """
    List the files stored inside one archive.

    Args:
        repo_path: Path to repository.
        archive_name: Name of archive to list.
        json_output: Request borg's JSON Lines output and parse it.
        pattern: Optional pattern to filter files.

    Returns:
        List of per-file dictionaries when json_output is True,
        otherwise the raw stdout text from borg.
    """
    args = ['borg', 'list']
    if json_output:
        args.append('--json-lines')
    args.append('{}::{}'.format(repo_path, archive_name))
    if pattern:
        args.append(pattern)
    completed = subprocess.run(args, capture_output=True, text=True, check=True)
    if not json_output:
        return completed.stdout
    lines = completed.stdout.strip().split('\n')
    return [json.loads(entry) for entry in lines if entry]
# Usage example:
import subprocess
import json

# List every archive in the repository
listing = subprocess.run(['borg', 'list', '--json', '/backup/repo'],
                         capture_output=True, text=True, check=True)
archives = json.loads(listing.stdout)['archives']
for archive in archives:
    print(f"Archive: {archive['name']}, Date: {archive['start']}")

# List contents of one archive using JSON Lines
listing = subprocess.run(
    ['borg', 'list', '--json-lines', '/backup/repo::documents-2023-12-01'],
    capture_output=True, text=True, check=True)
files = [json.loads(line) for line in listing.stdout.strip().split('\n') if line]
for file_info in files:
    print(f"Path: {file_info['path']}, Size: {file_info['size']}")
# Get detailed information about specific archives including statistics and metadata.
def get_archive_info(repo_path: str, archive_name: str = None,
                     json_output: bool = True) -> dict:
    """
    Fetch detailed information about a repository or a single archive.

    Args:
        repo_path: Path to repository.
        archive_name: Specific archive name; when omitted, borg reports
            on the repository as a whole.
        json_output: Request and parse borg's JSON output.

    Returns:
        Parsed metadata/statistics dictionary when json_output is True,
        otherwise the raw stdout text from borg.
    """
    args = ['borg', 'info']
    if json_output:
        args.append('--json')
    target = f'{repo_path}::{archive_name}' if archive_name else repo_path
    args.append(target)
    completed = subprocess.run(args, capture_output=True, text=True, check=True)
    if not json_output:
        return completed.stdout
    return json.loads(completed.stdout)
# Usage example:
import subprocess
import json

# Fetch info for one specific archive
proc = subprocess.run(['borg', 'info', '--json', '/backup/repo::documents-2023-12-01'],
                      capture_output=True, text=True, check=True)
info = json.loads(proc.stdout)
archive = info['archives'][0]
print(f"Archive: {archive['name']}")
print(f"Duration: {archive['duration']} seconds")
print(f"Files: {archive['stats']['nfiles']}")
print(f"Original size: {archive['stats']['original_size']} bytes")
# Extract files from archives with support for selective extraction and path manipulation.
def extract_archive(repo_path: str, archive_name: str, patterns: list = None,
                    destination: str = None, dry_run: bool = False,
                    strip_components: int = None, **options) -> None:
    """
    Extract the contents of an archive.

    Args:
        repo_path: Path to repository.
        archive_name: Name of archive to extract.
        patterns: Optional path patterns selecting what to extract.
        destination: Extraction destination directory.
            NOTE(review): '--destination' is not a documented option of
            borg 1.x 'extract' (which extracts into the CWD) — verify
            against the target borg version.
        dry_run: Show what would be extracted without extracting.
        strip_components: Strip N leading path components.
        **options: Extra switches: progress, sparse, numeric_owner.

    Raises:
        subprocess.CalledProcessError: If borg exits with a non-zero status.
    """
    args = ['borg', 'extract']
    if dry_run:
        args.append('--dry-run')
    if destination:
        args += ['--destination', destination]
    if strip_components:
        args += ['--strip-components', str(strip_components)]
    for key, flag in (('progress', '--progress'),
                      ('sparse', '--sparse'),
                      ('numeric_owner', '--numeric-owner')):
        if options.get(key):
            args.append(flag)
    args.append(f'{repo_path}::{archive_name}')
    args.extend(patterns or [])
    subprocess.run(args, check=True)
# Usage example:
import subprocess

# Extract an entire archive
subprocess.run(['borg', 'extract', '--progress',
                '/backup/repo::documents-2023-12-01'], check=True)

# Extract only selected files/directories
subprocess.run(['borg', 'extract', '/backup/repo::documents-2023-12-01',
                'home/user/documents/important.txt',
                'home/user/documents/projects/'], check=True)

# Extract to a specific destination
# NOTE(review): confirm the installed borg version supports '--destination'.
subprocess.run(['borg', 'extract', '--destination=/restore',
                '/backup/repo::documents-2023-12-01'], check=True)
# Delete archives from repository with safety options.
def delete_archive(repo_path: str, archive_name: str = None,
                   dry_run: bool = False, stats: bool = False,
                   cache_only: bool = False) -> None:
    """
    Delete an archive — or, when no archive name is given, the whole
    repository — via 'borg delete'.

    Args:
        repo_path: Path to repository.
        archive_name: Archive to delete; None targets the entire repository.
        dry_run: Show what would be deleted without deleting.
        stats: Show deletion statistics.
        cache_only: Delete only from cache, not the repository.

    Raises:
        subprocess.CalledProcessError: If borg exits with a non-zero status.
    """
    args = ['borg', 'delete']
    for flag, enabled in (('--dry-run', dry_run),
                          ('--stats', stats),
                          ('--cache-only', cache_only)):
        if enabled:
            args.append(flag)
    target = f'{repo_path}::{archive_name}' if archive_name else repo_path
    args.append(target)
    subprocess.run(args, check=True)
# Usage example:
import subprocess

# Delete a single archive and print statistics
subprocess.run(['borg', 'delete', '--stats',
                '/backup/repo::old-backup-2023-01-01'], check=True)

# Dry run: show what would be deleted
subprocess.run(['borg', 'delete', '--dry-run',
                '/backup/repo::test-backup'], check=True)

# Delete the entire repository (dangerous!)
# subprocess.run(['borg', 'delete', '/backup/repo'], check=True)
# Compare archives or archive contents to identify differences.
def diff_archives(repo_path: str, archive1: str, archive2: str,
                  json_output: bool = False, sort: bool = False) -> list:
    """
    Compare two archives and show differences.

    Args:
        repo_path: Path to repository
        archive1: First archive name
        archive2: Second archive name
        json_output: Return JSON Lines formatted output
        sort: Sort output by file path

    Returns:
        List of difference dictionaries if json_output=True, otherwise string

    Raises:
        subprocess.CalledProcessError: If borg exits with a non-zero status.
    """
    cmd = ['borg', 'diff']
    if json_output:
        cmd.append('--json-lines')
    if sort:
        cmd.append('--sort')
    # Fix: 'borg diff' takes REPO::ARCHIVE1 plus a *bare* second archive name —
    # the docs state "no repository location allowed" for ARCHIVE2, so passing
    # repo::archive2 (as the original code did) makes borg reject the call.
    cmd.extend([f'{repo_path}::{archive1}', archive2])
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    if json_output:
        return [json.loads(line) for line in result.stdout.strip().split('\n') if line]
    return result.stdout
# Usage example:
import subprocess
import json

# Compare two archives — the second archive is given by bare name only,
# since 'borg diff' forbids a repository location on ARCHIVE2.
result = subprocess.run(['borg', 'diff',
                         '/backup/repo::backup-2023-12-01',
                         'backup-2023-12-02'],
                        capture_output=True, text=True, check=True)
print(result.stdout)

# Compare with JSON Lines output
result = subprocess.run(['borg', 'diff', '--json-lines',
                         '/backup/repo::backup-2023-12-01',
                         'backup-2023-12-02'],
                        capture_output=True, text=True, check=True)
differences = [json.loads(line) for line in result.stdout.strip().split('\n') if line]
# Export archives as tar files for compatibility with standard tools.
def export_archive_as_tar(repo_path: str, archive_name: str, tar_file: str = None,
                          tar_filter: str = None, exclude_patterns: list = None) -> None:
    """
    Export archive as tar file.

    Args:
        repo_path: Path to repository
        archive_name: Name of archive to export
        tar_file: Output tar file path (use '-' for stdout); required —
            'borg export-tar' mandates the FILE positional argument.
        tar_filter: Tar filter to use ('auto', 'gzip', 'bzip2', 'xz', 'lzma')
        exclude_patterns: List of exclusion patterns

    Raises:
        ValueError: If tar_file is not provided.
        subprocess.CalledProcessError: If borg exits with a non-zero status.
    """
    if not tar_file:
        # Fail early with a clear message instead of letting borg die with a
        # cryptic usage error from a command missing its FILE argument.
        raise ValueError("tar_file is required (use '-' for stdout)")
    cmd = ['borg', 'export-tar']
    if tar_filter:
        cmd.extend(['--tar-filter', tar_filter])
    if exclude_patterns:
        for pattern in exclude_patterns:
            cmd.extend(['--exclude', pattern])
    cmd.append(f'{repo_path}::{archive_name}')
    cmd.append(tar_file)
    subprocess.run(cmd, check=True)
# Usage example:
import subprocess

# Export an archive as a gzip-compressed tar file
subprocess.run(['borg', 'export-tar', '--tar-filter=gzip',
                '/backup/repo::documents-2023-12-01',
                'documents.tar.gz'], check=True)

# Export to stdout and pipe into another command
# subprocess.run(['borg', 'export-tar', '/backup/repo::backup', '-'],
#                stdout=some_process.stdin, check=True)
# Import archives from tar files to create BorgBackup archives.
def import_archive_from_tar(repo_path: str, archive_name: str, tar_file: str = None,
                            tar_filter: str = None, strip_components: int = None,
                            stats: bool = False, json_output: bool = False) -> dict:
    """
    Import archive from tar file.

    Args:
        repo_path: Path to repository
        archive_name: Name for the new archive
        tar_file: Input tar file path (use '-' for stdin); required —
            'borg import-tar' mandates the TARFILE positional argument.
        tar_filter: Tar filter to use ('auto', 'gzip', 'bzip2', 'xz', 'lzma')
        strip_components: Strip N leading path components
        stats: Include statistics in output
        json_output: Return JSON formatted output

    Returns:
        Dictionary with import statistics if json_output=True, else None

    Raises:
        ValueError: If tar_file is not provided.
        subprocess.CalledProcessError: If borg exits with a non-zero status.
    """
    if not tar_file:
        # Fail early with a clear message instead of letting borg die with a
        # cryptic usage error from a command missing its TARFILE argument.
        raise ValueError("tar_file is required (use '-' for stdin)")
    cmd = ['borg', 'import-tar']
    if tar_filter:
        cmd.extend(['--tar-filter', tar_filter])
    if strip_components:
        cmd.extend(['--strip-components', str(strip_components)])
    if stats:
        cmd.append('--stats')
    if json_output:
        cmd.append('--json')
    cmd.append(f'{repo_path}::{archive_name}')
    cmd.append(tar_file)
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    return json.loads(result.stdout) if json_output else None
# Usage example:
import subprocess
import json

# Import from a tar file on disk
subprocess.run(['borg', 'import-tar', '--tar-filter=gzip',
                '/backup/repo::imported-archive', 'backup.tar.gz'], check=True)

# Import from stdin with statistics. Tar data is binary, so stream the file
# handle as stdin rather than passing decoded text — the original example
# referenced an undefined 'tar_data' name and combined binary input with
# text=True, which would raise.
with open('backup.tar.gz', 'rb') as tar_stream:
    result = subprocess.run(['borg', 'import-tar', '--json', '--stats',
                             '/backup/repo::imported-data', '-'],
                            stdin=tar_stream, capture_output=True, check=True)
import_stats = json.loads(result.stdout)  # json.loads accepts bytes
# Rename archives within a repository.
def rename_archive(repo_path: str, old_name: str, new_name: str,
                   dry_run: bool = False) -> None:
    """
    Rename an existing archive inside a repository.

    Args:
        repo_path: Path to repository.
        old_name: Current archive name.
        new_name: New archive name.
        dry_run: Show what would be renamed without renaming.
            NOTE(review): confirm the installed borg version accepts
            '--dry-run' for 'borg rename'.

    Raises:
        subprocess.CalledProcessError: If borg exits with a non-zero status.
    """
    args = ['borg', 'rename']
    if dry_run:
        args.append('--dry-run')
    args.append(f'{repo_path}::{old_name}')
    args.append(new_name)
    subprocess.run(args, check=True)
# Usage example:
import subprocess

# Rename an archive
subprocess.run(['borg', 'rename',
                '/backup/repo::old-backup-name', 'new-backup-name'], check=True)

# Dry run to see what would be renamed
subprocess.run(['borg', 'rename', '--dry-run',
                '/backup/repo::test-archive', 'production-archive'], check=True)


class ArchiveStats:
    """Archive statistics structure.

    Fields are declared as class-level annotations; the previous version
    annotated them inside __init__ without assigning, which never creates
    the attributes at all. Instances are populated by the caller.
    """

    original_size: int      # Original size in bytes
    compressed_size: int    # Compressed size in bytes
    deduplicated_size: int  # Deduplicated size in bytes
    nfiles: int             # Number of files
class FileInfo:
    """File information entry from an archive listing.

    Fields are declared as class-level annotations; the previous version
    annotated them inside __init__ without assigning, which never creates
    the attributes at all. Instances are populated by the caller (e.g. from
    borg's JSON Lines output).
    """

    path: str      # File path
    type: str      # File type ('d', 'f', 'l', etc.)
    mode: str      # File permissions
    user: str      # Owner username
    group: str     # Owner group
    uid: int       # User ID
    gid: int       # Group ID
    size: int      # File size
    mtime: str     # Modification time (ISO format)
    healthy: bool  # File health status
class ArchiveInfo:
    """Complete archive information.

    Fields are declared as class-level annotations; the previous version
    annotated them inside __init__ without assigning, which never creates
    the attributes at all. Instances are populated by the caller.
    """

    id: str                # Archive ID
    name: str              # Archive name
    start: str             # Start time (ISO format)
    end: str               # End time (ISO format)
    duration: float        # Duration in seconds
    stats: "ArchiveStats"  # Archive statistics (string annotation: forward ref)
    limits: dict           # Archive limits
    command_line: list     # Command used to create archive
# Install with Tessl CLI
npx tessl i tessl/pypi-borgbackup