CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-qdrant-client

Client library for the Qdrant vector search engine

Pending
Overview
Eval results
Files

docs/snapshots-backup.md

Snapshots & Backup

Collection and full database snapshot creation, management, and restoration capabilities for backup and disaster recovery.

Capabilities

Collection Snapshots

Create and manage snapshots of individual collections.

def create_snapshot(
    self,
    collection_name: str,
    wait: bool = True,
    **kwargs
) -> SnapshotDescription:
    """
    Create a snapshot of a single collection.

    Parameters:
    - collection_name: Name of the collection to snapshot
    - wait: Wait for snapshot creation to complete (defaults to True)
    - **kwargs: Additional keyword arguments; their meaning is not
      described on this page

    Returns:
        SnapshotDescription: Information about the created snapshot
    """

def list_snapshots(
    self,
    collection_name: str,
    **kwargs
) -> List[SnapshotDescription]:
    """
    List all snapshots that exist for a collection.

    Parameters:
    - collection_name: Name of the collection whose snapshots are listed
    - **kwargs: Additional keyword arguments; their meaning is not
      described on this page

    Returns:
        List[SnapshotDescription]: List of available snapshots
    """

def delete_snapshot(
    self,
    collection_name: str,
    snapshot_name: str,
    wait: bool = True,
    **kwargs
) -> bool:
    """
    Delete one snapshot of a collection.

    Parameters:
    - collection_name: Name of the collection the snapshot belongs to
    - snapshot_name: Name of the snapshot to delete (the `name` field of a
      SnapshotDescription)
    - wait: Wait for deletion to complete (defaults to True)
    - **kwargs: Additional keyword arguments; their meaning is not
      described on this page

    Returns:
        bool: True if snapshot deleted successfully
    """

def recover_snapshot(
    self,
    collection_name: str,
    location: str,
    priority: Optional[SnapshotPriority] = None,
    checksum: Optional[str] = None,
    wait: bool = True,
    **kwargs
) -> bool:
    """
    Recover a collection from a snapshot.

    Parameters:
    - collection_name: Name of the collection to recover
    - location: Snapshot file location (URL or path)
    - priority: Optional SnapshotPriority value selecting the recovery
      priority; defaults to None
    - checksum: Expected snapshot checksum for verification; defaults to
      None
    - wait: Wait for recovery to complete (defaults to True)
    - **kwargs: Additional keyword arguments; their meaning is not
      described on this page

    Returns:
        bool: True if recovery initiated successfully
    """

Usage examples:

# Snapshot the collection and block until the snapshot is ready
snapshot = client.create_snapshot("important_collection", wait=True)
print(f"Created snapshot: {snapshot.name}")

# Enumerate every snapshot currently stored for the collection
snapshots = client.list_snapshots(collection_name="important_collection")
for snapshot in snapshots:
    print(f"Snapshot: {snapshot.name}, Size: {snapshot.size}, Created: {snapshot.creation_time}")

# Restore a snapshot file into a separately named collection
client.recover_snapshot(
    "restored_collection",
    "file:///backups/important_collection_snapshot.tar",
    wait=True,
)

Full Database Snapshots

Create snapshots of the entire Qdrant database.

def create_full_snapshot(
    self,
    wait: bool = True,
    **kwargs
) -> SnapshotDescription:
    """
    Create a snapshot of the entire database.

    Parameters:
    - wait: Wait for snapshot creation to complete (defaults to True)
    - **kwargs: Additional keyword arguments; their meaning is not
      described on this page

    Returns:
        SnapshotDescription: Information about the created snapshot
    """

def list_full_snapshots(
    self,
    **kwargs
) -> List[SnapshotDescription]:
    """
    List all full database snapshots.

    Parameters:
    - **kwargs: Additional keyword arguments; their meaning is not
      described on this page

    Returns:
        List[SnapshotDescription]: List of available full snapshots
    """

def delete_full_snapshot(
    self,
    snapshot_name: str,
    wait: bool = True,
    **kwargs
) -> bool:
    """
    Delete one full database snapshot.

    Parameters:
    - snapshot_name: Name of the snapshot to delete (the `name` field of a
      SnapshotDescription)
    - wait: Wait for deletion to complete (defaults to True)
    - **kwargs: Additional keyword arguments; their meaning is not
      described on this page

    Returns:
        bool: True if snapshot deleted successfully
    """

Usage examples:

# Snapshot the whole database and block until the snapshot is ready
full_snapshot = client.create_full_snapshot(wait=True)
print(f"Created full snapshot: {full_snapshot.name}")

# Enumerate every full snapshot that currently exists
full_snapshots = client.list_full_snapshots()
for snapshot in full_snapshots:
    print(f"Full snapshot: {snapshot.name}, Size: {snapshot.size}")

# Remove a snapshot that is no longer needed, addressed by its name
client.delete_full_snapshot(snapshot_name="old_snapshot_2023_01_01.tar")

Snapshot Information

Snapshot Description

class SnapshotDescription(BaseModel):
    """Metadata describing one snapshot, as returned by the create/list snapshot calls."""

    # Snapshot filename; this is the value the delete calls take as `snapshot_name`
    name: str  # Snapshot filename
    # Optional: may be None, so guard before sorting or comparing by creation time
    creation_time: Optional[datetime] = None  # When snapshot was created
    size: int  # Snapshot file size in bytes
    # Optional: may be None when no checksum is reported
    checksum: Optional[str] = None  # SHA256 checksum for verification

Recovery Options

class SnapshotPriority(str, Enum):
    """How snapshot data is reconciled with data already present during recovery.

    The value decides which side is treated as the source of truth when the
    recovered snapshot and the existing data disagree.
    """

    REPLICA = "replica"  # Prefer existing data over the snapshot
    NO_SYNC = "no_sync"  # Restore the snapshot without any additional synchronization
    SNAPSHOT = "snapshot"  # Prefer the snapshot's data over existing data

Backup Strategies

Regular Collection Backups

Implement automated collection backup:

import schedule
import time
from datetime import datetime

def backup_collection(client: QdrantClient, collection_name: str, keep: int = 7):
    """Create a collection snapshot, then prune snapshots beyond the retention count.

    Parameters:
    - client: Client used to create, list and delete snapshots
    - collection_name: Name of the collection to back up
    - keep: Number of most recent dated snapshots to retain (defaults to 7)

    Returns:
        None. Progress and failures are reported with print(); errors from
        the client are caught so a scheduled run never raises.

    Raises:
        ValueError: If keep is negative.
    """
    if keep < 0:
        raise ValueError("keep must be >= 0")

    try:
        snapshot = client.create_snapshot(collection_name, wait=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        print(f"[{timestamp}] Backup created for {collection_name}: {snapshot.name}")

        # creation_time is Optional and None cannot be ordered, so only
        # snapshots with a known creation time take part in retention.
        # Snapshots of unknown age are never deleted.
        dated = sorted(
            (s for s in client.list_snapshots(collection_name) if s.creation_time is not None),
            key=lambda s: s.creation_time,
        )

        # Explicit count instead of [:-keep]: a negative-zero slice would
        # select nothing when keep == 0.
        excess = max(len(dated) - keep, 0)
        for old_snapshot in dated[:excess]:
            client.delete_snapshot(collection_name, old_snapshot.name)
            print(f"Deleted old snapshot: {old_snapshot.name}")

    except Exception as e:
        # Deliberate best-effort: report and continue so the scheduler keeps running
        print(f"Backup failed for {collection_name}: {e}")

# Register the collection backup to run once a day at 02:00
daily_backup = schedule.every().day.at("02:00")
daily_backup.do(backup_collection, client, "critical_collection")

# Block forever, checking once a minute for jobs that have come due
while True:
    schedule.run_pending()
    time.sleep(60)

Full Database Backups

Weekly full database backup:

def backup_full_database(client: QdrantClient, keep: int = 4):
    """Create a full database snapshot, then prune snapshots beyond the retention count.

    Parameters:
    - client: Client used to create, list and delete full snapshots
    - keep: Number of most recent dated full snapshots to retain
      (defaults to 4)

    Returns:
        None. Progress and failures are reported with print(); errors from
        the client are caught so a scheduled run never raises.

    Raises:
        ValueError: If keep is negative.
    """
    if keep < 0:
        raise ValueError("keep must be >= 0")

    try:
        snapshot = client.create_full_snapshot(wait=True)
        print(f"Full database backup created: {snapshot.name}")

        # creation_time is Optional and None cannot be ordered, so only
        # snapshots with a known creation time take part in retention.
        # Snapshots of unknown age are never deleted.
        dated = sorted(
            (s for s in client.list_full_snapshots() if s.creation_time is not None),
            key=lambda s: s.creation_time,
        )

        # Explicit count instead of [:-keep]: a negative-zero slice would
        # select nothing when keep == 0.
        excess = max(len(dated) - keep, 0)
        for old_snapshot in dated[:excess]:
            client.delete_full_snapshot(old_snapshot.name)
            print(f"Deleted old full snapshot: {old_snapshot.name}")

    except Exception as e:
        # Deliberate best-effort: report and continue so the scheduler keeps running
        print(f"Full backup failed: {e}")

# Register the full database backup to run every Sunday at 01:00
weekly_backup = schedule.every().sunday.at("01:00")
weekly_backup.do(backup_full_database, client)

Cross-Region Backup

Backup to remote storage:

import shutil
import os

def backup_with_remote_storage(
    client: QdrantClient,
    collection_name: str,
    remote_path: str,
    snapshots_dir: str = "/qdrant/snapshots",
):
    """Create a collection snapshot, copy it to remote storage and verify the copy.

    Parameters:
    - client: Client used to create the snapshot
    - collection_name: Name of the collection to back up
    - remote_path: Directory the snapshot file is copied into; it must be
      reachable as a filesystem path (for example a mounted share)
    - snapshots_dir: Directory where snapshot files are written locally,
      one sub-directory per collection. Depends on the Qdrant
      configuration; defaults to "/qdrant/snapshots"

    Returns:
        bool: True if the file was copied and, when the snapshot reports a
        checksum, the copy's SHA256 digest matches it; False otherwise.

    Raises:
        Whatever client.create_snapshot raises; only copy and verification
        errors are caught and reported.
    """
    import hashlib  # local import: only needed for the verification step

    # Create local snapshot
    snapshot = client.create_snapshot(collection_name, wait=True)

    local_snapshot_path = os.path.join(snapshots_dir, collection_name, snapshot.name)
    remote_snapshot_path = os.path.join(remote_path, f"{collection_name}_{snapshot.name}")

    try:
        shutil.copy2(local_snapshot_path, remote_snapshot_path)
        print(f"Backup copied to remote: {remote_snapshot_path}")

        if snapshot.checksum:
            # Hash the *copied* file so transfer corruption is detected.
            # Read in chunks because snapshot files can be large.
            # NOTE(review): assumes checksum is a plain hex SHA256 digest - confirm.
            digest = hashlib.sha256()
            with open(remote_snapshot_path, "rb") as copied:
                for chunk in iter(lambda: copied.read(1024 * 1024), b""):
                    digest.update(chunk)
            if digest.hexdigest() != snapshot.checksum.lower():
                print(f"Remote backup failed: checksum mismatch for {remote_snapshot_path}")
                return False
            print(f"Checksum: {snapshot.checksum}")

        return True

    except Exception as e:
        print(f"Remote backup failed: {e}")
        return False

Disaster Recovery

Collection Recovery Procedure

def restore_collection_from_backup(
    client: QdrantClient,
    collection_name: str,
    backup_location: str,
    checksum: Optional[str] = None
):
    """Restore a collection from a snapshot, asking before replacing an existing one.

    Parameters:
    - client: Client used to inspect, delete and recover the collection
    - collection_name: Name of the collection to restore into
    - backup_location: Snapshot location handed to recover_snapshot
    - checksum: Optional expected checksum handed to recover_snapshot

    Returns:
        bool: True when recovery reported success; False when the user
        declined to delete an existing collection, recovery reported
        failure, or any exception was raised.
    """
    try:
        # An existing collection is only replaced after interactive confirmation
        if client.collection_exists(collection_name):
            print(f"Warning: Collection {collection_name} already exists")
            answer = input("Delete existing collection? (y/N): ")
            if answer.lower() != 'y':
                return False
            client.delete_collection(collection_name)

        recovered = client.recover_snapshot(
            collection_name=collection_name,
            location=backup_location,
            checksum=checksum,
            wait=True,
        )
        if not recovered:
            print("Collection restoration failed")
            return False

        # Read the collection back to report how many points were restored
        collection_info = client.get_collection(collection_name)
        print(f"Collection restored: {collection_info.points_count} points")
        return True

    except Exception as e:
        print(f"Restoration error: {e}")
        return False

# Example call: restore one collection from a dated backup file
success = restore_collection_from_backup(
    client,
    collection_name="critical_collection",
    backup_location="file:///backups/critical_collection_20240101_020000.tar",
    checksum="sha256:abc123...",
)

Point-in-Time Recovery

def point_in_time_recovery(
    client: QdrantClient,
    collection_name: str,
    target_datetime: datetime
):
    """Restore the newest snapshot taken at or before a given moment.

    The snapshot is restored into a collection named
    "<collection_name>_restored" via restore_collection_from_backup.

    Parameters:
    - client: Client used to list snapshots and perform the restore
    - collection_name: Name of the collection whose snapshots are searched
    - target_datetime: Latest acceptable snapshot creation time

    Returns:
        False when no snapshot qualifies; otherwise the result of
        restore_collection_from_backup.
    """
    # Collect dated snapshots that are not newer than the requested moment
    valid_snapshots = []
    for candidate in client.list_snapshots(collection_name):
        if candidate.creation_time and candidate.creation_time <= target_datetime:
            valid_snapshots.append(candidate)

    if not valid_snapshots:
        print("No snapshots available for target time")
        return False

    # Among the qualifying snapshots the most recent one is closest to the target
    target_snapshot = max(valid_snapshots, key=lambda snap: snap.creation_time)
    print(f"Restoring from snapshot {target_snapshot.name} created at {target_snapshot.creation_time}")

    restored_name = f"{collection_name}_restored"
    backup_location = f"file:///qdrant/snapshots/{collection_name}/{target_snapshot.name}"
    return restore_collection_from_backup(
        client,
        restored_name,
        backup_location,
        target_snapshot.checksum,
    )

Best Practices

Backup Scheduling

  1. Collection snapshots: Daily for critical collections
  2. Full snapshots: Weekly for complete database backup
  3. Retention policy: Keep 7 daily + 4 weekly + 12 monthly backups
  4. Off-peak scheduling: Run during low-traffic periods

Verification and Testing

def verify_backup_integrity(client: QdrantClient, collection_name: str):
    """Verify that the latest snapshot of a collection can be restored.

    The snapshot is recovered into a scratch collection named
    "<collection_name>_test_restore", its counts are compared with the
    original collection, and the scratch collection is always removed
    afterwards.

    Parameters:
    - client: Client used to list snapshots, recover and inspect collections
    - collection_name: Name of the collection whose backup is verified

    Returns:
        bool: True if the restored collection's counts match the original;
        False if there is no snapshot, recovery fails, counts differ, or an
        error occurs during verification.
    """
    snapshots = client.list_snapshots(collection_name)
    if not snapshots:
        return False

    # creation_time is Optional and None cannot be ordered, so rank only the
    # dated snapshots. With no age information at all, fall back to the last
    # listed entry: an arbitrary pick, but still a valid restore test.
    dated = [s for s in snapshots if s.creation_time is not None]
    latest_snapshot = max(dated, key=lambda s: s.creation_time) if dated else snapshots[-1]
    test_collection = f"{collection_name}_test_restore"

    try:
        # Test restoration
        backup_location = f"file:///qdrant/snapshots/{collection_name}/{latest_snapshot.name}"
        success = client.recover_snapshot(
            collection_name=test_collection,
            location=backup_location,
            checksum=latest_snapshot.checksum,
            wait=True
        )
        if not success:
            return False

        # Verify data integrity
        original_info = client.get_collection(collection_name)
        restored_info = client.get_collection(test_collection)

        # vectors_count is read defensively so that a collection info object
        # without that attribute does not turn the check into an error.
        return (
            original_info.points_count == restored_info.points_count
            and getattr(original_info, "vectors_count", None)
            == getattr(restored_info, "vectors_count", None)
        )

    except Exception as e:
        print(f"Backup verification failed: {e}")
        return False

    finally:
        # Remove the scratch collection on every path, including a failed
        # recovery. A cleanup error is reported but must not replace the
        # verification result.
        try:
            if client.collection_exists(test_collection):
                client.delete_collection(test_collection)
        except Exception as cleanup_error:
            print(f"Failed to clean up {test_collection}: {cleanup_error}")

# Schedule backup verification roughly once a month.
# The `schedule` library has no month unit (every().month raises
# AttributeError), so a four-week interval is used instead.
schedule.every(4).weeks.do(verify_backup_integrity, client, "critical_collection")

Storage Considerations

  • Local storage: Fast access, limited by disk space
  • Network storage: Slower but more reliable and scalable
  • Cloud storage: Most reliable, integrate with S3/GCS/Azure
  • Compression: Snapshots are typically compressed tar files
  • Encryption: Encrypt sensitive snapshots before remote storage

Monitoring and Alerting

def monitor_backup_health(client: QdrantClient, collections: List[str], max_age_hours: float = 24):
    """Report whether each collection has a sufficiently recent snapshot.

    One line is printed per collection: OK when the newest dated snapshot is
    at most max_age_hours old, ALERT when it is older or no usable snapshot
    exists, ERROR when the check itself fails.

    Parameters:
    - client: Client used to list snapshots
    - collections: Names of the collections to check
    - max_age_hours: Maximum acceptable age of the newest snapshot, in
      hours (defaults to 24)

    Returns:
        None. Results are reported with print().
    """
    for collection_name in collections:
        try:
            snapshots = client.list_snapshots(collection_name)
            if not snapshots:
                print(f"ALERT: No snapshots found for {collection_name}")
                continue

            # creation_time is Optional; snapshots without it cannot be aged
            dated = [s for s in snapshots if s.creation_time is not None]
            if not dated:
                print(f"ALERT: Cannot determine backup age for {collection_name}")
                continue

            latest_snapshot = max(dated, key=lambda s: s.creation_time)
            created = latest_snapshot.creation_time

            # Take "now" in the timestamp's own timezone: subtracting a naive
            # datetime from an aware one raises TypeError. A naive timestamp
            # is compared against naive local time.
            age = datetime.now(created.tzinfo) - created

            # timedelta has no .hours attribute; derive hours from total
            # seconds. This also makes the threshold exact instead of
            # rounding down to whole days.
            age_hours = age.total_seconds() / 3600

            if age_hours > max_age_hours:
                print(f"ALERT: Latest backup for {collection_name} is {age_hours / 24:.1f} days old")
            else:
                print(f"OK: {collection_name} backup is fresh ({int(age_hours)} hours old)")

        except Exception as e:
            # Keep checking the remaining collections even if one check fails
            print(f"ERROR: Failed to check backups for {collection_name}: {e}")

Install with Tessl CLI

npx tessl i tessl/pypi-qdrant-client

docs

client-setup.md

clustering-sharding.md

collection-management.md

fastembed-integration.md

index.md

indexing-optimization.md

search-query.md

snapshots-backup.md

vector-operations.md

tile.json