0
# Snapshots & Backup
1
2
Collection and full database snapshot creation, management, and restoration capabilities for backup and disaster recovery.
3
4
## Capabilities
5
6
### Collection Snapshots
7
8
Create and manage snapshots of individual collections.
9
10
```python { .api }
11
def create_snapshot(
    self,
    collection_name: str,
    wait: bool = True,
    **kwargs
) -> SnapshotDescription:
    """
    Create snapshot of collection.

    Parameters:
    - collection_name: Name of the collection to snapshot
    - wait: Wait for snapshot creation to complete (default: True)
    - **kwargs: Extra keyword arguments; how they are used is not shown in this reference

    Returns:
    SnapshotDescription: Information about created snapshot
    """
27
28
def list_snapshots(
    self,
    collection_name: str,
    **kwargs
) -> List[SnapshotDescription]:
    """
    List all snapshots for collection.

    Parameters:
    - collection_name: Name of the collection whose snapshots are listed
    - **kwargs: Extra keyword arguments; how they are used is not shown in this reference

    Returns:
    List[SnapshotDescription]: List of available snapshots
    """
42
43
def delete_snapshot(
    self,
    collection_name: str,
    snapshot_name: str,
    wait: bool = True,
    **kwargs
) -> bool:
    """
    Delete collection snapshot.

    Parameters:
    - collection_name: Name of the collection that owns the snapshot
    - snapshot_name: Name of snapshot to delete
    - wait: Wait for deletion to complete (default: True)
    - **kwargs: Extra keyword arguments; how they are used is not shown in this reference

    Returns:
    bool: True if snapshot deleted successfully
    """
61
62
def recover_snapshot(
    self,
    collection_name: str,
    location: str,
    priority: Optional[SnapshotPriority] = None,
    checksum: Optional[str] = None,
    wait: bool = True,
    **kwargs
) -> bool:
    """
    Recover collection from snapshot.

    Parameters:
    - collection_name: Name of the collection to recover
    - location: Snapshot file location (URL or path)
    - priority: Recovery priority, given as a SnapshotPriority value (default: None)
    - checksum: Expected snapshot checksum for verification (default: None)
    - wait: Wait for recovery to complete (default: True)
    - **kwargs: Extra keyword arguments; how they are used is not shown in this reference

    Returns:
    bool: True if recovery initiated successfully
    """
84
```
85
86
Usage examples:
87
88
```python
89
# Take a snapshot of the collection and block until it has been written
snapshot = client.create_snapshot("important_collection", wait=True)
print(f"Created snapshot: {snapshot.name}")

# Enumerate every snapshot that exists for the collection
snapshots = client.list_snapshots(collection_name="important_collection")
for snapshot in snapshots:
    print(f"Snapshot: {snapshot.name}, Size: {snapshot.size}, Created: {snapshot.creation_time}")

# Rebuild a collection from a snapshot file and block until recovery finishes
client.recover_snapshot(
    "restored_collection",
    "file:///backups/important_collection_snapshot.tar",
    wait=True,
)
107
```
108
109
### Full Database Snapshots
110
111
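Create snapshots of the entire Qdrant database. Note that full storage snapshots are intended for single-node deployments; in a distributed cluster, snapshot each collection on each node instead.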
112
113
```python { .api }
114
def create_full_snapshot(
    self,
    wait: bool = True,
    **kwargs
) -> SnapshotDescription:
    """
    Create full database snapshot.

    Parameters:
    - wait: Wait for snapshot creation to complete (default: True)
    - **kwargs: Extra keyword arguments; how they are used is not shown in this reference

    Returns:
    SnapshotDescription: Information about created snapshot
    """
128
129
def list_full_snapshots(
    self,
    **kwargs
) -> List[SnapshotDescription]:
    """
    List all full database snapshots.

    Parameters:
    - **kwargs: Extra keyword arguments; how they are used is not shown in this reference

    Returns:
    List[SnapshotDescription]: List of available full snapshots
    """
139
140
def delete_full_snapshot(
    self,
    snapshot_name: str,
    wait: bool = True,
    **kwargs
) -> bool:
    """
    Delete full database snapshot.

    Parameters:
    - snapshot_name: Name of snapshot to delete
    - wait: Wait for deletion to complete (default: True)
    - **kwargs: Extra keyword arguments; how they are used is not shown in this reference

    Returns:
    bool: True if snapshot deleted successfully
    """
156
```
157
158
Usage examples:
159
160
```python
161
# Snapshot the whole database and block until the snapshot has been written
full_snapshot = client.create_full_snapshot(wait=True)
print(f"Created full snapshot: {full_snapshot.name}")

# Enumerate every full-database snapshot
full_snapshots = client.list_full_snapshots()
for snapshot in full_snapshots:
    print(f"Full snapshot: {snapshot.name}, Size: {snapshot.size}")

# Remove a snapshot that is no longer needed, addressed by its file name
client.delete_full_snapshot(snapshot_name="old_snapshot_2023_01_01.tar")
172
```
173
174
## Snapshot Information
175
176
### Snapshot Description
177
178
```python { .api }
179
class SnapshotDescription(BaseModel):
    """Metadata describing one snapshot (collection-level or full-database)."""

    # Snapshot filename; this is the identifier passed to the delete calls
    name: str
    # When snapshot was created. Optional, so check for None before sorting or
    # comparing snapshots by age.
    # NOTE(review): qdrant-client's generated model appears to expose this as an
    # ISO-8601 string rather than a datetime — confirm against the installed version.
    creation_time: Optional[datetime] = None
    # Snapshot file size in bytes
    size: int
    # SHA256 checksum for verification; None when no checksum is available
    checksum: Optional[str] = None
184
```
185
186
### Recovery Options
187
188
```python { .api }
189
class SnapshotPriority(str, Enum):
    """How snapshot data is reconciled with data already present during recovery."""

    # Prefer data that already exists on the node over the snapshot contents
    REPLICA = "replica"
    # Prefer the snapshot contents over data that already exists on the node
    SNAPSHOT = "snapshot"
    # Restore the snapshot without any additional synchronization
    NO_SYNC = "no_sync"
192
```
193
194
## Backup Strategies
195
196
### Regular Collection Backups
197
198
Implement automated collection backup:
199
200
```python
201
import schedule
202
import time
203
from datetime import datetime
204
205
def backup_collection(client: QdrantClient, collection_name: str, keep_last: int = 7):
    """Create a collection snapshot, then prune the oldest snapshots.

    Parameters:
    - client: Client used to create, list and delete snapshots
    - collection_name: Name of the collection to back up
    - keep_last: Number of most recent dated snapshots to retain (default: 7)

    Raises:
    ValueError: If keep_last is negative

    Failures while creating or pruning snapshots are printed, not raised, so
    the exception does not propagate to the caller.
    """
    if keep_last < 0:
        raise ValueError("keep_last must be >= 0")

    try:
        snapshot = client.create_snapshot(collection_name, wait=True)
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        print(f"[{timestamp}] Backup created for {collection_name}: {snapshot.name}")

        # creation_time is Optional: sorting a list containing None raises
        # TypeError, which would abort pruning. Snapshots without a timestamp
        # cannot be ranked by age, so they are never selected for deletion.
        dated = sorted(
            (s for s in client.list_snapshots(collection_name) if s.creation_time is not None),
            key=lambda s: s.creation_time,
        )

        # Explicit count rather than dated[:-keep_last], which is empty when keep_last == 0
        excess = max(len(dated) - keep_last, 0)
        for old_snapshot in dated[:excess]:
            client.delete_snapshot(collection_name, old_snapshot.name)
            print(f"Deleted old snapshot: {old_snapshot.name}")

    except Exception as e:
        print(f"Backup failed for {collection_name}: {e}")
222
223
# Register the nightly job: back up "critical_collection" at 02:00 every day
daily_job = schedule.every().day.at("02:00")
daily_job.do(backup_collection, client, "critical_collection")

# Poll the scheduler once a minute, forever, so that due jobs actually run
while True:
    schedule.run_pending()
    time.sleep(60)
230
```
231
232
### Full Database Backups
233
234
Weekly full database backup:
235
236
```python
237
def backup_full_database(client: QdrantClient, keep_last: int = 4):
    """Create a full database snapshot, then prune the oldest full snapshots.

    Parameters:
    - client: Client used to create, list and delete full snapshots
    - keep_last: Number of most recent dated full snapshots to retain (default: 4)

    Raises:
    ValueError: If keep_last is negative

    Failures while creating or pruning snapshots are printed, not raised, so
    the exception does not propagate to the caller.
    """
    if keep_last < 0:
        raise ValueError("keep_last must be >= 0")

    try:
        snapshot = client.create_full_snapshot(wait=True)
        print(f"Full database backup created: {snapshot.name}")

        # creation_time is Optional: sorting a list containing None raises
        # TypeError, which would abort pruning. Snapshots without a timestamp
        # cannot be ranked by age, so they are never selected for deletion.
        dated = sorted(
            (s for s in client.list_full_snapshots() if s.creation_time is not None),
            key=lambda s: s.creation_time,
        )

        # Explicit count rather than dated[:-keep_last], which is empty when keep_last == 0
        excess = max(len(dated) - keep_last, 0)
        for old_snapshot in dated[:excess]:
            client.delete_full_snapshot(old_snapshot.name)
            print(f"Deleted old full snapshot: {old_snapshot.name}")

    except Exception as e:
        print(f"Full backup failed: {e}")
253
254
# Register the weekly job: full database backup every Sunday at 01:00
weekly_job = schedule.every().sunday.at("01:00")
weekly_job.do(backup_full_database, client)
256
```
257
258
### Cross-Region Backup
259
260
Backup to remote storage:
261
262
```python
263
import shutil
264
import os
265
266
def backup_with_remote_storage(
    client: QdrantClient,
    collection_name: str,
    remote_path: str,
    snapshots_dir: str = "/qdrant/snapshots",
):
    """Create a collection snapshot and copy it to remote storage.

    Parameters:
    - client: Client used to create the snapshot
    - collection_name: Name of the collection to back up
    - remote_path: Destination directory (e.g. a mounted NFS or bucket path);
      created if it does not exist
    - snapshots_dir: Directory where snapshots are written on this host
      (depends on Qdrant configuration; default: "/qdrant/snapshots")

    Returns:
    bool: True if the snapshot was copied and, when a checksum is available,
    the copy matched it; False otherwise

    Errors raised while creating the snapshot propagate to the caller; copy
    and verification errors are printed and reported as False.
    """
    import hashlib  # local import: only needed for checksum verification

    # Create local snapshot
    snapshot = client.create_snapshot(collection_name, wait=True)

    local_snapshot_path = os.path.join(snapshots_dir, collection_name, snapshot.name)
    remote_snapshot_path = os.path.join(remote_path, f"{collection_name}_{snapshot.name}")

    try:
        # copy2 does not create missing parent directories
        os.makedirs(remote_path, exist_ok=True)
        shutil.copy2(local_snapshot_path, remote_snapshot_path)
        print(f"Backup copied to remote: {remote_snapshot_path}")

        if snapshot.checksum:
            # Hash the copy (not the source) so transfer corruption is detected.
            # Assumes the checksum is a SHA-256 hex digest — confirm for your server version.
            digest = hashlib.sha256()
            with open(remote_snapshot_path, "rb") as copied:
                for chunk in iter(lambda: copied.read(1024 * 1024), b""):
                    digest.update(chunk)

            expected = snapshot.checksum.lower()
            # Tolerate an optional "sha256:" algorithm prefix
            if expected.startswith("sha256:"):
                expected = expected[len("sha256:"):]

            if digest.hexdigest() != expected:
                print(f"Remote backup failed: checksum mismatch for {remote_snapshot_path}")
                return False
            print(f"Checksum: {snapshot.checksum}")

        return True

    except Exception as e:
        print(f"Remote backup failed: {e}")
        return False
287
```
288
289
## Disaster Recovery
290
291
### Collection Recovery Procedure
292
293
```python
294
def restore_collection_from_backup(
    client: QdrantClient,
    collection_name: str,
    backup_location: str,
    checksum: Optional[str] = None
):
    """Recover a collection from a snapshot and report whether it worked.

    Parameters:
    - client: Client used to inspect, delete and recover the collection
    - collection_name: Name of the collection to restore
    - backup_location: Snapshot location passed to recover_snapshot
    - checksum: Optional expected checksum passed to recover_snapshot

    Returns:
    bool: True when recovery reported success; False when the user declined
    to delete an existing collection, recovery reported failure, or any
    exception occurred (the exception is printed, not raised)
    """
    try:
        # An existing collection is only replaced after interactive confirmation
        if client.collection_exists(collection_name):
            print(f"Warning: Collection {collection_name} already exists")
            answer = input("Delete existing collection? (y/N): ")
            if answer.lower() != 'y':
                return False
            client.delete_collection(collection_name)

        recovered = client.recover_snapshot(
            collection_name=collection_name,
            location=backup_location,
            checksum=checksum,
            wait=True,
        )
        if not recovered:
            print("Collection restoration failed")
            return False

        # Read the collection back to confirm it is reachable and report its size
        restored = client.get_collection(collection_name)
        print(f"Collection restored: {restored.points_count} points")
        return True

    except Exception as err:
        print(f"Restoration error: {err}")
        return False
331
332
# Usage: restore "critical_collection" from a dated backup file, passing the expected checksum
success = restore_collection_from_backup(
    client=client,
    collection_name="critical_collection",
    backup_location="file:///backups/critical_collection_20240101_020000.tar",
    checksum="sha256:abc123...",
)
339
```
340
341
### Point-in-Time Recovery
342
343
```python
344
def point_in_time_recovery(
    client: QdrantClient,
    collection_name: str,
    target_datetime: datetime
):
    """Restore a collection from the newest snapshot taken at or before a given time.

    Parameters:
    - client: Client used to list snapshots and perform the restore
    - collection_name: Name of the collection whose snapshots are searched
    - target_datetime: Latest acceptable snapshot creation time

    Returns:
    False when no snapshot qualifies; otherwise the result of
    restore_collection_from_backup, which restores into
    "<collection_name>_restored" rather than over the original collection
    """
    # Keep only snapshots that carry a timestamp and are not newer than the target
    candidates = []
    for snap in client.list_snapshots(collection_name):
        if snap.creation_time and snap.creation_time <= target_datetime:
            candidates.append(snap)

    if not candidates:
        print("No snapshots available for target time")
        return False

    # The newest qualifying snapshot is the closest one to the target
    target_snapshot = max(candidates, key=lambda snap: snap.creation_time)
    print(f"Restoring from snapshot {target_snapshot.name} created at {target_snapshot.creation_time}")

    restored_name = f"{collection_name}_restored"
    backup_location = f"file:///qdrant/snapshots/{collection_name}/{target_snapshot.name}"
    return restore_collection_from_backup(
        client,
        restored_name,
        backup_location,
        target_snapshot.checksum,
    )
375
```
376
377
## Best Practices
378
379
### Backup Scheduling
380
381
1. **Collection snapshots**: Daily for critical collections
382
2. **Full snapshots**: Weekly for complete database backup
383
3. **Retention policy**: Keep 7 daily + 4 weekly + 12 monthly backups
384
4. **Off-peak scheduling**: Run during low-traffic periods
385
386
### Verification and Testing
387
388
```python
389
def verify_backup_integrity(client: QdrantClient, collection_name: str):
    """Verify that the latest snapshot of a collection can be restored.

    The snapshot is recovered into a scratch collection named
    "<collection_name>_test_restore", whose counts are compared with the live
    collection. The scratch collection is removed on every exit path.

    Parameters:
    - client: Client used to list snapshots and to recover, inspect and delete collections
    - collection_name: Name of the collection whose latest snapshot is checked

    Returns:
    bool: True if recovery succeeded and the counts match; False if there are
    no snapshots, recovery failed, the counts differ, or an error occurred
    (the error is printed, not raised)
    """
    snapshots = client.list_snapshots(collection_name)
    if not snapshots:
        return False

    # creation_time is Optional, and max() over values that include None raises
    # TypeError. Rank only dated snapshots; if none carry a timestamp, fall back
    # to the last one listed.
    dated = [s for s in snapshots if s.creation_time is not None]
    latest_snapshot = max(dated, key=lambda s: s.creation_time) if dated else snapshots[-1]
    test_collection = f"{collection_name}_test_restore"

    try:
        # Test restoration
        backup_location = f"file:///qdrant/snapshots/{collection_name}/{latest_snapshot.name}"
        success = client.recover_snapshot(
            collection_name=test_collection,
            location=backup_location,
            checksum=latest_snapshot.checksum,
            wait=True,
        )
        if not success:
            return False

        # Verify data integrity. The counts only match if the live collection
        # has not been written to since the snapshot was taken.
        original_info = client.get_collection(collection_name)
        restored_info = client.get_collection(test_collection)

        # NOTE(review): vectors_count looks to be absent from newer qdrant-client
        # collection info — confirm. getattr keeps the comparison working either way.
        return (
            original_info.points_count == restored_info.points_count
            and getattr(original_info, "vectors_count", None)
            == getattr(restored_info, "vectors_count", None)
        )

    except Exception as e:
        print(f"Backup verification failed: {e}")
        return False

    finally:
        # Best-effort cleanup so the scratch collection never outlives the check
        try:
            if client.collection_exists(test_collection):
                client.delete_collection(test_collection)
        except Exception as cleanup_error:
            print(f"Failed to remove {test_collection}: {cleanup_error}")
431
432
# Schedule periodic backup verification.
# The `schedule` library has no month unit (it offers seconds through weeks,
# plus weekdays), so `schedule.every().month` raises AttributeError.
# "Monthly" is approximated here as every 4 weeks.
schedule.every(4).weeks.do(verify_backup_integrity, client, "critical_collection")
434
```
435
436
### Storage Considerations
437
438
- **Local storage**: Fast access, limited by disk space
439
- **Network storage**: Slower but more reliable and scalable
440
- **Cloud storage**: Most reliable, integrate with S3/GCS/Azure
441
- **Compression**: Snapshots are typically compressed tar files
442
- **Encryption**: Encrypt sensitive snapshots before remote storage
443
444
### Monitoring and Alerting
445
446
```python
447
def monitor_backup_health(
    client: QdrantClient,
    collections: List[str],
    max_age_hours: float = 24.0,
):
    """Monitor backup freshness and alert on issues.

    Prints one line per collection: "OK" when the newest dated snapshot is at
    most max_age_hours old, "ALERT" when it is older or no usable snapshot
    exists, and "ERROR" when the check itself fails.

    Parameters:
    - client: Client used to list snapshots
    - collections: Names of the collections to check
    - max_age_hours: Maximum acceptable age of the newest snapshot, in hours (default: 24)
    """
    for collection_name in collections:
        try:
            snapshots = client.list_snapshots(collection_name)
            if not snapshots:
                print(f"ALERT: No snapshots found for {collection_name}")
                continue

            # creation_time is Optional; undated snapshots cannot be aged
            dated = [s for s in snapshots if s.creation_time is not None]
            if not dated:
                print(f"ALERT: No dated snapshots found for {collection_name}")
                continue

            created = max(dated, key=lambda s: s.creation_time).creation_time
            if isinstance(created, str):
                # NOTE(review): some qdrant-client versions appear to return an
                # ISO-8601 string here instead of a datetime — confirm
                created = datetime.fromisoformat(created)

            # Build "now" with the same tzinfo as the timestamp: subtracting an
            # aware datetime from a naive one raises TypeError. For naive
            # timestamps this is local time — confirm it matches the server clock.
            age = datetime.now(tz=created.tzinfo) - created
            # timedelta has no `hours` attribute; derive hours from total_seconds()
            age_hours = age.total_seconds() / 3600

            if age_hours > max_age_hours:
                print(f"ALERT: Latest backup for {collection_name} is {age.days} days old")
            else:
                print(f"OK: {collection_name} backup is fresh ({int(age_hours)} hours old)")

        except Exception as e:
            print(f"ERROR: Failed to check backups for {collection_name}: {e}")
466
```