# Clustering & Sharding

Support for distributed operation: shard key management, cluster operations, and multi-tenant configurations.
3
4
## Capabilities
5
6
### Shard Key Management
7
8
Create and manage shard keys for data distribution and isolation.
9
10
```python { .api }
11
def create_shard_key(
    self,
    collection_name: str,
    shard_key: ShardKey,
    shards_number: Optional[int] = None,
    replication_factor: Optional[int] = None,
    placement: Optional[List[int]] = None,
    timeout: Optional[int] = None,
    **kwargs
) -> bool:
    """
    Create a shard key for a collection.

    Parameters:
    - collection_name: Name of the collection
    - shard_key: Shard key value (string or integer)
    - shards_number: Number of shards for this key
    - replication_factor: Replication factor for the shards of this key
    - placement: Specific nodes for shard placement
    - timeout: Request timeout
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    bool: True if the shard key was created successfully
    """
35
36
def delete_shard_key(
    self,
    collection_name: str,
    shard_key: ShardKey,
    timeout: Optional[int] = None,
    **kwargs
) -> bool:
    """
    Delete a shard key from a collection.

    Parameters:
    - collection_name: Name of the collection
    - shard_key: Shard key value to delete
    - timeout: Request timeout
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    bool: True if the shard key was deleted successfully
    """
54
55
def list_shard_keys(
    self,
    collection_name: str,
    **kwargs
) -> List[ShardKeyInfo]:
    """
    List all shard keys of a collection.

    Parameters:
    - collection_name: Name of the collection
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    List[ShardKeyInfo]: List of shard key information
    """
69
```
70
71
Usage examples:
72
73
```python
74
from qdrant_client import models
75
76
# Create one shard key per tenant of a multi-tenant application.
# Each key gets its own shard count and replication factor.
client.create_shard_key(
    collection_name="multi_tenant_collection",
    shard_key="tenant_1",
    shards_number=2,
    replication_factor=1
)

client.create_shard_key(
    collection_name="multi_tenant_collection",
    shard_key="tenant_2",
    shards_number=3,
    replication_factor=2
)

# Route the write to the shards that belong to "tenant_1"
client.upsert(
    collection_name="multi_tenant_collection",
    points=[
        models.PointStruct(
            id=1,
            vector=[0.1, 0.2, 0.3],
            payload={"data": "tenant_1_data"}
        )
    ],
    shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])
)
103
```
104
105
### Cluster Information
106
107
Get information about cluster nodes and health.
108
109
```python { .api }
110
def get_cluster_info(self, **kwargs) -> ClusterInfo:
    """
    Get cluster information and status.

    Parameters:
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    ClusterInfo: Cluster status and node information
    """
117
118
def get_node_info(self, **kwargs) -> NodeInfo:
    """
    Get information about the current node.

    Parameters:
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    NodeInfo: Current node status and configuration
    """
125
```
126
127
### Shard Operations
128
129
Manage individual shards and their distribution.
130
131
```python { .api }
132
def get_collection_cluster_info(
    self,
    collection_name: str,
    **kwargs
) -> CollectionClusterInfo:
    """
    Get cluster information for a specific collection.

    Parameters:
    - collection_name: Name of the collection
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    CollectionClusterInfo: Collection cluster status
    """
146
147
def update_collection_cluster_setup(
    self,
    collection_name: str,
    move_shard: Optional[MoveShard] = None,
    replicate_shard: Optional[ReplicateShard] = None,
    abort_transfer: Optional[AbortTransfer] = None,
    drop_replica: Optional[DropReplica] = None,
    timeout: Optional[int] = None,
    **kwargs
) -> bool:
    """
    Update the cluster configuration of a collection.

    Each operation argument is optional and defaults to None.

    Parameters:
    - collection_name: Name of the collection
    - move_shard: Move a shard between nodes
    - replicate_shard: Create a shard replica
    - abort_transfer: Abort an ongoing transfer
    - drop_replica: Drop a shard replica
    - timeout: Request timeout
    - **kwargs: Additional keyword arguments (not described in this reference)

    Returns:
    bool: True if the operation was initiated successfully
    """
171
```
172
173
## Shard Key Types and Selectors
174
175
### Shard Key Definition
176
177
```python { .api }
178
# A shard key is either a string or an integer
ShardKey = Union[str, int]
180
181
class ShardKeySelector(BaseModel):
    # Selects the shard keys an operation is routed to.
    shard_keys: List[ShardKey]  # List of shard keys to target
183
```
184
185
### Shard Key Usage in Operations

Pass `shard_key_selector` to point, search, and scroll operations to route them to specific shard keys:
188
189
```python
190
# Point operations with shard key
client.upsert(
    collection_name="collection",
    points=points,
    shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])
)

client.delete(
    collection_name="collection",
    points_selector=selector,
    shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])
)

# Search operations with shard key
results = client.query_points(
    collection_name="collection",
    query=query_vector,
    shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])
)

# Scroll with shard key; scroll returns the records plus the offset of the next page
records, next_offset = client.scroll(
    collection_name="collection",
    shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])
)
215
```
216
217
## Cluster Management Types
218
219
### Cluster Information
220
221
```python { .api }
222
class ClusterInfo(BaseModel):
    # Cluster-wide status as returned by get_cluster_info().
    peer_id: int  # Current node peer ID
    peers: Dict[int, PeerInfo]  # Information about all peers, keyed by peer ID
    raft_info: RaftInfo  # Raft consensus information
226
227
class PeerInfo(BaseModel):
    # Description of a single peer in the cluster.
    uri: str  # Peer URI
    state: PeerState  # Peer connection state
230
231
class PeerState(str, Enum):
    """Connection state of a peer; members are also plain strings."""

    ALIVE = "Alive"
    DEAD = "Dead"
    PARTIAL = "Partial"
235
236
class RaftInfo(BaseModel):
    # Raft consensus status of the current node.
    term: int  # Current Raft term
    commit: int  # Last committed entry
    pending_operations: int  # Pending operations count
    leader: Optional[int] = None  # Leader peer ID; defaults to None
    role: RaftRole  # Current node role
242
243
class RaftRole(str, Enum):
    """Role of a node in Raft consensus; members are also plain strings."""

    LEADER = "Leader"
    FOLLOWER = "Follower"
    CANDIDATE = "Candidate"
247
```
248
249
### Collection Cluster Information
250
251
```python { .api }
252
class CollectionClusterInfo(BaseModel):
    # Per-collection cluster status as returned by get_collection_cluster_info().
    peer_id: int  # Current peer ID
    shard_count: int  # Total number of shards
    local_shards: List[LocalShardInfo]  # Shards on current node
    remote_shards: List[RemoteShardInfo]  # Shards on remote nodes
    shard_transfers: List[ShardTransferInfo]  # Ongoing transfers
258
259
class LocalShardInfo(BaseModel):
    # A shard hosted on the current node.
    shard_id: int  # Shard identifier
    points_count: int  # Number of points in shard
    state: ShardState  # Shard state
263
264
class RemoteShardInfo(BaseModel):
    # A shard hosted on another peer.
    shard_id: int  # Shard identifier
    peer_id: int  # Peer hosting the shard
    state: ShardState  # Shard state
268
269
class ShardState(str, Enum):
    """State of a shard as reported for local and remote shards; members are also plain strings."""

    ACTIVE = "Active"
    DEAD = "Dead"
    PARTIAL = "Partial"
    INITIALIZING = "Initializing"
    LISTENER = "Listener"
275
```
276
277
### Shard Operations
278
279
```python { .api }
280
class MoveShard(BaseModel):
    # Request to move a shard from one peer to another.
    shard_id: int  # Shard to move
    from_peer_id: int  # Source peer
    to_peer_id: int  # Destination peer
    method: Optional[ShardTransferMethod] = None  # Transfer method; defaults to None
285
286
class ReplicateShard(BaseModel):
    # Request to create a replica of a shard on another peer.
    shard_id: int  # Shard to replicate
    from_peer_id: int  # Source peer
    to_peer_id: int  # Destination peer
    method: Optional[ShardTransferMethod] = None  # Transfer method; defaults to None
291
292
class DropReplica(BaseModel):
    # Request to drop a shard replica from a peer.
    shard_id: int  # Shard replica to drop
    peer_id: int  # Peer to drop replica from
295
296
class AbortTransfer(BaseModel):
    # Request to abort a shard transfer, identified by shard and peer pair.
    shard_id: int  # Shard transfer to abort
    from_peer_id: int  # Source peer
    to_peer_id: int  # Destination peer
300
301
class ShardTransferMethod(str, Enum):
    """How shard data is transferred between peers; members are also plain strings."""

    STREAM_RECORDS = "stream_records"  # Stream individual records
    SNAPSHOT = "snapshot"  # Transfer via snapshot
304
```
305
306
## Multi-Tenant Patterns
307
308
### Tenant Isolation
309
310
Use shard keys to isolate tenant data:
311
312
```python
313
class TenantClient:
    """Wrapper around a client that routes every call to one tenant's shard key."""

    def __init__(self, client: QdrantClient, tenant_id: str):
        """
        Bind a client to a tenant.

        Parameters:
        - client: Client used to perform the operations
        - tenant_id: Tenant identifier; used as the only shard key in the selector
        """
        self.client = client
        self.tenant_id = tenant_id
        # Built once and reused by every operation of this wrapper
        self.shard_selector = models.ShardKeySelector(shard_keys=[tenant_id])

    def upsert_points(self, collection_name: str, points: List[models.PointStruct]):
        """
        Upsert points into the tenant's shard key.

        Parameters:
        - collection_name: Name of the collection
        - points: Points to upsert

        Returns:
        The result of client.upsert(), unchanged
        """
        return self.client.upsert(
            collection_name=collection_name,
            points=points,
            shard_key_selector=self.shard_selector
        )

    def search(self, collection_name: str, query_vector: List[float], **kwargs):
        """
        Query points within the tenant's shard key.

        Parameters:
        - collection_name: Name of the collection
        - query_vector: Query vector, passed as `query`
        - **kwargs: Forwarded to client.query_points(). Must not contain
          collection_name, query or shard_key_selector: those are set here,
          and a duplicate keyword raises TypeError.

        Returns:
        The result of client.query_points(), unchanged
        """
        return self.client.query_points(
            collection_name=collection_name,
            query=query_vector,
            shard_key_selector=self.shard_selector,
            **kwargs
        )
333
334
# Usage: all calls below are routed to the "tenant_1" shard key
tenant_client = TenantClient(client, "tenant_1")
tenant_client.upsert_points("shared_collection", points)
results = tenant_client.search("shared_collection", query_vector)
338
```
339
340
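### Hierarchical Sharding

Shard keys are flat string or integer values, so express a hierarchy (for example organization → team) through a naming convention, and select several keys in one query to cover a whole branch: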
343
344
```python
345
# Create one shard key per organization/team.
# The keys are independent; the hierarchy exists only in the naming convention.
client.create_shard_key(collection_name="docs", shard_key="org_1")
client.create_shard_key(collection_name="docs", shard_key="org_1_team_a")
client.create_shard_key(collection_name="docs", shard_key="org_1_team_b")

# Query across multiple related shard keys
results = client.query_points(
    collection_name="docs",
    query=query_vector,
    shard_key_selector=models.ShardKeySelector(
        shard_keys=["org_1_team_a", "org_1_team_b"]  # Search both teams
    )
)
358
```
359
360
## Performance Considerations
361
362
### Shard Key Distribution

- **Balanced sharding**: Distribute data evenly across shard keys so that no single key holds a disproportionate share
- **Shard key cardinality**: A larger number of distinct shard keys improves parallel processing
- **Query patterns**: Choose shard keys that match the way data is most commonly queried
- **Cross-shard queries**: Minimize queries that span multiple shard keys
368
369
### Replication Strategy
370
371
```python
372
# High availability setup
client.create_shard_key(
    collection_name="critical_data",
    shard_key="important_tenant",
    shards_number=3,  # Distribute across 3 shards
    replication_factor=2  # 2 replicas per shard
)
379
```
380
381
### Monitoring Cluster Health
382
383
```python
384
def monitor_cluster_health(client: QdrantClient) -> None:
    """
    Print a summary of cluster health to stdout.

    Prints the Raft leader and pending operation count, one line per peer,
    and one line for each collection that has ongoing shard transfers.

    Parameters:
    - client: Client used to query cluster and collection status
    """
    cluster_info = client.get_cluster_info()

    print(f"Leader: {cluster_info.raft_info.leader}")
    print(f"Pending operations: {cluster_info.raft_info.pending_operations}")

    for peer_id, peer_info in cluster_info.peers.items():
        print(f"Peer {peer_id}: {peer_info.state} ({peer_info.uri})")

    # Check collection-specific cluster status
    for collection in client.get_collections().collections:
        coll_cluster = client.get_collection_cluster_info(collection.name)
        transfers = len(coll_cluster.shard_transfers)
        # Collections without ongoing transfers are not reported
        if transfers > 0:
            print(f"Collection {collection.name}: {transfers} ongoing transfers")
399
```