or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

client-setup.md, clustering-sharding.md, collection-management.md, fastembed-integration.md, index.md, indexing-optimization.md, search-query.md, snapshots-backup.md, vector-operations.md

docs/clustering-sharding.md

0

# Clustering & Sharding

1

2

Distributed operation support including shard key management, cluster operations, and multi-tenant configurations.

3

4

## Capabilities

5

6

### Shard Key Management

7

8

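Create and manage shard keys for data distribution and isolation. Shard keys can only be created on a collection that uses custom sharding, i.e. one created with `sharding_method=models.ShardingMethod.CUSTOM`; each write to such a collection must then name its target shard key via `shard_key_selector`.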

9

10

```python { .api }

11

def create_shard_key(

12

self,

13

collection_name: str,

14

shard_key: ShardKey,

15

shards_number: Optional[int] = None,

16

replication_factor: Optional[int] = None,

17

placement: Optional[List[int]] = None,

18

timeout: Optional[int] = None,

19

**kwargs

20

) -> bool:

21

"""

22

Create a shard key for collection.

23

24

Parameters:

25

- collection_name: Name of the collection

26

- shard_key: Shard key value

27

- shards_number: Number of shards for this key

28

- replication_factor: Replication factor for shards

29

- placement: Specific nodes for shard placement

30

- timeout: Request timeout

31

32

Returns:

33

bool: True if shard key created successfully

34

"""

35

36

def delete_shard_key(

37

self,

38

collection_name: str,

39

shard_key: ShardKey,

40

timeout: Optional[int] = None,

41

**kwargs

42

) -> bool:

43

"""

44

Delete a shard key from collection.

45

46

Parameters:

47

- collection_name: Name of the collection

48

- shard_key: Shard key value to delete

49

- timeout: Request timeout

50

51

Returns:

52

bool: True if shard key deleted successfully

53

"""

54

55

def list_shard_keys(

56

self,

57

collection_name: str,

58

**kwargs

59

) -> List[ShardKeyInfo]:

60

"""

61

List all shard keys for collection.

62

63

Parameters:

64

- collection_name: Name of the collection

65

66

Returns:

67

List[ShardKeyInfo]: List of shard key information

68

"""

69

```

70

71

Usage examples:

72

73

```python

74

from qdrant_client import models

75

76

# Create shard keys for multi-tenant application

77

client.create_shard_key(

78

collection_name="multi_tenant_collection",

79

shard_key="tenant_1",

80

shards_number=2,

81

replication_factor=1

82

)

83

84

client.create_shard_key(

85

collection_name="multi_tenant_collection",

86

shard_key="tenant_2",

87

shards_number=3,

88

replication_factor=2

89

)

90

91

# Use shard key in operations

92

client.upsert(

93

collection_name="multi_tenant_collection",

94

points=[

95

models.PointStruct(

96

id=1,

97

vector=[0.1, 0.2, 0.3],

98

payload={"data": "tenant_1_data"}

99

)

100

],

101

shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])

102

)

103

```

104

105

### Cluster Information

106

107

Get information about cluster nodes and health.

108

109

```python { .api }

110

def get_cluster_info(self, **kwargs) -> ClusterInfo:

111

"""

112

Get cluster information and status.

113

114

Returns:

115

ClusterInfo: Cluster status and node information

116

"""

117

118

def get_node_info(self, **kwargs) -> NodeInfo:

119

"""

120

Get current node information.

121

122

Returns:

123

NodeInfo: Current node status and configuration

124

"""

125

```

126

127

### Shard Operations

128

129

Manage individual shards and their distribution.

130

131

```python { .api }

132

def get_collection_cluster_info(

133

self,

134

collection_name: str,

135

**kwargs

136

) -> CollectionClusterInfo:

137

"""

138

Get cluster information for specific collection.

139

140

Parameters:

141

- collection_name: Name of the collection

142

143

Returns:

144

CollectionClusterInfo: Collection cluster status

145

"""

146

147

def update_collection_cluster_setup(

148

self,

149

collection_name: str,

150

move_shard: Optional[MoveShard] = None,

151

replicate_shard: Optional[ReplicateShard] = None,

152

abort_transfer: Optional[AbortTransfer] = None,

153

drop_replica: Optional[DropReplica] = None,

154

timeout: Optional[int] = None,

155

**kwargs

156

) -> bool:

157

"""

158

Update collection cluster configuration.

159

160

Parameters:

161

- collection_name: Name of the collection

162

- move_shard: Move shard between nodes

163

- replicate_shard: Create shard replica

164

- abort_transfer: Abort ongoing transfer

165

- drop_replica: Drop shard replica

166

- timeout: Request timeout

167

168

Returns:

169

bool: True if operation initiated successfully

170

"""

171

```

172

173

## Shard Key Types and Selectors

174

175

### Shard Key Definition

176

177

```python { .api }

178

# Shard key can be string or integer

179

ShardKey = Union[str, int]

180

181

class ShardKeySelector(BaseModel):

182

shard_keys: List[ShardKey] # List of shard keys to target

183

```

184

185

### Shard Key Usage in Operations

186

187

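Point operations accept a `shard_key_selector` argument that routes the request to the shards belonging to the listed keys. The examples below cover upsert, delete, query, and scroll: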

188

189

```python

190

# Point operations with shard key

191

client.upsert(

192

collection_name="collection",

193

points=points,

194

shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])

195

)

196

197

client.delete(

198

collection_name="collection",

199

points_selector=selector,

200

shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])

201

)

202

203

# Search operations with shard key

204

results = client.query_points(

205

collection_name="collection",

206

query=query_vector,

207

shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])

208

)

209

210

# Scroll with shard key

211

records, next_offset = client.scroll(

212

collection_name="collection",

213

shard_key_selector=models.ShardKeySelector(shard_keys=["tenant_1"])

214

)

215

```

216

217

## Cluster Management Types

218

219

### Cluster Information

220

221

```python { .api }

222

class ClusterInfo(BaseModel):

223

peer_id: int # Current node peer ID

224

peers: Dict[int, PeerInfo] # Information about all peers

225

raft_info: RaftInfo # Raft consensus information

226

227

class PeerInfo(BaseModel):

228

uri: str # Peer URI

229

state: PeerState # Peer connection state

230

231

class PeerState(str, Enum):

232

ALIVE = "Alive"

233

DEAD = "Dead"

234

PARTIAL = "Partial"

235

236

class RaftInfo(BaseModel):

237

term: int # Current Raft term

238

commit: int # Last committed entry

239

pending_operations: int # Pending operations count

240

leader: Optional[int] = None # Leader peer ID

241

role: RaftRole # Current node role

242

243

class RaftRole(str, Enum):

244

LEADER = "Leader"

245

FOLLOWER = "Follower"

246

CANDIDATE = "Candidate"

247

```

248

249

### Collection Cluster Information

250

251

```python { .api }

252

class CollectionClusterInfo(BaseModel):

253

peer_id: int # Current peer ID

254

shard_count: int # Total number of shards

255

local_shards: List[LocalShardInfo] # Shards on current node

256

remote_shards: List[RemoteShardInfo] # Shards on remote nodes

257

shard_transfers: List[ShardTransferInfo] # Ongoing transfers

258

259

class LocalShardInfo(BaseModel):

260

shard_id: int # Shard identifier

261

points_count: int # Number of points in shard

262

state: ShardState # Shard state

263

264

class RemoteShardInfo(BaseModel):

265

shard_id: int # Shard identifier

266

peer_id: int # Peer hosting the shard

267

state: ShardState # Shard state

268

269

class ShardState(str, Enum):

270

ACTIVE = "Active"

271

DEAD = "Dead"

272

PARTIAL = "Partial"

273

INITIALIZING = "Initializing"

274

LISTENER = "Listener"

275

```

276

277

### Shard Operations

278

279

```python { .api }

280

class MoveShard(BaseModel):

281

shard_id: int # Shard to move

282

from_peer_id: int # Source peer

283

to_peer_id: int # Destination peer

284

method: Optional[ShardTransferMethod] = None # Transfer method

285

286

class ReplicateShard(BaseModel):

287

shard_id: int # Shard to replicate

288

from_peer_id: int # Source peer

289

to_peer_id: int # Destination peer

290

method: Optional[ShardTransferMethod] = None # Transfer method

291

292

class DropReplica(BaseModel):

293

shard_id: int # Shard replica to drop

294

peer_id: int # Peer to drop replica from

295

296

class AbortTransfer(BaseModel):

297

shard_id: int # Shard transfer to abort

298

from_peer_id: int # Source peer

299

to_peer_id: int # Destination peer

300

301

class ShardTransferMethod(str, Enum):

302

STREAM_RECORDS = "stream_records" # Stream individual records

303

SNAPSHOT = "snapshot" # Transfer via snapshot

304

```

305

306

## Multi-Tenant Patterns

307

308

### Tenant Isolation

309

310

Use shard keys to isolate tenant data:

311

312

```python

313

class TenantClient:

314

def __init__(self, client: QdrantClient, tenant_id: str):

315

self.client = client

316

self.tenant_id = tenant_id

317

self.shard_selector = models.ShardKeySelector(shard_keys=[tenant_id])

318

319

def upsert_points(self, collection_name: str, points: List[models.PointStruct]):

320

return self.client.upsert(

321

collection_name=collection_name,

322

points=points,

323

shard_key_selector=self.shard_selector

324

)

325

326

def search(self, collection_name: str, query_vector: List[float], **kwargs):

327

return self.client.query_points(

328

collection_name=collection_name,

329

query=query_vector,

330

shard_key_selector=self.shard_selector,

331

**kwargs

332

)

333

334

# Usage

335

tenant_client = TenantClient(client, "tenant_1")

336

tenant_client.upsert_points("shared_collection", points)

337

results = tenant_client.search("shared_collection", query_vector)

338

```

339

340

### Hierarchical Sharding

341

342

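Shard keys are flat string or integer values, so a hierarchy can only be expressed through a naming convention (for example `org_1`, `org_1_team_a`). A query that should cover several levels of the hierarchy must list every relevant shard key explicitly: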

343

344

```python

345

# Create nested shard keys for organization/team structure

346

client.create_shard_key(collection_name="docs", shard_key="org_1")

347

client.create_shard_key(collection_name="docs", shard_key="org_1_team_a")

348

client.create_shard_key(collection_name="docs", shard_key="org_1_team_b")

349

350

# Query across multiple related shards

351

results = client.query_points(

352

collection_name="docs",

353

query=query_vector,

354

shard_key_selector=models.ShardKeySelector(

355

shard_keys=["org_1_team_a", "org_1_team_b"] # Search both teams

356

)

357

)

358

```

359

360

## Performance Considerations

361

362

### Shard Key Distribution

363

364

- **Balanced sharding**: Ensure even data distribution across shard keys

365

- **Shard key cardinality**: Higher cardinality improves parallel processing

366

- **Query patterns**: Align shard keys with common query patterns

367

- **Cross-shard queries**: Minimize queries spanning multiple shards

368

369

### Replication Strategy

370

371

```python

372

# High availability setup

373

client.create_shard_key(

374

collection_name="critical_data",

375

shard_key="important_tenant",

376

shards_number=3, # Distribute across 3 shards

377

replication_factor=2 # 2 replicas per shard

378

)

379

```

380

381

### Monitoring Cluster Health

382

383

```python

384

def monitor_cluster_health(client: QdrantClient):

385

cluster_info = client.get_cluster_info()

386

387

print(f"Leader: {cluster_info.raft_info.leader}")

388

print(f"Pending operations: {cluster_info.raft_info.pending_operations}")

389

390

for peer_id, peer_info in cluster_info.peers.items():

391

print(f"Peer {peer_id}: {peer_info.state} ({peer_info.uri})")

392

393

# Check collection-specific cluster status

394

for collection in client.get_collections().collections:

395

coll_cluster = client.get_collection_cluster_info(collection.name)

396

transfers = len(coll_cluster.shard_transfers)

397

if transfers > 0:

398

print(f"Collection {collection.name}: {transfers} ongoing transfers")

399

```