or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

client-setup.md, clustering-sharding.md, collection-management.md, fastembed-integration.md, index.md, indexing-optimization.md, search-query.md, snapshots-backup.md, vector-operations.md

docs/indexing-optimization.md

0

# Indexing & Optimization

1

2

This page covers payload field indexing, collection optimization, and performance tuning with the Qdrant Python client.

3

4

## Capabilities

5

6

### Payload Field Indexing

7

8

Create indexes on payload fields for faster filtering.

9

10

```python { .api }

11

def create_payload_index(

12

self,

13

collection_name: str,

14

field_name: str,

15

field_schema: Optional[PayloadFieldSchema] = None,

16

wait: bool = True,

17

ordering: Optional[WriteOrdering] = None,

18

**kwargs

19

) -> UpdateResult:

20

"""

21

Create index on payload field.

22

23

Parameters:

24

- collection_name: Name of the collection

25

- field_name: Payload field name to index

26

- field_schema: Index configuration and field type

27

- wait: Wait for operation to complete

28

- ordering: Write ordering guarantees

29

30

Returns:

31

UpdateResult: Result of the operation

32

"""

33

34

def delete_payload_index(

35

self,

36

collection_name: str,

37

field_name: str,

38

wait: bool = True,

39

ordering: Optional[WriteOrdering] = None,

40

**kwargs

41

) -> UpdateResult:

42

"""

43

Delete payload field index.

44

45

Parameters:

46

- collection_name: Name of the collection

47

- field_name: Payload field name

48

- wait: Wait for operation to complete

49

- ordering: Write ordering guarantees

50

51

Returns:

52

UpdateResult: Result of the operation

53

"""

54

55

def list_payload_indexes(

56

self,

57

collection_name: str,

58

**kwargs

59

) -> Dict[str, PayloadIndexInfo]:

60

"""

61

List all payload indexes in collection.

62

63

Parameters:

64

- collection_name: Name of the collection

65

66

Returns:

67

Dict[str, PayloadIndexInfo]: Mapping of field names to index info

68

"""

69

```

70

71

Usage examples:

72

73

```python

74

from qdrant_client import models

75

76

# Create keyword index for exact matching

77

client.create_payload_index(

78

collection_name="documents",

79

field_name="category",

80

field_schema=models.KeywordIndexParams(

81

type="keyword",

82

on_disk=False

83

)

84

)

85

86

# Create integer index for numeric fields

87

client.create_payload_index(

88

collection_name="documents",

89

field_name="timestamp",

90

field_schema=models.IntegerIndexParams(

91

type="integer",

92

range=True,

93

on_disk=True

94

)

95

)

96

97

# Create text index for full-text search

98

client.create_payload_index(

99

collection_name="documents",

100

field_name="content",

101

field_schema=models.TextIndexParams(

102

type="text",

103

tokenizer="word",

104

min_token_len=2,

105

max_token_len=20,

106

lowercase=True,

107

on_disk=True

108

)

109

)

110

111

# Create geo index for geographic queries

112

client.create_payload_index(

113

collection_name="locations",

114

field_name="coordinates",

115

field_schema=models.GeoIndexParams(

116

type="geo",

117

on_disk=False

118

)

119

)

120

```

121

122

### Collection Optimization

123

124

Optimize collection storage and search performance.

125

126

```python { .api }

127

def optimize_collection(

128

self,

129

collection_name: str,

130

wait: bool = True,

131

**kwargs

132

) -> UpdateResult:

133

"""

134

Optimize collection by rebuilding indexes and compacting storage.

135

136

Parameters:

137

- collection_name: Name of the collection

138

- wait: Wait for operation to complete

139

140

Returns:

141

UpdateResult: Result of the operation

142

"""

143

```

144

145

### Vector Index Management

146

147

Manage vector indexes for search performance.

148

149

```python { .api }

150

def recreate_index(

151

self,

152

collection_name: str,

153

wait: bool = True,

154

**kwargs

155

) -> UpdateResult:

156

"""

157

Recreate vector index with current configuration.

158

159

Parameters:

160

- collection_name: Name of the collection

161

- wait: Wait for operation to complete

162

163

Returns:

164

UpdateResult: Result of the operation

165

"""

166

```

167

168

## Index Types

169

170

### Keyword Index

171

172

For exact string matching and categorical fields.

173

174

```python { .api }

175

class KeywordIndexParams(BaseModel):

176

type: Literal["keyword"] = "keyword"

177

on_disk: Optional[bool] = None # Store index on disk

178

```

179

180

Best for:

181

- Categories, tags, labels

182

- User IDs, product codes

183

- Enum values

184

- Exact string matching

185

186

### Integer Index

187

188

For numeric fields with range queries.

189

190

```python { .api }

191

class IntegerIndexParams(BaseModel):

192

type: Literal["integer"] = "integer"

193

range: bool = True # Enable range queries

194

on_disk: Optional[bool] = None # Store index on disk

195

```

196

197

Best for:

198

- Timestamps, dates

199

- Prices, quantities

200

- User ratings, scores

201

- Numeric IDs

202

203

### Float Index

204

205

For floating-point numeric fields.

206

207

```python { .api }

208

class FloatIndexParams(BaseModel):

209

type: Literal["float"] = "float"

210

range: bool = True # Enable range queries

211

on_disk: Optional[bool] = None # Store index on disk

212

```

213

214

Best for:

215

- Continuous measurements

216

- Probabilities, percentages

217

- Geographic coordinates (individual components)

218

- Machine learning scores

219

220

### Boolean Index

221

222

For boolean fields.

223

224

```python { .api }

225

class BoolIndexParams(BaseModel):

226

type: Literal["bool"] = "bool"

227

on_disk: Optional[bool] = None # Store index on disk

228

```

229

230

Best for:

231

- Feature flags

232

- Binary classifications

233

- Yes/no fields

234

235

### Geographic Index

236

237

For geographic coordinate fields.

238

239

```python { .api }

240

class GeoIndexParams(BaseModel):

241

type: Literal["geo"] = "geo"

242

on_disk: Optional[bool] = None # Store index on disk

243

```

244

245

Best for:

246

- Latitude/longitude coordinates

247

- Geographic bounding box queries

248

- Radius-based location searches

249

250

### Text Index

251

252

For full-text search capabilities.

253

254

```python { .api }

255

class TextIndexParams(BaseModel):

256

type: Literal["text"] = "text"

257

tokenizer: TextIndexTokenizer = "word" # Tokenization method

258

min_token_len: Optional[int] = None # Minimum token length

259

max_token_len: Optional[int] = None # Maximum token length

260

lowercase: Optional[bool] = None # Convert to lowercase

261

on_disk: Optional[bool] = None # Store index on disk

262

263

class TextIndexTokenizer(str, Enum):

264

WORD = "word" # Word-based tokenization

265

WHITESPACE = "whitespace" # Whitespace tokenization

266

PREFIX = "prefix" # Prefix-based tokenization

267

```

268

269

Best for:

270

- Document content

271

- Product descriptions

272

- User comments

273

- Search queries

274

275

## Index Information

276

277

### Index Status

278

279

```python { .api }

280

class PayloadIndexInfo(BaseModel):

281

data_type: PayloadSchemaType

282

params: Optional[PayloadIndexParams] = None

283

points: Optional[int] = None # Number of indexed points

284

285

class PayloadSchemaType(str, Enum):

286

KEYWORD = "keyword"

287

INTEGER = "integer"

288

FLOAT = "float"

289

GEO = "geo"

290

TEXT = "text"

291

BOOL = "bool"

292

DATETIME = "datetime"

293

```

294

295

## Performance Considerations

296

297

### Index Selection Guidelines

298

299

**Use keyword indexes when:**

300

- Exact matching on categorical data

301

- Small number of unique values (< 10,000)

302

- Frequent equality filters

303

304

**Use integer/float indexes when:**

305

- Range queries (>, <, >=, <=)

306

- Numeric comparisons

307

- Sorting by numeric fields

308

309

**Use text indexes when:**

310

- Full-text search required

311

- Partial word matching needed

312

- Search across large text fields

313

314

**Use geo indexes when:**

315

- Location-based queries

316

- Geographic filtering

317

- Proximity searches

318

319

### Index Storage Options

320

321

**In-memory indexes (`on_disk=False`):**

322

- Faster query performance

323

- Higher memory usage

324

- Best for frequently queried fields

325

326

**On-disk indexes (`on_disk=True`):**

327

- Lower memory usage

328

- Slightly slower query performance

329

- Best for large collections or infrequently used fields

330

331

### Index Optimization Tips

332

333

1. **Index only necessary fields** - Each index consumes memory and slows writes

334

2. **Use appropriate index types** - Wrong index type reduces performance

335

3. **Consider cardinality** - Fields whose filters narrow the result set the most (typically high-cardinality fields) benefit more from indexing

336

4. **Monitor index usage** - Remove unused indexes to free memory and speed up writes

337

5. **Balance memory vs. disk** - Use `on_disk=True` for less critical indexes

338

339

```python

340

# Example: Strategic indexing for a document collection

341

collection_name = "documents"

342

343

# High-cardinality field used in filters - keyword index

344

client.create_payload_index(

345

collection_name=collection_name,

346

field_name="document_id",

347

field_schema=models.KeywordIndexParams(type="keyword", on_disk=False)

348

)

349

350

# Numeric field for range queries - integer index

351

client.create_payload_index(

352

collection_name=collection_name,

353

field_name="timestamp",

354

field_schema=models.IntegerIndexParams(type="integer", range=True, on_disk=True)

355

)

356

357

# Full-text searchable content - text index

358

client.create_payload_index(

359

collection_name=collection_name,

360

field_name="content",

361

field_schema=models.TextIndexParams(

362

type="text",

363

tokenizer="word",

364

lowercase=True,

365

on_disk=True # Large text index on disk

366

)

367

)

368

369

# Boolean flag for filtering - bool index

370

client.create_payload_index(

371

collection_name=collection_name,

372

field_name="published",

373

field_schema=models.BoolIndexParams(type="bool", on_disk=False)

374

)

375

```

376

377

## Query Optimization

378

379

### Using Indexed Fields

380

381

```python

382

# Efficient queries using indexed fields

383

from qdrant_client import models

384

385

# Keyword index query (exact match)

386

results = client.query_points(

387

collection_name="documents",

388

query=query_vector,

389

query_filter=models.Filter(

390

must=[

391

models.FieldCondition(

392

key="category", # Indexed keyword field

393

match=models.MatchValue(value="technology")

394

)

395

]

396

)

397

)

398

399

# Range query on indexed numeric field

400

results = client.query_points(

401

collection_name="documents",

402

query=query_vector,

403

query_filter=models.Filter(

404

must=[

405

models.FieldCondition(

406

key="timestamp", # Indexed integer field

407

range=models.Range(

408

gte=1640995200, # Jan 1, 2022

409

lte=1672531199 # Dec 31, 2022

410

)

411

)

412

]

413

)

414

)

415

416

# Full-text search on indexed text field

417

results = client.query_points(

418

collection_name="documents",

419

query=query_vector,

420

query_filter=models.Filter(

421

must=[

422

models.FieldCondition(

423

key="content", # Indexed text field

424

match=models.MatchText(text="machine learning")

425

)

426

]

427

)

428

)

429

```