Tessl Tile for pypi/qdrant-client@1.15.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

client-setup.md clustering-sharding.md collection-management.md fastembed-integration.md index.md indexing-optimization.md search-query.md snapshots-backup.md vector-operations.md

docs/indexing-optimization.md

0
# Indexing & Optimization
1

2
Payload field indexing, collection optimization, and performance tuning capabilities.
3

4
## Capabilities
5

6
### Payload Field Indexing
7

8
Create indexes on payload fields for faster filtering.
9

10
```python { .api }
11
def create_payload_index(
12
    self,
13
    collection_name: str,
14
    field_name: str,
15
    field_schema: Optional[PayloadFieldSchema] = None,
16
    wait: bool = True,
17
    ordering: Optional[WriteOrdering] = None,
18
    **kwargs
19
) -> UpdateResult:
20
    """
21
    Create index on payload field.
22

23
    Parameters:
24
    - collection_name: Name of the collection
25
    - field_name: Payload field name to index
26
    - field_schema: Index configuration and field type
27
    - wait: Wait for operation to complete
28
    - ordering: Write ordering guarantees
29

30
    Returns:
31
        UpdateResult: Result of the operation
32
    """
33

34
def delete_payload_index(
35
    self,
36
    collection_name: str,
37
    field_name: str,
38
    wait: bool = True,
39
    ordering: Optional[WriteOrdering] = None,
40
    **kwargs
41
) -> UpdateResult:
42
    """
43
    Delete payload field index.
44

45
    Parameters:
46
    - collection_name: Name of the collection
47
    - field_name: Payload field name
48
    - wait: Wait for operation to complete
49
    - ordering: Write ordering guarantees
50

51
    Returns:
52
        UpdateResult: Result of the operation
53
    """
54

55
def list_payload_indexes(
56
    self,
57
    collection_name: str,
58
    **kwargs
59
) -> Dict[str, PayloadIndexInfo]:
60
    """
61
    List all payload indexes in collection.
62

63
    Parameters:
64
    - collection_name: Name of the collection
65

66
    Returns:
67
        Dict[str, PayloadIndexInfo]: Mapping of field names to index info
68
    """
69
```
70

71
Usage examples:
72

73
```python
74
from qdrant_client import models
75

76
# Create keyword index for exact matching
77
client.create_payload_index(
78
    collection_name="documents",
79
    field_name="category",
80
    field_schema=models.KeywordIndexParams(
81
        type="keyword",
82
        on_disk=False
83
    )
84
)
85

86
# Create integer index for numeric fields
87
client.create_payload_index(
88
    collection_name="documents", 
89
    field_name="timestamp",
90
    field_schema=models.IntegerIndexParams(
91
        type="integer",
92
        range=True,
93
        on_disk=True
94
    )
95
)
96

97
# Create text index for full-text search
98
client.create_payload_index(
99
    collection_name="documents",
100
    field_name="content",
101
    field_schema=models.TextIndexParams(
102
        type="text",
103
        tokenizer="word",
104
        min_token_len=2,
105
        max_token_len=20,
106
        lowercase=True,
107
        on_disk=True
108
    )
109
)
110

111
# Create geo index for geographic queries
112
client.create_payload_index(
113
    collection_name="locations",
114
    field_name="coordinates",
115
    field_schema=models.GeoIndexParams(
116
        type="geo",
117
        on_disk=False
118
    )
119
)
120
```
121

122
### Collection Optimization
123

124
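Optimize collection storage and search performance. Note: confirm that `optimize_collection` is available in your installed `qdrant-client` version before relying on it; in the upstream client, optimizer behavior is normally tuned by passing `optimizers_config` to `update_collection`.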
125

126
```python { .api }
127
def optimize_collection(
128
    self,
129
    collection_name: str,
130
    wait: bool = True,
131
    **kwargs
132
) -> UpdateResult:
133
    """
134
    Optimize collection by rebuilding indexes and compacting storage.
135

136
    Parameters:
137
    - collection_name: Name of the collection
138
    - wait: Wait for operation to complete
139

140
    Returns:
141
        UpdateResult: Result of the operation
142
    """
143
```
144

145
### Vector Index Management
146

147
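Manage vector indexes for search performance. Note: confirm that `recreate_index` is available in your installed `qdrant-client` version before relying on it; in the upstream client, vector index parameters are normally changed by passing `hnsw_config` to `update_collection`.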
148

149
```python { .api }
150
def recreate_index(
151
    self,
152
    collection_name: str,
153
    wait: bool = True,
154
    **kwargs
155
) -> UpdateResult:
156
    """
157
    Recreate vector index with current configuration.
158

159
    Parameters:
160
    - collection_name: Name of the collection
161
    - wait: Wait for operation to complete
162

163
    Returns:
164
        UpdateResult: Result of the operation
165
    """
166
```
167

168
## Index Types
169

170
### Keyword Index
171

172
For exact string matching and categorical fields.
173

174
```python { .api }
175
class KeywordIndexParams(BaseModel):
176
    type: Literal["keyword"] = "keyword"
177
    on_disk: Optional[bool] = None  # Store index on disk
178
```
179

180
Best for:
181
- Categories, tags, labels
182
- User IDs, product codes
183
- Enum values
184
- Exact string matching
185

186
### Integer Index
187

188
For integer-valued fields, with support for range queries.
189

190
```python { .api }
191
class IntegerIndexParams(BaseModel):
192
    type: Literal["integer"] = "integer"
193
    range: bool = True  # Enable range queries
194
    on_disk: Optional[bool] = None  # Store index on disk
195
```
196

197
Best for:
198
- Timestamps, dates
199
- Prices, quantities
200
- User ratings, scores
201
- Numeric IDs
202

203
### Float Index
204

205
For floating-point numeric fields.
206

207
```python { .api }
208
class FloatIndexParams(BaseModel):
209
    type: Literal["float"] = "float"
210
    range: bool = True  # Enable range queries
211
    on_disk: Optional[bool] = None  # Store index on disk
212
```
213

214
Best for:
215
- Continuous measurements
216
- Probabilities, percentages
217
- Geographic coordinates (individual components)
218
- Machine learning scores
219

220
### Boolean Index
221

222
For boolean fields.
223

224
```python { .api }
225
class BoolIndexParams(BaseModel):
226
    type: Literal["bool"] = "bool"
227
    on_disk: Optional[bool] = None  # Store index on disk
228
```
229

230
Best for:
231
- Feature flags
232
- Binary classifications
233
- Yes/no fields
234

235
### Geographic Index
236

237
For geographic coordinate fields.
238

239
```python { .api }
240
class GeoIndexParams(BaseModel):
241
    type: Literal["geo"] = "geo"
242
    on_disk: Optional[bool] = None  # Store index on disk
243
```
244

245
Best for:
246
- Latitude/longitude coordinates
247
- Geographic bounding box queries
248
- Radius-based location searches
249

250
### Text Index
251

252
For full-text search capabilities.
253

254
```python { .api }
255
class TextIndexParams(BaseModel):
256
    type: Literal["text"] = "text"
257
    tokenizer: TextIndexTokenizer = "word"  # Tokenization method
258
    min_token_len: Optional[int] = None  # Minimum token length
259
    max_token_len: Optional[int] = None  # Maximum token length
260
    lowercase: Optional[bool] = None  # Convert to lowercase
261
    on_disk: Optional[bool] = None  # Store index on disk
262

263
class TextIndexTokenizer(str, Enum):
264
    WORD = "word"  # Word-based tokenization
265
    WHITESPACE = "whitespace"  # Whitespace tokenization
266
    PREFIX = "prefix"  # Prefix-based tokenization
267
```
268

269
Best for:
270
- Document content
271
- Product descriptions
272
- User comments
273
- Search queries
274

275
## Index Information
276

277
### Index Status
278

279
```python { .api }
280
class PayloadIndexInfo(BaseModel):
281
    data_type: PayloadSchemaType
282
    params: Optional[PayloadIndexParams] = None
283
    points: Optional[int] = None  # Number of indexed points
284

285
class PayloadSchemaType(str, Enum):
286
    KEYWORD = "keyword"
287
    INTEGER = "integer" 
288
    FLOAT = "float"
289
    GEO = "geo"
290
    TEXT = "text"
291
    BOOL = "bool"
292
    DATETIME = "datetime"
293
```
294

295
## Performance Considerations
296

297
### Index Selection Guidelines
298

299
**Use keyword indexes when:**
300
- Exact matching on categorical data
301
- Small number of unique values (< 10,000)
302
- Frequent equality filters
303

304
**Use integer/float indexes when:**
305
- Range queries (>, <, >=, <=)
306
- Numeric comparisons
307
- Sorting by numeric fields
308

309
**Use text indexes when:**
310
- Full-text search required
311
- Partial word matching needed
312
- Search across large text fields
313

314
**Use geo indexes when:**
315
- Location-based queries
316
- Geographic filtering
317
- Proximity searches
318

319
### Index Storage Options
320

321
**In-memory indexes (`on_disk=False`):**
322
- Faster query performance
323
- Higher memory usage
324
- Best for frequently queried fields
325

326
**On-disk indexes (`on_disk=True`):**
327
- Lower memory usage
328
- Slightly slower query performance  
329
- Best for large collections or infrequently used fields
330

331
### Index Optimization Tips
332

333
1. **Index only necessary fields** - Each index consumes memory and slows writes
334
2. **Use appropriate index types** - Wrong index type reduces performance
335
3. **Consider cardinality** - High cardinality fields benefit more from indexing
336
4. **Monitor index usage** - Remove unused indexes to improve performance
337
5. **Balance memory vs. disk** - Use `on_disk=True` for less critical indexes
338

339
```python
340
# Example: Strategic indexing for a document collection
341
collection_name = "documents"
342

343
# High-cardinality field used in filters - keyword index
344
client.create_payload_index(
345
    collection_name=collection_name,
346
    field_name="document_id",
347
    field_schema=models.KeywordIndexParams(type="keyword", on_disk=False)
348
)
349

350
# Numeric field for range queries - integer index  
351
client.create_payload_index(
352
    collection_name=collection_name,
353
    field_name="timestamp",
354
    field_schema=models.IntegerIndexParams(type="integer", range=True, on_disk=True)
355
)
356

357
# Full-text searchable content - text index
358
client.create_payload_index(
359
    collection_name=collection_name,
360
    field_name="content",
361
    field_schema=models.TextIndexParams(
362
        type="text",
363
        tokenizer="word",
364
        lowercase=True,
365
        on_disk=True  # Large text index on disk
366
    )
367
)
368

369
# Boolean flag for filtering - bool index
370
client.create_payload_index(
371
    collection_name=collection_name,
372
    field_name="published",
373
    field_schema=models.BoolIndexParams(type="bool", on_disk=False)
374
)
375
```
376

377
## Query Optimization
378

379
### Using Indexed Fields
380

381
```python
382
# Efficient queries using indexed fields
383
from qdrant_client import models
384

385
# Keyword index query (exact match)
386
results = client.query_points(
387
    collection_name="documents",
388
    query=query_vector,
389
    query_filter=models.Filter(
390
        must=[
391
            models.FieldCondition(
392
                key="category",  # Indexed keyword field
393
                match=models.MatchValue(value="technology")
394
            )
395
        ]
396
    )
397
)
398

399
# Range query on indexed numeric field
400
results = client.query_points(
401
    collection_name="documents", 
402
    query=query_vector,
403
    query_filter=models.Filter(
404
        must=[
405
            models.FieldCondition(
406
                key="timestamp",  # Indexed integer field
407
                range=models.Range(
408
                    gte=1640995200,  # Jan 1, 2022
409
                    lte=1672531199   # Dec 31, 2022
410
                )
411
            )
412
        ]
413
    )
414
)
415

416
# Full-text search on indexed text field
417
results = client.query_points(
418
    collection_name="documents",
419
    query=query_vector,
420
    query_filter=models.Filter(
421
        must=[
422
            models.FieldCondition(
423
                key="content",  # Indexed text field
424
                match=models.MatchText(text="machine learning")
425
            )
426
        ]
427
    )
428
)
429
```

Version

Tile

Files

docs/indexing-optimization.md

docs/indexing-optimization.md