tessl/pypi-qdrant-client

Client library for the Qdrant vector search engine

—

Pending

Overview

Eval results

Files

Indexing & Optimization

Name: tessl/pypi-qdrant-client
Author: tessl

Payload field indexing, collection optimization, and performance tuning capabilities.

Capabilities

Payload Field Indexing

Create indexes on payload fields for faster filtering.

def create_payload_index(
    self,
    collection_name: str,
    field_name: str,
    field_schema: Optional[PayloadFieldSchema] = None,
    wait: bool = True,
    ordering: Optional[WriteOrdering] = None,
    **kwargs
) -> UpdateResult:
    """
    Build an index over a single payload field of a collection.

    Parameters:
    - collection_name: Collection that holds the payload field
    - field_name: Name of the payload field to be indexed
    - field_schema: Field type and index configuration (defaults to None)
    - wait: When True (the default), wait for the operation to complete
    - ordering: Write ordering guarantees for the operation (defaults to None)
    - **kwargs: Extra keyword arguments accepted by the signature; not described here

    Returns:
        UpdateResult: Outcome of the index creation
    """

def delete_payload_index(
    self,
    collection_name: str,
    field_name: str,
    wait: bool = True,
    ordering: Optional[WriteOrdering] = None,
    **kwargs
) -> UpdateResult:
    """
    Remove the index from a payload field of a collection.

    Parameters:
    - collection_name: Collection that holds the indexed field
    - field_name: Name of the payload field whose index is removed
    - wait: When True (the default), wait for the operation to complete
    - ordering: Write ordering guarantees for the operation (defaults to None)
    - **kwargs: Extra keyword arguments accepted by the signature; not described here

    Returns:
        UpdateResult: Outcome of the index deletion
    """

def list_payload_indexes(
    self,
    collection_name: str,
    **kwargs
) -> Dict[str, PayloadIndexInfo]:
    """
    Report every payload index defined on a collection.

    NOTE(review): confirm that this method is available in the installed
    qdrant-client version before relying on it.

    Parameters:
    - collection_name: Collection whose payload indexes are listed
    - **kwargs: Extra keyword arguments accepted by the signature; not described here

    Returns:
        Dict[str, PayloadIndexInfo]: Index info keyed by payload field name
    """

Usage examples:

from qdrant_client import models

# Keyword index, kept in memory, for exact matching on "category"
keyword_schema = models.KeywordIndexParams(type="keyword", on_disk=False)
client.create_payload_index(
    collection_name="documents",
    field_name="category",
    field_schema=keyword_schema,
)

# Integer index with range queries enabled, stored on disk, for "timestamp"
integer_schema = models.IntegerIndexParams(type="integer", range=True, on_disk=True)
client.create_payload_index(
    collection_name="documents",
    field_name="timestamp",
    field_schema=integer_schema,
)

# Text index for full-text search on "content":
# word tokenizer, token length limited to 2-20, lowercased, stored on disk
text_schema = models.TextIndexParams(
    type="text",
    tokenizer="word",
    min_token_len=2,
    max_token_len=20,
    lowercase=True,
    on_disk=True,
)
client.create_payload_index(
    collection_name="documents",
    field_name="content",
    field_schema=text_schema,
)

# Geo index, kept in memory, for geographic queries on "coordinates"
geo_schema = models.GeoIndexParams(type="geo", on_disk=False)
client.create_payload_index(
    collection_name="locations",
    field_name="coordinates",
    field_schema=geo_schema,
)

Collection Optimization

Optimize collection storage and search performance.

def optimize_collection(
    self,
    collection_name: str,
    wait: bool = True,
    **kwargs
) -> UpdateResult:
    """
    Rebuild the indexes of a collection and compact its storage.

    NOTE(review): confirm that this method is available in the installed
    qdrant-client version before relying on it.

    Parameters:
    - collection_name: Collection to optimize
    - wait: When True (the default), wait for the operation to complete
    - **kwargs: Extra keyword arguments accepted by the signature; not described here

    Returns:
        UpdateResult: Outcome of the optimization
    """

Vector Index Management

Manage vector indexes for search performance.

def recreate_index(
    self,
    collection_name: str,
    wait: bool = True,
    **kwargs
) -> UpdateResult:
    """
    Rebuild the vector index of a collection using its current configuration.

    NOTE(review): confirm that this method is available in the installed
    qdrant-client version before relying on it.

    Parameters:
    - collection_name: Collection whose vector index is rebuilt
    - wait: When True (the default), wait for the operation to complete
    - **kwargs: Extra keyword arguments accepted by the signature; not described here

    Returns:
        UpdateResult: Outcome of the rebuild
    """

Index Types

Keyword Index

For exact string matching and categorical fields.

class KeywordIndexParams(BaseModel):
    """Index parameters for a keyword payload field (exact string matching)."""

    # Index type tag; "keyword" is the only permitted value and the default.
    type: Literal["keyword"] = "keyword"
    on_disk: Optional[bool] = None  # Store index on disk; None by default

Best for:

Categories, tags, labels
User IDs, product codes
Enum values
Exact string matching

Integer Index

For integer-valued fields that are filtered with range queries.

class IntegerIndexParams(BaseModel):
    """Index parameters for an integer payload field."""

    # Index type tag; "integer" is the only permitted value and the default.
    type: Literal["integer"] = "integer"
    range: bool = True  # Enable range queries; on by default
    on_disk: Optional[bool] = None  # Store index on disk; None by default

Best for:

Timestamps, dates
Prices, quantities
User ratings, scores
Numeric IDs

Float Index

For floating-point numeric fields.

class FloatIndexParams(BaseModel):
    """Index parameters for a floating-point payload field."""

    # Index type tag; "float" is the only permitted value and the default.
    type: Literal["float"] = "float"
    # NOTE(review): confirm that the installed qdrant-client FloatIndexParams
    # model accepts a `range` field before passing it.
    range: bool = True  # Enable range queries; on by default
    on_disk: Optional[bool] = None  # Store index on disk; None by default

Best for:

Continuous measurements
Probabilities, percentages
Geographic coordinates (individual components)
Machine learning scores

Boolean Index

For boolean fields.

class BoolIndexParams(BaseModel):
    """Index parameters for a boolean payload field."""

    # Index type tag; "bool" is the only permitted value and the default.
    type: Literal["bool"] = "bool"
    on_disk: Optional[bool] = None  # Store index on disk; None by default

Best for:

Feature flags
Binary classifications
Yes/no fields

Geographic Index

For geographic coordinate fields.

class GeoIndexParams(BaseModel):
    """Index parameters for a geographic coordinate payload field."""

    # Index type tag; "geo" is the only permitted value and the default.
    type: Literal["geo"] = "geo"
    on_disk: Optional[bool] = None  # Store index on disk; None by default

Best for:

Latitude/longitude coordinates
Geographic bounding box queries
Radius-based location searches

Text Index

For full-text search capabilities.

class TextIndexTokenizer(str, Enum):
    """Tokenization methods selectable for a text index."""

    WORD = "word"  # Word-based tokenization
    WHITESPACE = "whitespace"  # Whitespace tokenization
    PREFIX = "prefix"  # Prefix-based tokenization

class TextIndexParams(BaseModel):
    """Index parameters for a text payload field (full-text search)."""

    # Index type tag; "text" is the only permitted value and the default.
    type: Literal["text"] = "text"
    # TextIndexTokenizer is defined above this class so that the annotation
    # resolves when the class body executes. The default is the enum member,
    # which still compares equal to the plain string "word".
    tokenizer: TextIndexTokenizer = TextIndexTokenizer.WORD  # Tokenization method
    min_token_len: Optional[int] = None  # Minimum token length
    max_token_len: Optional[int] = None  # Maximum token length
    lowercase: Optional[bool] = None  # Convert to lowercase
    on_disk: Optional[bool] = None  # Store index on disk

Best for:

Document content
Product descriptions
User comments
Search queries

Index Information

Index Status

class PayloadSchemaType(str, Enum):
    """Data types that can be reported for an indexed payload field."""

    KEYWORD = "keyword"
    INTEGER = "integer"
    FLOAT = "float"
    GEO = "geo"
    TEXT = "text"
    BOOL = "bool"
    DATETIME = "datetime"

class PayloadIndexInfo(BaseModel):
    """Description of a single payload index."""

    # PayloadSchemaType is defined above this class so that the annotation
    # resolves when the class body executes.
    data_type: PayloadSchemaType
    # PayloadIndexParams is not defined in this section.
    params: Optional[PayloadIndexParams] = None
    points: Optional[int] = None  # Number of indexed points

Performance Considerations

Index Selection Guidelines

Use keyword indexes when:

Exact matching on categorical data
Small number of unique values (< 10,000), or identifier-like fields (IDs, codes) that are only matched exactly
Frequent equality filters

Use integer/float indexes when:

Range queries (>, <, >=, <=)
Numeric comparisons
Sorting by numeric fields

Use text indexes when:

Full-text search required
Partial word matching needed
Search across large text fields

Use geo indexes when:

Location-based queries
Geographic filtering
Proximity searches

Index Storage Options

In-memory indexes (on_disk=False):

Faster query performance
Higher memory usage
Best for frequently queried fields

On-disk indexes (on_disk=True):

Lower memory usage
Slightly slower query performance
Best for large collections or infrequently used fields

Index Optimization Tips

Index only necessary fields - Each index consumes memory and slows writes
Use appropriate index types - Wrong index type reduces performance
Consider cardinality - High cardinality fields benefit more from indexing
Monitor index usage - Remove unused indexes to improve performance
Balance memory vs. disk - Use on_disk=True for less critical indexes

# Example: a deliberate indexing plan for a document collection
collection_name = "documents"

# Keyword index, kept in memory, for the high-cardinality filter field
document_id_schema = models.KeywordIndexParams(type="keyword", on_disk=False)
client.create_payload_index(
    collection_name=collection_name,
    field_name="document_id",
    field_schema=document_id_schema,
)

# Integer index with range queries enabled, stored on disk
timestamp_schema = models.IntegerIndexParams(type="integer", range=True, on_disk=True)
client.create_payload_index(
    collection_name=collection_name,
    field_name="timestamp",
    field_schema=timestamp_schema,
)

# Text index for full-text search; stored on disk because it is large
content_schema = models.TextIndexParams(
    type="text",
    tokenizer="word",
    lowercase=True,
    on_disk=True,
)
client.create_payload_index(
    collection_name=collection_name,
    field_name="content",
    field_schema=content_schema,
)

# Bool index, kept in memory, for filtering on the boolean flag
published_schema = models.BoolIndexParams(type="bool", on_disk=False)
client.create_payload_index(
    collection_name=collection_name,
    field_name="published",
    field_schema=published_schema,
)

Query Optimization

Using Indexed Fields

# Efficient queries using indexed fields
from qdrant_client import models

# Exact-match filter on the indexed keyword field "category"
category_condition = models.FieldCondition(
    key="category",
    match=models.MatchValue(value="technology"),
)
results = client.query_points(
    collection_name="documents",
    query=query_vector,
    query_filter=models.Filter(must=[category_condition]),
)

# Range filter on the indexed integer field "timestamp"
year_2022 = models.Range(
    gte=1640995200,  # Jan 1, 2022
    lte=1672531199,  # Dec 31, 2022
)
timestamp_condition = models.FieldCondition(key="timestamp", range=year_2022)
results = client.query_points(
    collection_name="documents",
    query=query_vector,
    query_filter=models.Filter(must=[timestamp_condition]),
)

# Full-text filter on the indexed text field "content"
content_condition = models.FieldCondition(
    key="content",
    match=models.MatchText(text="machine learning"),
)
results = client.query_points(
    collection_name="documents",
    query=query_vector,
    query_filter=models.Filter(must=[content_condition]),
)

Install with Tessl CLI