# FastEmbed Integration

Automatic embedding generation for text and images using the FastEmbed library, enabling semantic search without manual vector creation.

## Installation

```bash
pip install "qdrant-client[fastembed]"
# or for GPU support
pip install "qdrant-client[fastembed-gpu]"
```

## Capabilities

### Embedding Size Discovery

Get embedding dimensions for supported models.

```python { .api }
def get_embedding_size(self, model_name: str) -> int:
    """
    Get embedding size for a model.

    Parameters:
    - model_name: Name of the embedding model

    Returns:
    int: Vector dimension for the model
    """
```

Usage example:

```python
# Get embedding size for a model
size = client.get_embedding_size("sentence-transformers/all-MiniLM-L6-v2")
print(f"Embedding size: {size}")  # 384

# Create collection with correct size
client.create_collection(
    collection_name="semantic_collection",
    vectors_config=models.VectorParams(size=size, distance=models.Distance.COSINE)
)
```

### Document Upload with Auto-Embedding

Upload documents with automatic embedding generation.

```python { .api }
def upload_collection(
    self,
    collection_name: str,
    vectors: Union[
        Iterable[VectorStruct],
        Iterable[PointStruct],
        Iterable[Record],
        Iterable[Document],
        Iterable[ImageDocument]
    ],
    ids: Optional[Iterable[PointId]] = None,
    batch_size: int = 100,
    parallel: int = 1,
    max_retries: int = 3,
    payload: Optional[Iterable[Payload]] = None,
    wait: bool = True,
    shard_key_selector: Optional[ShardKeySelector] = None,
) -> None:
    """
    Upload collection with automatic embedding generation.

    Parameters:
    - collection_name: Name of the collection
    - vectors: Documents, images, or vectors to upload
    - ids: Point IDs (generated if not provided)
    - batch_size: Number of items per batch
    - parallel: Number of parallel processing threads
    - max_retries: Maximum retry attempts
    - payload: Additional metadata for points
    - wait: Wait for operation to complete
    - shard_key_selector: Shard key for routing
    """
```

Usage examples:

```python
from qdrant_client import models

# Upload text documents with auto-embedding
model_name = "sentence-transformers/all-MiniLM-L6-v2"
documents = [
    models.Document(text="Qdrant has Langchain integrations", model=model_name),
    models.Document(text="Qdrant also has Llama Index integrations", model=model_name),
    models.Document(text="Vector databases are useful for AI applications", model=model_name)
]

client.upload_collection(
    collection_name="semantic_search",
    vectors=documents,
    ids=[1, 2, 3],
    payload=[
        {"source": "langchain-docs"},
        {"source": "llamaindex-docs"},
        {"source": "general"}
    ]
)

# Upload images with auto-embedding
image_documents = [
    models.ImageDocument(image="path/to/image1.jpg", model="clip-ViT-B-32"),
    models.ImageDocument(image="path/to/image2.jpg", model="clip-ViT-B-32")
]

client.upload_collection(
    collection_name="image_search",
    vectors=image_documents,
    payload=[{"type": "photo"}, {"type": "illustration"}]
)
```

### Semantic Search

Perform semantic search using text or image queries with automatic embedding.

```python { .api }
def query_qdrant(
    self,
    collection_name: str,
    query: Union[str, Document, ImageDocument],
    query_filter: Optional[Filter] = None,
    limit: int = 10,
    search_params: Optional[SearchParams] = None,
    **kwargs
) -> List[QueryResponse]:
    """
    Semantic search with automatic query embedding.

    Parameters:
    - collection_name: Name of the collection
    - query: Text query, Document, or ImageDocument
    - query_filter: Filter conditions for results
    - limit: Maximum number of results
    - search_params: Search algorithm parameters

    Returns:
    List[QueryResponse]: Semantic search results
    """
```

Usage examples:

```python
# Text semantic search
results = client.query_qdrant(
    collection_name="semantic_search",
    query=models.Document(text="AI and machine learning", model=model_name),
    limit=5
)

# Simple text query (uses default model)
results = client.query_qdrant(
    collection_name="semantic_search",
    query="vector search applications",
    limit=5
)

# Image semantic search
results = client.query_qdrant(
    collection_name="image_search",
    query=models.ImageDocument(image="query_image.jpg", model="clip-ViT-B-32"),
    limit=5
)
```

### Manual Embedding Generation

Generate embeddings manually for custom processing.

```python { .api }
def embed_documents(
    self,
    documents: List[Union[str, Document]],
    model_name: Optional[str] = None,
    **kwargs
) -> List[List[float]]:
    """
    Generate embeddings for text documents.

    Parameters:
    - documents: List of text strings or Document objects
    - model_name: Embedding model name

    Returns:
    List[List[float]]: Generated embeddings
    """

def embed_sparse_documents(
    self,
    documents: List[Union[str, Document]],
    model_name: Optional[str] = None,
    **kwargs
) -> List[SparseVector]:
    """
    Generate sparse embeddings for text documents.

    Parameters:
    - documents: List of text strings or Document objects
    - model_name: Sparse embedding model name

    Returns:
    List[SparseVector]: Generated sparse embeddings
    """
```

## Document Types

### Text Documents

```python { .api }
class Document(BaseModel):
    text: str  # Text content to embed
    model: str  # Embedding model name
    options: Optional[Dict[str, Any]] = None  # Model-specific options

# Document options examples:
# {"cuda": True}       # Use GPU acceleration
# {"normalize": True}  # Normalize embeddings
# {"batch_size": 32}   # Custom batch size
```

### Image Documents

```python { .api }
class ImageDocument(BaseModel):
    image: Union[str, bytes, ImageInput]  # Image path, bytes, or PIL Image
    model: str  # Image embedding model name
    options: Optional[Dict[str, Any]] = None  # Model-specific options
```

### Query Response

```python { .api }
class QueryResponse(BaseModel):
    id: Union[str, int]  # Point ID
    embedding: Optional[List[float]]  # Generated embedding
    sparse_embedding: Optional[SparseVector] = None  # Sparse embedding
    metadata: Dict[str, Any]  # Point payload
    document: str  # Original document text
    score: float  # Similarity score
```

## Model Support

### Text Embedding Models

Popular text embedding models supported by FastEmbed:

- `"sentence-transformers/all-MiniLM-L6-v2"` (384 dimensions)
- `"sentence-transformers/all-mpnet-base-v2"` (768 dimensions)
- `"BAAI/bge-small-en-v1.5"` (384 dimensions)
- `"BAAI/bge-base-en-v1.5"` (768 dimensions)
- `"BAAI/bge-large-en-v1.5"` (1024 dimensions)

### Image Embedding Models

Image embedding models for visual search:

- `"clip-ViT-B-32"` (512 dimensions)
- `"clip-ViT-L-14"` (768 dimensions)

### Sparse Models

Sparse embedding models for keyword matching:

- `"Qdrant/bm25"` (sparse vectors)
- `"prithivida/Splade_PP_en_v1"` (sparse vectors)

### Model Discovery

```python { .api }
class FastEmbedMisc:
    @classmethod
    def is_installed(cls) -> bool:
        """Check if FastEmbed is installed."""

    @classmethod
    def get_text_models(cls) -> Set[str]:
        """Get available text embedding models."""

    @classmethod
    def get_image_models(cls) -> Set[str]:
        """Get available image embedding models."""

    @classmethod
    def get_sparse_models(cls) -> Set[str]:
        """Get available sparse embedding models."""
```

## Advanced Features

### GPU Acceleration

Enable GPU acceleration for faster embedding generation:

```python
# Install GPU version
# pip install "qdrant-client[fastembed-gpu]"

# Use GPU in document options
document = models.Document(
    text="Accelerated embedding generation",
    model="sentence-transformers/all-MiniLM-L6-v2",
    options={"cuda": True}
)
```

### Batch Processing

Optimize embedding generation with batch processing:

```python
# Large batch upload with optimized settings
documents = [models.Document(text=f"Document {i}", model=model_name) for i in range(10000)]

client.upload_collection(
    collection_name="large_collection",
    vectors=documents,
    batch_size=200,  # Larger batches for efficiency
    parallel=4       # Multiple parallel workers
)
```

### Hybrid Search

Combine dense and sparse embeddings for hybrid search:

```python
from qdrant_client.hybrid import FusionQuery

# Upload documents with both dense and sparse embeddings
dense_docs = [models.Document(text=text, model="all-MiniLM-L6-v2") for text in texts]
sparse_docs = [models.Document(text=text, model="Qdrant/bm25") for text in texts]

# Create collection with dense and sparse vectors.
# Sparse vectors are configured via the separate `sparse_vectors_config`
# parameter, not inside `vectors_config`.
client.create_collection(
    collection_name="hybrid_collection",
    vectors_config={
        "dense": models.VectorParams(size=384, distance=models.Distance.COSINE)
    },
    sparse_vectors_config={
        "sparse": models.SparseVectorParams()
    }
)

# Perform hybrid search
fusion_query = FusionQuery(
    dense_query=models.Document(text="search query", model="all-MiniLM-L6-v2"),
    sparse_query=models.Document(text="search query", model="Qdrant/bm25")
)

results = client.query_points(
    collection_name="hybrid_collection",
    query=fusion_query,
    limit=10
)
```

## Error Handling

Common FastEmbed-related exceptions:

```python { .api }
class FastEmbedNotInstalled(Exception):
    """Raised when FastEmbed is not installed but required."""

class ModelNotFound(Exception):
    """Raised when specified model is not available."""

class EmbeddingGenerationError(Exception):
    """Raised when embedding generation fails."""
```

Check FastEmbed availability:

```python
from qdrant_client.fastembed_common import FastEmbedMisc

if not FastEmbedMisc.is_installed():
    print("FastEmbed not installed. Install with: pip install qdrant-client[fastembed]")
else:
    print("Available text models:", FastEmbedMisc.get_text_models())
```