Tessl Tile for pypi/pymilvus@2.6.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

data-management.md index-management.md index.md milvus-client.md orm-collection.md search-operations.md types-enums.md user-management.md utility-functions.md

types-enums.md (docs/)

0
# Types and Enums
1

2
PyMilvus provides a comprehensive type system including data types, enums for configuration options, status codes, and complex type definitions. This reference covers all enumeration values, type constants, and data structures used throughout the API.
3

4
## Data Types
5

6
### DataType Enum
7

8
```python { .api }
9
from pymilvus import DataType
10

11
class DataType(IntEnum):
    """Integer codes identifying the data type of a schema field."""

    NONE = 0

    # Scalar types
    BOOL = 1
    INT8 = 2
    INT16 = 3
    INT32 = 4
    INT64 = 5
    FLOAT = 10
    DOUBLE = 11
    STRING = 20          # Deprecated, use VARCHAR
    VARCHAR = 21
    ARRAY = 22
    JSON = 23
    GEOMETRY = 24        # For geospatial data

    # Vector types
    BINARY_VECTOR = 100
    FLOAT_VECTOR = 101
    FLOAT16_VECTOR = 102      # Half precision vectors
    BFLOAT16_VECTOR = 103     # Brain float 16 vectors
    SPARSE_FLOAT_VECTOR = 104 # Sparse vectors for text search
    INT8_VECTOR = 105         # Quantized vectors

    UNKNOWN = 999
32
```
33

34
### Data Type Usage Examples
35

36
```python { .api }
37
from pymilvus import FieldSchema, DataType
38

39
# Scalar data types
bool_field = FieldSchema("active", DataType.BOOL)
int_field = FieldSchema("count", DataType.INT64)
float_field = FieldSchema("score", DataType.DOUBLE)
text_field = FieldSchema("content", DataType.VARCHAR, max_length=1000)

# Vector data types
dense_vector = FieldSchema("embedding", DataType.FLOAT_VECTOR, dim=768)
binary_vector = FieldSchema("hash", DataType.BINARY_VECTOR, dim=128)
sparse_vector = FieldSchema("sparse_embed", DataType.SPARSE_FLOAT_VECTOR)

# Half-precision vectors for memory efficiency
fp16_vector = FieldSchema("fp16_embed", DataType.FLOAT16_VECTOR, dim=512)
bf16_vector = FieldSchema("bf16_embed", DataType.BFLOAT16_VECTOR, dim=512)

# Quantized vectors for storage efficiency
int8_vector = FieldSchema("quantized", DataType.INT8_VECTOR, dim=256)

# Complex data types
json_field = FieldSchema("metadata", DataType.JSON)
# An ARRAY of VARCHAR needs max_length for each element in addition to
# max_capacity for the array itself.
array_field = FieldSchema(
    "tags",
    DataType.ARRAY,
    element_type=DataType.VARCHAR,
    max_capacity=20,
    max_length=64,
)

# Geospatial data (experimental)
geo_field = FieldSchema("location", DataType.GEOMETRY)
64
```
65

66
### Data Type Characteristics
67

68
```python { .api }
69
# Vector type memory usage comparison (per vector)
vector_memory_usage = {
    DataType.FLOAT_VECTOR: "dimension * 4 bytes (32-bit floats)",
    DataType.FLOAT16_VECTOR: "dimension * 2 bytes (16-bit floats)",
    DataType.BFLOAT16_VECTOR: "dimension * 2 bytes (bfloat16)",
    DataType.INT8_VECTOR: "dimension * 1 byte (quantized)",
    DataType.BINARY_VECTOR: "dimension / 8 bytes (packed bits)",
    DataType.SPARSE_FLOAT_VECTOR: "variable (only non-zero values stored)",
}

# Bytes per dimension for the fixed-width types, mirroring the formulas above.
# Keeping the numbers in their own table avoids parsing the descriptive text
# and lets BINARY_VECTOR (1/8 byte per dimension) be computed as well.
bytes_per_dimension = {
    DataType.FLOAT_VECTOR: 4,
    DataType.FLOAT16_VECTOR: 2,
    DataType.BFLOAT16_VECTOR: 2,
    DataType.INT8_VECTOR: 1,
    DataType.BINARY_VECTOR: 1 / 8,
}

# Example: 768-dimension vectors
dimension = 768
for vector_type, formula in vector_memory_usage.items():
    per_dimension = bytes_per_dimension.get(vector_type)
    if per_dimension is None:
        # No fixed width (sparse vectors): show the description instead.
        print(f"{vector_type.name}: {formula}")
    else:
        memory_bytes = dimension * per_dimension
        print(f"{vector_type.name}: {memory_bytes:g} bytes per vector")
88
```
89

90
## Index Types
91

92
### IndexType Enum
93

94
```python { .api }
95
from pymilvus import IndexType
96

97
class IndexType(IntEnum):
    """Integer codes identifying an index algorithm.

    ``IVF_FLAT`` and ``IVF_SQ8_H`` are aliases: they share a value with
    ``IVFLAT`` and ``IVF_SQ8H`` and resolve to the same members.
    """

    INVALID = 0
    FLAT = 1              # Exact search, 100% recall
    IVFLAT = 2            # Alias: IVF_FLAT
    IVF_SQ8 = 3           # IVF with scalar quantization
    RNSG = 4              # Random Navigable Small Graph (deprecated)
    IVF_SQ8H = 5          # Alias: IVF_SQ8_H
    IVF_PQ = 6            # IVF with product quantization
    HNSW = 11             # Hierarchical Navigable Small World
    ANNOY = 12            # Approximate Nearest Neighbors Oh Yeah
    AUTOINDEX = 13        # Automatic index selection
    SPARSE_INVERTED_INDEX = 14  # For sparse vectors
    SPARSE_WAND = 15      # Weak AND for sparse vectors
    TRIE = 16             # For string prefix matching
    STL_SORT = 17         # For numeric range queries
    INVERTED = 18         # For JSON and array fields
    GPU_IVF_FLAT = 19     # GPU-accelerated IVF_FLAT
    GPU_IVF_PQ = 20       # GPU-accelerated IVF_PQ
    GPU_BRUTE_FORCE = 21  # GPU exact search

    # Alternative names. Re-using an existing value makes these enum aliases,
    # so IndexType.IVF_FLAT is IndexType.IVFLAT.
    IVF_FLAT = IVFLAT
    IVF_SQ8_H = IVF_SQ8H
116
```
117

118
### Index Type Usage by Field Type
119

120
```python { .api }
121
# Vector field indexes
vector_indexes = {
    DataType.FLOAT_VECTOR: [
        IndexType.FLAT,           # Exact search
        IndexType.IVF_FLAT,       # Good accuracy/speed balance
        IndexType.IVF_PQ,         # Memory efficient
        IndexType.HNSW,           # Fast search
        IndexType.ANNOY,          # Memory efficient
        IndexType.AUTOINDEX,      # Automatic selection
    ],
    DataType.BINARY_VECTOR: [
        IndexType.FLAT,           # Exact Hamming distance
        IndexType.IVF_FLAT,       # Approximate Hamming search
    ],
    DataType.SPARSE_FLOAT_VECTOR: [
        IndexType.SPARSE_INVERTED_INDEX,  # Standard for sparse vectors
        IndexType.SPARSE_WAND,            # Optimized sparse search
    ],
}

# Scalar field indexes
scalar_indexes = {
    DataType.VARCHAR: [IndexType.TRIE],      # String prefix/equality
    DataType.INT64: [IndexType.STL_SORT],    # Numeric range queries
    DataType.DOUBLE: [IndexType.STL_SORT],   # Numeric range queries
    DataType.JSON: [IndexType.INVERTED],     # Key-value queries
    DataType.ARRAY: [IndexType.INVERTED],    # Array containment queries
}

# GPU-accelerated indexes (requires GPU-enabled Milvus)
gpu_indexes = {
    DataType.FLOAT_VECTOR: [
        IndexType.GPU_IVF_FLAT,
        IndexType.GPU_IVF_PQ,
        IndexType.GPU_BRUTE_FORCE,
    ],
}
158
```
159

160
## Function Types
161

162
### FunctionType Enum
163

164
```python { .api }
165
from pymilvus import FunctionType
166

167
class FunctionType(IntEnum):
    """Integer codes identifying the kind of a schema-level function."""

    UNKNOWN = 0
    BM25 = 1           # Sparse vector generation from text
    TEXTEMBEDDING = 2  # Dense vector generation from text
    RERANK = 3         # Relevance scoring for reranking
172
```
173

174
### Function Type Usage
175

176
```python { .api }
177
from pymilvus import Function, FunctionType
178

179
# BM25 function for sparse text vectors
bm25_function = Function(
    name="text_bm25",
    function_type=FunctionType.BM25,
    input_field_names=["content"],
    output_field_names=["bm25_sparse"],
    params={
        "language": "en",
        "k1": 1.2,        # BM25 parameter
        "b": 0.75,        # BM25 parameter
    },
)

# Text embedding function for dense vectors
embedding_function = Function(
    name="text_embedding",
    function_type=FunctionType.TEXTEMBEDDING,
    input_field_names=["title", "description"],
    output_field_names=["text_vector"],
    params={
        "model_name": "sentence-transformers/all-MiniLM-L6-v2",
        "model_config": {
            "device": "cuda:0",
            "normalize_embeddings": True,
        },
    },
)

# Reranking function for relevance scoring
rerank_function = Function(
    name="cross_encoder_rerank",
    function_type=FunctionType.RERANK,
    input_field_names=["query", "document"],
    output_field_names=["relevance_score"],
    params={
        "model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2",
        "top_k": 100,
    },
)
218
```
219

220
## Metric Types
221

222
### MetricType Constants
223

224
```python { .api }
225
# Distance metrics for vector similarity
226
class MetricType:
    """String names of the distance metrics used for vector similarity."""

    L2 = "L2"                          # Euclidean distance
    IP = "IP"                          # Inner Product (cosine for normalized vectors)
    COSINE = "COSINE"                  # Cosine similarity (auto-normalized)
    HAMMING = "HAMMING"                # Hamming distance (for binary vectors)
    JACCARD = "JACCARD"                # Jaccard similarity (for binary vectors)
    TANIMOTO = "TANIMOTO"              # Tanimoto coefficient
    SUBSTRUCTURE = "SUBSTRUCTURE"      # Chemical substructure matching
    SUPERSTRUCTURE = "SUPERSTRUCTURE"  # Chemical superstructure matching
235
```
236

237
### Metric Type Usage Guidelines
238

239
```python { .api }
240
# Metric selection by use case
metric_guidelines = {
    "general_embeddings": "L2",            # Most common for embeddings
    "normalized_embeddings": "COSINE",     # For unit vectors
    "dot_product_similarity": "IP",        # When vectors aren't normalized
    "binary_hashes": "HAMMING",            # For binary vectors
    "molecular_fingerprints": "TANIMOTO",  # Chemical similarity
    "sparse_vectors": "IP",                # For BM25/TF-IDF vectors
}

# Example index creation with different metrics
from pymilvus import MilvusClient

client = MilvusClient()

# MilvusClient.create_index takes the collection name and an IndexParams
# object; the target field and its settings are registered with add_index().

# L2 distance for general embeddings
hnsw_params = client.prepare_index_params()
hnsw_params.add_index(
    field_name="embedding",
    index_type="HNSW",
    metric_type="L2",
    params={"M": 32, "efConstruction": 400},
)
client.create_index("documents", hnsw_params)

# Cosine similarity for normalized text embeddings
ivf_params = client.prepare_index_params()
ivf_params.add_index(
    field_name="text_vector",
    index_type="IVF_FLAT",
    metric_type="COSINE",
    params={"nlist": 1024},
)
client.create_index("articles", ivf_params)

# Inner product for sparse vectors
sparse_params = client.prepare_index_params()
sparse_params.add_index(
    field_name="sparse_vector",
    index_type="SPARSE_INVERTED_INDEX",
    metric_type="IP",
    params={"drop_ratio_build": 0.2},
)
client.create_index("bm25_collection", sparse_params)
275
```
276

277
## State Enums
278

279
### LoadState
280

281
```python { .api }
282
class LoadState:
    """Names of the load states a collection can report."""

    NotExist = "NotExist"  # Collection doesn't exist
    NotLoad = "NotLoad"    # Collection not loaded into memory
    Loading = "Loading"    # Currently loading
    Loaded = "Loaded"      # Fully loaded and ready
287
```
288

289
### IndexState
290

291
```python { .api }
292
class IndexState:
    """Names of the states an index build can report."""

    IndexStateNone = "IndexStateNone"  # No index information
    Unissued = "Unissued"              # Index build not started
    InProgress = "InProgress"          # Index building in progress
    Finished = "Finished"              # Index build completed
    Failed = "Failed"                  # Index build failed
    Deleted = "Deleted"                # Index was deleted
299
```
300

301
### ConsistencyLevel
302

303
```python { .api }
304
class ConsistencyLevel:
    """Names of the consistency levels a request can ask for."""

    Strong = "Strong"          # Read your own writes immediately
    Bounded = "Bounded"        # Bounded staleness (default)
    Eventually = "Eventually"  # Eventual consistency
    Session = "Session"        # Session consistency
    Customized = "Customized"  # Custom consistency level
310
```
311

312
### State Usage Examples
313

314
```python { .api }
315
from pymilvus import MilvusClient, utility
316

317
client = MilvusClient()

# Check loading state
# NOTE(review): this compares the "state" entry against plain strings; confirm
# get_load_state() does not return LoadState members instead.
load_state = client.get_load_state("my_collection")
state = load_state.get("state", "Unknown")

if state == "NotLoad":
    print("Collection not loaded, loading now...")
    client.load_collection("my_collection")
elif state == "Loading":
    print("Collection is currently loading...")
    # NOTE(review): utility.* helpers presumably use the ORM connection
    # registry rather than `client`; confirm a connection is set up for them.
    utility.wait_for_loading_complete("my_collection")
elif state == "Loaded":
    print("Collection ready for search")

# Check index building state
# NOTE(review): verify the "index_state" and "progress" keys against the dict
# that index_building_progress() actually returns.
index_progress = utility.index_building_progress("my_collection", "vector_field")
index_state = index_progress.get("index_state", "Unknown")

state_messages = {
    "Unissued": "Index build queued",
    "InProgress": f"Building index: {index_progress.get('progress', 0)}%",
    "Finished": "Index build completed successfully",
    "Failed": "Index build failed - check logs",
}

# Fall back to a generic message for any state missing from the table.
print(state_messages.get(index_state, f"Unknown state: {index_state}"))

# Use different consistency levels
search_results = client.search(
    "my_collection",
    data=[[0.1] * 768],
    limit=10,
    consistency_level="Strong",  # Ensure latest data
)
352
```
353

354
## Complex Type Definitions
355

356
### Status
357

358
```python { .api }
359
from pymilvus.client.types import Status
360

361
class Status:
    """Result status made of a numeric code and a message.

    The class attributes name the known codes; ``SUCCESS`` (0) is the only
    code for which ``OK()`` returns True.
    """

    SUCCESS = 0
    UNEXPECTED_ERROR = 1
    CONNECT_FAILED = 2
    PERMISSION_DENIED = 3
    COLLECTION_NOT_EXISTS = 4
    ILLEGAL_ARGUMENT = 5
    ILLEGAL_DIMENSION = 7
    ILLEGAL_INDEX_TYPE = 8
    ILLEGAL_COLLECTION_NAME = 9
    ILLEGAL_TOPK = 10
    ILLEGAL_ROWRECORD = 11
    ILLEGAL_VECTOR_ID = 12
    ILLEGAL_SEARCH_RESULT = 13
    FILE_NOT_FOUND = 14
    META_FAILED = 15
    CACHE_FAILED = 16
    CANNOT_CREATE_FOLDER = 17
    CANNOT_CREATE_FILE = 18
    CANNOT_DELETE_FOLDER = 19
    CANNOT_DELETE_FILE = 20
    BUILD_INDEX_ERROR = 21
    ILLEGAL_NLIST = 22
    ILLEGAL_METRIC_TYPE = 23
    OUT_OF_MEMORY = 24

    def __init__(self, code: int = SUCCESS, message: str = ""):
        """Store the status code (default ``SUCCESS``) and its message."""
        self.code = code
        self.message = message

    def OK(self) -> bool:
        """Return True if status indicates success"""
        return self.code == Status.SUCCESS
394
```
395

396
### BulkInsertState
397

398
```python { .api }
399
class BulkInsertState:
    """Snapshot of a bulk-insert task; every field starts at an empty default."""

    def __init__(self):
        self.task_id: int = 0
        self.state: str = ""           # "pending", "importing", "completed", "failed"
        self.row_count: int = 0
        self.id_list: List[int] = []
        self.infos: Dict[str, Any] = {}
        self.create_time: int = 0
        self.progress: float = 0.0
408
```
409

410
### Replica Information
411

412
```python { .api }
413
class Replica:
    """Description of one replica; every field starts at an empty default."""

    def __init__(self):
        self.id: int = 0
        self.collection_id: int = 0
        self.partition_ids: List[int] = []
        self.shard_replicas: List[Shard] = []
        self.node_ids: List[int] = []
        self.resource_group: str = ""
421

422
class Shard:
    """Description of one shard; every field starts at an empty default."""

    def __init__(self):
        self.channel_name: str = ""
        self.shard_leader: int = 0
        self.shard_nodes: List[int] = []
        self.dm_channel_name: str = ""
428
```
429

430
### ResourceGroupInfo
431

432
```python { .api }
433
class ResourceGroupInfo:
    """Description of a resource group; every field starts at an empty default."""

    def __init__(self):
        self.name: str = ""
        self.capacity: int = 0
        self.num_available_node: int = 0
        self.num_loaded_replica: Dict[str, int] = {}
        self.num_outgoing_node: Dict[str, int] = {}
        self.num_incoming_node: Dict[str, int] = {}
        self.config: Dict[str, Any] = {}
        self.nodes: List[int] = []
443
```
444

445
## Type Validation Utilities
446

447
### Data Type Validation
448

449
```python { .api }
450
def validate_data_type_compatibility(field_type: DataType, value: Any) -> bool:
    """Validate if a value is compatible with a field type.

    Args:
        field_type: The DataType the value is meant to be stored as.
        value: The Python value to check.

    Returns:
        True when ``value`` passes the check registered for ``field_type``;
        False when it fails or when ``field_type`` has no registered check.
    """

    def is_int(x, bits):
        # bool is a subclass of int, so reject it explicitly: True/False must
        # not be accepted as integer data.
        if isinstance(x, bool) or not isinstance(x, int):
            return False
        bound = 1 << (bits - 1)
        return -bound <= x < bound

    def is_number(x):
        # Same bool exclusion as is_int, for floating-point fields.
        return isinstance(x, (int, float)) and not isinstance(x, bool)

    type_validators = {
        DataType.BOOL: lambda x: isinstance(x, bool),
        DataType.INT8: lambda x: is_int(x, 8),
        DataType.INT16: lambda x: is_int(x, 16),
        DataType.INT32: lambda x: is_int(x, 32),
        DataType.INT64: lambda x: is_int(x, 64),
        DataType.FLOAT: is_number,
        DataType.DOUBLE: is_number,
        DataType.VARCHAR: lambda x: isinstance(x, str),
        DataType.JSON: lambda x: isinstance(x, (dict, list, str, int, float, bool, type(None))),
        DataType.ARRAY: lambda x: isinstance(x, list),
        DataType.FLOAT_VECTOR: lambda x: isinstance(x, list) and all(is_number(v) for v in x),
        DataType.BINARY_VECTOR: lambda x: isinstance(x, (list, bytes)),
        DataType.SPARSE_FLOAT_VECTOR: lambda x: isinstance(x, (dict, list)),
    }

    validator = type_validators.get(field_type)
    if validator is None:
        # Types without a registered check are reported as incompatible.
        return False
    return validator(value)
474

475
# Usage examples: (field type, value, expected validation result)
test_values = [
    (DataType.INT32, 12345, True),
    (DataType.INT32, 3000000000, False),  # Too large for INT32
    (DataType.FLOAT_VECTOR, [0.1, 0.2, 0.3], True),
    (DataType.FLOAT_VECTOR, [1, 2, "3"], False),  # Invalid vector element
    (DataType.VARCHAR, "hello", True),
    (DataType.JSON, {"key": "value"}, True),
]

for field_type, value, expected in test_values:
    result = validate_data_type_compatibility(field_type, value)
    # A check mark means the validator agreed with the expected outcome.
    status = "✓" if result == expected else "✗"
    print(f"{status} {field_type.name}: {value} -> {result}")
489
```
490

491
### Index Type Compatibility
492

493
```python { .api }
494
def get_compatible_index_types(field_type: DataType) -> List[IndexType]:
    """Get compatible index types for a field type.

    Args:
        field_type: The DataType of the field to be indexed.

    Returns:
        The index types listed for ``field_type``, or an empty list when the
        type has no entry in the table.
    """
    compatibility_map = {
        DataType.FLOAT_VECTOR: [
            IndexType.FLAT, IndexType.IVF_FLAT, IndexType.IVF_PQ,
            IndexType.HNSW, IndexType.ANNOY, IndexType.AUTOINDEX,
        ],
        DataType.BINARY_VECTOR: [
            IndexType.FLAT, IndexType.IVF_FLAT,
        ],
        DataType.SPARSE_FLOAT_VECTOR: [
            IndexType.SPARSE_INVERTED_INDEX, IndexType.SPARSE_WAND,
        ],
        DataType.VARCHAR: [IndexType.TRIE],
        DataType.INT64: [IndexType.STL_SORT],
        DataType.INT32: [IndexType.STL_SORT],
        DataType.DOUBLE: [IndexType.STL_SORT],
        DataType.FLOAT: [IndexType.STL_SORT],
        DataType.JSON: [IndexType.INVERTED],
        DataType.ARRAY: [IndexType.INVERTED],
    }

    return compatibility_map.get(field_type, [])
518

519
# Check index compatibility
field_types = [DataType.FLOAT_VECTOR, DataType.VARCHAR, DataType.JSON]

for field_type in field_types:
    compatible_indexes = get_compatible_index_types(field_type)
    print(f"{field_type.name} compatible indexes:")
    for index_type in compatible_indexes:
        print(f"  - {index_type.name}")
527
```
528

529
### Metric Type Compatibility
530

531
```python { .api }
532
def get_compatible_metrics(field_type: DataType) -> List[str]:
    """Get compatible metric types for a field type.

    Args:
        field_type: The DataType of the vector field.

    Returns:
        The metric names listed for ``field_type``, or an empty list when the
        type has no entry in the table.
    """
    dense_metrics = ["L2", "IP", "COSINE"]

    metric_compatibility = {
        # Each dense type gets its own copy so callers never share a list.
        DataType.FLOAT_VECTOR: list(dense_metrics),
        DataType.BINARY_VECTOR: ["HAMMING", "JACCARD", "TANIMOTO", "SUBSTRUCTURE", "SUPERSTRUCTURE"],
        DataType.SPARSE_FLOAT_VECTOR: ["IP"],
        DataType.FLOAT16_VECTOR: list(dense_metrics),
        DataType.BFLOAT16_VECTOR: list(dense_metrics),
        DataType.INT8_VECTOR: list(dense_metrics),
    }

    return metric_compatibility.get(field_type, [])
545

546
# Validate metric compatibility: (field type, metric to check)
vector_fields = [
    (DataType.FLOAT_VECTOR, "L2"),
    (DataType.BINARY_VECTOR, "HAMMING"),
    (DataType.SPARSE_FLOAT_VECTOR, "IP"),
    (DataType.FLOAT_VECTOR, "HAMMING"),  # Invalid combination
]

for field_type, metric in vector_fields:
    compatible_metrics = get_compatible_metrics(field_type)
    is_compatible = metric in compatible_metrics
    status = "✓" if is_compatible else "✗"
    print(f"{status} {field_type.name} + {metric}: {is_compatible}")
559
```
560

561
## Configuration Constants
562

563
### Default Values
564

565
```python { .api }
566
class DefaultConfig:
    """Default values for connections, data limits, indexing and search."""

    # Connection defaults
    DEFAULT_HOST = "localhost"
    DEFAULT_PORT = "19530"
    # Built from the host and port defined just above.
    DEFAULT_URI = f"http://{DEFAULT_HOST}:{DEFAULT_PORT}"
    MILVUS_CONN_ALIAS = "default"
    MILVUS_CONN_TIMEOUT = 10.0

    # Data limits
    MaxVarCharLength = 65535
    MaxArrayCapacity = 4096
    MaxDimension = 32768

    # Encoding
    EncodeProtocol = "utf-8"

    # Index defaults
    DefaultIndexType = IndexType.AUTOINDEX
    DefaultMetricType = "L2"

    # Search defaults
    DefaultSearchLimit = 10
    DefaultQueryLimit = 16384
    DefaultBatchSize = 1000
590
```
591

592
### Environment Configuration
593

594
```python { .api }
595
import os
596

597
# Environment-based configuration
598
def get_milvus_config():
    """Get Milvus configuration from environment variables.

    Reads MILVUS_URI, MILVUS_USER, MILVUS_PASSWORD, MILVUS_DB_NAME and
    MILVUS_TIMEOUT. The URI and timeout fall back to the DefaultConfig
    values; the other settings fall back to an empty string.

    Returns:
        A dict with the keys "uri", "user", "password", "db_name" and
        "timeout" (a float).

    Raises:
        ValueError: If MILVUS_TIMEOUT is set to a non-numeric value.
    """
    # A variable that is set but blank is treated like an unset one, so an
    # empty MILVUS_TIMEOUT no longer crashes float().
    raw_timeout = os.getenv("MILVUS_TIMEOUT", "").strip()
    if raw_timeout:
        timeout = float(raw_timeout)
    else:
        timeout = float(DefaultConfig.MILVUS_CONN_TIMEOUT)

    config = {
        "uri": os.getenv("MILVUS_URI", DefaultConfig.DEFAULT_URI),
        "user": os.getenv("MILVUS_USER", ""),
        "password": os.getenv("MILVUS_PASSWORD", ""),
        "db_name": os.getenv("MILVUS_DB_NAME", ""),
        "timeout": timeout,
    }

    return config
610

611
# Use environment configuration
config = get_milvus_config()
client = MilvusClient(**config)
614
```
615

616
PyMilvus types and enums provide a comprehensive type system for vector database operations, ensuring type safety and providing clear configuration options for all aspects of data management, indexing, and search operations.

Version

Tile

Files

types-enums.md (docs/)

types-enums.md (docs/)