0
# Types and Enums
1
2
PyMilvus provides a comprehensive type system including data types, enums for configuration options, status codes, and complex type definitions. This reference covers all enumeration values, type constants, and data structures used throughout the API.
3
4
## Data Types
5
6
### DataType Enum
7
8
```python { .api }
9
from pymilvus import DataType
10
11
class DataType(IntEnum):
    """Integer codes identifying the data type of a collection field.

    Scalar types use codes below 100, vector types use 100-105, and
    ``UNKNOWN`` is 999.
    """

    NONE = 0

    # Scalar types
    BOOL = 1
    INT8 = 2
    INT16 = 3
    INT32 = 4
    INT64 = 5
    FLOAT = 10
    DOUBLE = 11
    STRING = 20  # Deprecated, use VARCHAR
    VARCHAR = 21
    ARRAY = 22
    JSON = 23
    GEOMETRY = 24  # For geospatial data

    # Vector types
    BINARY_VECTOR = 100
    FLOAT_VECTOR = 101
    FLOAT16_VECTOR = 102  # Half precision vectors
    BFLOAT16_VECTOR = 103  # Brain float 16 vectors
    SPARSE_FLOAT_VECTOR = 104  # Sparse vectors for text search
    INT8_VECTOR = 105  # Quantized vectors

    UNKNOWN = 999
32
```
33
34
### Data Type Usage Examples
35
36
```python { .api }
37
from pymilvus import FieldSchema, DataType
38
39
# Scalar data types
bool_field = FieldSchema("active", DataType.BOOL)
int_field = FieldSchema("count", DataType.INT64)
float_field = FieldSchema("score", DataType.DOUBLE)
text_field = FieldSchema("content", DataType.VARCHAR, max_length=1000)

# Vector data types
dense_vector = FieldSchema("embedding", DataType.FLOAT_VECTOR, dim=768)
binary_vector = FieldSchema("hash", DataType.BINARY_VECTOR, dim=128)
sparse_vector = FieldSchema("sparse_embed", DataType.SPARSE_FLOAT_VECTOR)

# Half-precision vectors for memory efficiency
fp16_vector = FieldSchema("fp16_embed", DataType.FLOAT16_VECTOR, dim=512)
bf16_vector = FieldSchema("bf16_embed", DataType.BFLOAT16_VECTOR, dim=512)

# Quantized vectors for storage efficiency
int8_vector = FieldSchema("quantized", DataType.INT8_VECTOR, dim=256)

# Complex data types
json_field = FieldSchema("metadata", DataType.JSON)
array_field = FieldSchema(
    "tags",
    DataType.ARRAY,
    element_type=DataType.VARCHAR,
    max_capacity=20,
    max_length=64,  # VARCHAR elements need a per-element length limit
)

# Geospatial data (experimental)
geo_field = FieldSchema("location", DataType.GEOMETRY)
64
```
65
66
### Data Type Characteristics
67
68
```python { .api }
69
# Vector type memory usage comparison (per vector)
vector_memory_usage = {
    DataType.FLOAT_VECTOR: "dimension * 4 bytes (32-bit floats)",
    DataType.FLOAT16_VECTOR: "dimension * 2 bytes (16-bit floats)",
    DataType.BFLOAT16_VECTOR: "dimension * 2 bytes (bfloat16)",
    DataType.INT8_VECTOR: "dimension * 1 byte (quantized)",
    DataType.BINARY_VECTOR: "dimension / 8 bytes (packed bits)",
    DataType.SPARSE_FLOAT_VECTOR: "variable (only non-zero values stored)",
}

# Example: 768-dimension vectors
dimension = 768
for vector_type, formula in vector_memory_usage.items():
    if "dimension *" in formula:
        # "dimension * N byte(s) ..." -> N is the first token after " * ".
        multiplier = float(formula.split(" * ")[1].split(" ")[0])
        memory_bytes = dimension * multiplier
        print(f"{vector_type.name}: {memory_bytes} bytes per vector")
    else:
        # Formulas that are not a plain multiplication are printed verbatim.
        print(f"{vector_type.name}: {formula}")
88
```
89
90
## Index Types
91
92
### IndexType Enum
93
94
```python { .api }
95
from pymilvus import IndexType
96
97
class IndexType(IntEnum):
    """Integer codes identifying an index algorithm.

    ``IVF_FLAT`` and ``IVF_SQ8_H`` are aliases: they resolve to the same
    members as ``IVFLAT`` and ``IVF_SQ8H`` respectively.
    """

    INVALID = 0
    FLAT = 1  # Exact search, 100% recall
    IVFLAT = 2
    IVF_SQ8 = 3  # IVF with scalar quantization
    RNSG = 4  # Refined Navigating Spreading-out Graph (deprecated)
    IVF_SQ8H = 5
    IVF_PQ = 6  # IVF with product quantization
    HNSW = 11  # Hierarchical Navigable Small World
    ANNOY = 12  # Approximate Nearest Neighbors Oh Yeah
    AUTOINDEX = 13  # Automatic index selection
    SPARSE_INVERTED_INDEX = 14  # For sparse vectors
    SPARSE_WAND = 15  # Weak AND for sparse vectors
    TRIE = 16  # For string prefix matching
    STL_SORT = 17  # For numeric range queries
    INVERTED = 18  # For JSON and array fields
    GPU_IVF_FLAT = 19  # GPU-accelerated IVF_FLAT
    GPU_IVF_PQ = 20  # GPU-accelerated IVF_PQ
    GPU_BRUTE_FORCE = 21  # GPU exact search

    # Alternative names (enum aliases share the value of the canonical member)
    IVF_FLAT = IVFLAT
    IVF_SQ8_H = IVF_SQ8H
116
```
117
118
### Index Type Usage by Field Type
119
120
```python { .api }
121
# Vector field indexes
vector_indexes = {
    DataType.FLOAT_VECTOR: [
        IndexType.FLAT,       # Exact search
        IndexType.IVF_FLAT,   # Good accuracy/speed balance
        IndexType.IVF_PQ,     # Memory efficient
        IndexType.HNSW,       # Fast search
        IndexType.ANNOY,      # Memory efficient
        IndexType.AUTOINDEX,  # Automatic selection
    ],
    DataType.BINARY_VECTOR: [
        IndexType.FLAT,      # Exact Hamming distance
        IndexType.IVF_FLAT,  # Approximate Hamming search
    ],
    DataType.SPARSE_FLOAT_VECTOR: [
        IndexType.SPARSE_INVERTED_INDEX,  # Standard for sparse vectors
        IndexType.SPARSE_WAND,            # Optimized sparse search
    ],
}

# Scalar field indexes
scalar_indexes = {
    DataType.VARCHAR: [IndexType.TRIE],     # String prefix/equality
    DataType.INT64: [IndexType.STL_SORT],   # Numeric range queries
    DataType.DOUBLE: [IndexType.STL_SORT],  # Numeric range queries
    DataType.JSON: [IndexType.INVERTED],    # Key-value queries
    DataType.ARRAY: [IndexType.INVERTED],   # Array containment queries
}

# GPU-accelerated indexes (requires GPU-enabled Milvus)
gpu_indexes = {
    DataType.FLOAT_VECTOR: [
        IndexType.GPU_IVF_FLAT,
        IndexType.GPU_IVF_PQ,
        IndexType.GPU_BRUTE_FORCE,
    ],
}
158
```
159
160
## Function Types
161
162
### FunctionType Enum
163
164
```python { .api }
165
from pymilvus import FunctionType
166
167
class FunctionType(IntEnum):
    """Integer codes identifying the kind of a schema ``Function``."""

    UNKNOWN = 0
    BM25 = 1  # Sparse vector generation from text
    TEXTEMBEDDING = 2  # Dense vector generation from text
    RERANK = 3  # Relevance scoring for reranking
172
```
173
174
### Function Type Usage
175
176
```python { .api }
177
from pymilvus import Function, FunctionType
178
179
# BM25 function for sparse text vectors
bm25_function = Function(
    name="text_bm25",
    function_type=FunctionType.BM25,
    input_field_names=["content"],
    output_field_names=["bm25_sparse"],
    params={
        "language": "en",
        "k1": 1.2,  # BM25 parameter
        "b": 0.75,  # BM25 parameter
    },
)

# Text embedding function for dense vectors
embedding_function = Function(
    name="text_embedding",
    function_type=FunctionType.TEXTEMBEDDING,
    input_field_names=["title", "description"],
    output_field_names=["text_vector"],
    params={
        "model_name": "sentence-transformers/all-MiniLM-L6-v2",
        "model_config": {
            "device": "cuda:0",
            "normalize_embeddings": True,
        },
    },
)

# Reranking function for relevance scoring
rerank_function = Function(
    name="cross_encoder_rerank",
    function_type=FunctionType.RERANK,
    input_field_names=["query", "document"],
    output_field_names=["relevance_score"],
    params={
        "model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2",
        "top_k": 100,
    },
)
218
```
219
220
## Metric Types
221
222
### MetricType Constants
223
224
```python { .api }
225
# Distance metrics for vector similarity
class MetricType:
    """String constants naming the supported similarity metrics."""

    L2 = "L2"  # Euclidean distance
    IP = "IP"  # Inner Product (cosine for normalized vectors)
    COSINE = "COSINE"  # Cosine similarity (auto-normalized)
    HAMMING = "HAMMING"  # Hamming distance (for binary vectors)
    JACCARD = "JACCARD"  # Jaccard similarity (for binary vectors)
    TANIMOTO = "TANIMOTO"  # Tanimoto coefficient
    SUBSTRUCTURE = "SUBSTRUCTURE"  # Chemical substructure matching
    SUPERSTRUCTURE = "SUPERSTRUCTURE"  # Chemical superstructure matching
235
```
236
237
### Metric Type Usage Guidelines
238
239
```python { .api }
240
# Metric selection by use case
metric_guidelines = {
    "general_embeddings": "L2",            # Most common for embeddings
    "normalized_embeddings": "COSINE",     # For unit vectors
    "dot_product_similarity": "IP",        # When vectors aren't normalized
    "binary_hashes": "HAMMING",            # For binary vectors
    "molecular_fingerprints": "TANIMOTO",  # Chemical similarity
    "sparse_vectors": "IP",                # For BM25/TF-IDF vectors
}

# Example index creation with different metrics
from pymilvus import MilvusClient

client = MilvusClient()

# MilvusClient.create_index takes an IndexParams object: build it with
# prepare_index_params() and describe each index via add_index().

# L2 distance for general embeddings
hnsw_params = client.prepare_index_params()
hnsw_params.add_index(
    field_name="embedding",
    index_type="HNSW",
    metric_type="L2",
    params={"M": 32, "efConstruction": 400},
)
client.create_index("documents", hnsw_params)

# Cosine similarity for normalized text embeddings
ivf_params = client.prepare_index_params()
ivf_params.add_index(
    field_name="text_vector",
    index_type="IVF_FLAT",
    metric_type="COSINE",
    params={"nlist": 1024},
)
client.create_index("articles", ivf_params)

# Inner product for sparse vectors
sparse_params = client.prepare_index_params()
sparse_params.add_index(
    field_name="sparse_vector",
    index_type="SPARSE_INVERTED_INDEX",
    metric_type="IP",
    params={"drop_ratio_build": 0.2},
)
client.create_index("bm25_collection", sparse_params)
275
```
276
277
## State Enums
278
279
### LoadState
280
281
```python { .api }
282
class LoadState:
    """String constants describing whether a collection is loaded."""

    NotExist = "NotExist"  # Collection doesn't exist
    NotLoad = "NotLoad"  # Collection not loaded into memory
    Loading = "Loading"  # Currently loading
    Loaded = "Loaded"  # Fully loaded and ready
287
```
288
289
### IndexState
290
291
```python { .api }
292
class IndexState:
    """String constants describing the progress of an index build."""

    IndexStateNone = "IndexStateNone"  # No index information
    Unissued = "Unissued"  # Index build not started
    InProgress = "InProgress"  # Index building in progress
    Finished = "Finished"  # Index build completed
    Failed = "Failed"  # Index build failed
    Deleted = "Deleted"  # Index was deleted
299
```
300
301
### ConsistencyLevel
302
303
```python { .api }
304
class ConsistencyLevel:
    """String constants naming the available read-consistency levels."""

    Strong = "Strong"  # Reads always see the latest written data
    Bounded = "Bounded"  # Bounded staleness (default)
    Eventually = "Eventually"  # Eventual consistency
    Session = "Session"  # Read your own writes within the same session
    Customized = "Customized"  # Custom consistency level
310
```
311
312
### State Usage Examples
313
314
```python { .api }
315
from pymilvus import MilvusClient, utility
316
317
client = MilvusClient()

# Check loading state
load_state = client.get_load_state("my_collection")
state = load_state.get("state", "Unknown")

if state == "NotLoad":
    print("Collection not loaded, loading now...")
    client.load_collection("my_collection")
elif state == "Loading":
    print("Collection is currently loading...")
    utility.wait_for_loading_complete("my_collection")
elif state == "Loaded":
    print("Collection ready for search")

# Check index building state
index_progress = utility.index_building_progress("my_collection", "vector_field")
index_state = index_progress.get("index_state", "Unknown")

state_messages = {
    "Unissued": "Index build queued",
    "InProgress": f"Building index: {index_progress.get('progress', 0)}%",
    "Finished": "Index build completed successfully",
    "Failed": "Index build failed - check logs",
}

print(state_messages.get(index_state, f"Unknown state: {index_state}"))

# Use different consistency levels
search_results = client.search(
    "my_collection",
    data=[[0.1] * 768],
    limit=10,
    consistency_level="Strong",  # Ensure latest data
)
352
```
353
354
## Complex Type Definitions
355
356
### Status
357
358
```python { .api }
359
from pymilvus.client.types import Status
360
361
class Status:
    """Result of an operation: a numeric code plus a message.

    The class attributes are the known status codes; ``SUCCESS`` (0) is the
    only code for which :meth:`OK` returns True.
    """

    SUCCESS = 0
    UNEXPECTED_ERROR = 1
    CONNECT_FAILED = 2
    PERMISSION_DENIED = 3
    COLLECTION_NOT_EXISTS = 4
    ILLEGAL_ARGUMENT = 5
    ILLEGAL_RANGE = 6
    ILLEGAL_DIMENSION = 7
    ILLEGAL_INDEX_TYPE = 8
    ILLEGAL_COLLECTION_NAME = 9
    ILLEGAL_TOPK = 10
    ILLEGAL_ROWRECORD = 11
    ILLEGAL_VECTOR_ID = 12
    ILLEGAL_SEARCH_RESULT = 13
    FILE_NOT_FOUND = 14
    META_FAILED = 15
    CACHE_FAILED = 16
    CANNOT_CREATE_FOLDER = 17
    CANNOT_CREATE_FILE = 18
    CANNOT_DELETE_FOLDER = 19
    CANNOT_DELETE_FILE = 20
    BUILD_INDEX_ERROR = 21
    ILLEGAL_NLIST = 22
    ILLEGAL_METRIC_TYPE = 23
    OUT_OF_MEMORY = 24

    def __init__(self, code: int = SUCCESS, message: str = ""):
        """Store the status ``code`` and its accompanying ``message``."""
        self.code = code
        self.message = message

    def OK(self) -> bool:
        """Return True if status indicates success"""
        return self.code == Status.SUCCESS
394
```
395
396
### BulkInsertState
397
398
```python { .api }
399
class BulkInsertState:
    """Snapshot of a bulk-insert task; every field starts at an empty default."""

    def __init__(self):
        self.task_id: int = 0
        self.state: str = ""  # "pending", "importing", "completed", "failed"
        self.row_count: int = 0
        self.id_list: List[int] = []
        self.infos: Dict[str, Any] = {}
        self.create_time: int = 0
        self.progress: float = 0.0
408
```
409
410
### Replica Information
411
412
```python { .api }
413
class Replica:
    """Description of one replica; every field starts at an empty default."""

    def __init__(self):
        self.id: int = 0
        self.collection_id: int = 0
        self.partition_ids: List[int] = []
        self.shard_replicas: List[Shard] = []
        self.node_ids: List[int] = []
        self.resource_group: str = ""
421
422
class Shard:
    """Description of one shard; every field starts at an empty default."""

    def __init__(self):
        self.channel_name: str = ""
        self.shard_leader: int = 0
        self.shard_nodes: List[int] = []
        self.dm_channel_name: str = ""
428
```
429
430
### ResourceGroupInfo
431
432
```python { .api }
433
class ResourceGroupInfo:
    """Description of a resource group; every field starts at an empty default."""

    def __init__(self):
        self.name: str = ""
        self.capacity: int = 0
        self.num_available_node: int = 0
        self.num_loaded_replica: Dict[str, int] = {}
        self.num_outgoing_node: Dict[str, int] = {}
        self.num_incoming_node: Dict[str, int] = {}
        self.config: Dict[str, Any] = {}
        self.nodes: List[int] = []
443
```
444
445
## Type Validation Utilities
446
447
### Data Type Validation
448
449
```python { .api }
450
def validate_data_type_compatibility(field_type: DataType, value: Any) -> bool:
    """Validate if a value is compatible with a field type.

    Args:
        field_type: The ``DataType`` of the target field.
        value: The Python value to check.

    Returns:
        True when ``value`` passes the check registered for ``field_type``;
        False when it fails or when ``field_type`` has no registered check.
    """

    def is_int(x: Any) -> bool:
        # bool is a subclass of int; it must only match BOOL, never an
        # integer or float type.
        return isinstance(x, int) and not isinstance(x, bool)

    def is_number(x: Any) -> bool:
        return is_int(x) or isinstance(x, float)

    def signed_int(bits: int):
        # Range of a two's-complement signed integer with the given width.
        low, high = -(1 << (bits - 1)), (1 << (bits - 1)) - 1
        return lambda x: is_int(x) and low <= x <= high

    type_validators = {
        DataType.BOOL: lambda x: isinstance(x, bool),
        DataType.INT8: signed_int(8),
        DataType.INT16: signed_int(16),
        DataType.INT32: signed_int(32),
        DataType.INT64: signed_int(64),
        DataType.FLOAT: is_number,
        DataType.DOUBLE: is_number,
        DataType.VARCHAR: lambda x: isinstance(x, str),
        DataType.JSON: lambda x: isinstance(
            x, (dict, list, str, int, float, bool, type(None))
        ),
        DataType.ARRAY: lambda x: isinstance(x, list),
        DataType.FLOAT_VECTOR: lambda x: isinstance(x, list)
        and all(is_number(v) for v in x),
        DataType.BINARY_VECTOR: lambda x: isinstance(x, (list, bytes)),
        DataType.SPARSE_FLOAT_VECTOR: lambda x: isinstance(x, (dict, list)),
    }

    validator = type_validators.get(field_type)
    if validator is None:
        return False
    return validator(value)
474
475
# Usage examples: (field type, value, expected validation result)
test_values = [
    (DataType.INT32, 12345, True),
    (DataType.INT32, 3000000000, False),  # Too large for INT32
    (DataType.FLOAT_VECTOR, [0.1, 0.2, 0.3], True),
    (DataType.FLOAT_VECTOR, [1, 2, "3"], False),  # Invalid vector element
    (DataType.VARCHAR, "hello", True),
    (DataType.JSON, {"key": "value"}, True),
]

for field_type, value, expected in test_values:
    result = validate_data_type_compatibility(field_type, value)
    status = "✓" if result == expected else "✗"
    print(f"{status} {field_type.name}: {value} -> {result}")
489
```
490
491
### Index Type Compatibility
492
493
```python { .api }
494
def get_compatible_index_types(field_type: DataType) -> List[IndexType]:
    """Get compatible index types for a field type.

    Args:
        field_type: The ``DataType`` of the field to be indexed.

    Returns:
        The index types listed for ``field_type``, or an empty list when
        the type has no entry.
    """
    numeric_indexes = [IndexType.STL_SORT]
    container_indexes = [IndexType.INVERTED]

    compatibility_map = {
        DataType.FLOAT_VECTOR: [
            IndexType.FLAT,
            IndexType.IVF_FLAT,
            IndexType.IVF_PQ,
            IndexType.HNSW,
            IndexType.ANNOY,
            IndexType.AUTOINDEX,
        ],
        DataType.BINARY_VECTOR: [IndexType.FLAT, IndexType.IVF_FLAT],
        DataType.SPARSE_FLOAT_VECTOR: [
            IndexType.SPARSE_INVERTED_INDEX,
            IndexType.SPARSE_WAND,
        ],
        DataType.VARCHAR: [IndexType.TRIE],
        DataType.INT64: list(numeric_indexes),
        DataType.INT32: list(numeric_indexes),
        DataType.DOUBLE: list(numeric_indexes),
        DataType.FLOAT: list(numeric_indexes),
        DataType.JSON: list(container_indexes),
        DataType.ARRAY: list(container_indexes),
    }

    return compatibility_map.get(field_type, [])
518
519
# Check index compatibility
field_types = [DataType.FLOAT_VECTOR, DataType.VARCHAR, DataType.JSON]

for field_type in field_types:
    compatible_indexes = get_compatible_index_types(field_type)
    print(f"{field_type.name} compatible indexes:")
    for index_type in compatible_indexes:
        print(f"  - {index_type.name}")
527
```
528
529
### Metric Type Compatibility
530
531
```python { .api }
532
def get_compatible_metrics(field_type: DataType) -> List[str]:
    """Get compatible metric types for a field type.

    Args:
        field_type: The ``DataType`` of the vector field.

    Returns:
        The metric names listed for ``field_type``, or an empty list when
        the type has no entry.
    """
    metric_compatibility = {
        DataType.FLOAT_VECTOR: ["L2", "IP", "COSINE"],
        DataType.BINARY_VECTOR: [
            "HAMMING",
            "JACCARD",
            "TANIMOTO",
            "SUBSTRUCTURE",
            "SUPERSTRUCTURE",
        ],
        DataType.SPARSE_FLOAT_VECTOR: ["IP"],
        DataType.FLOAT16_VECTOR: ["L2", "IP", "COSINE"],
        DataType.BFLOAT16_VECTOR: ["L2", "IP", "COSINE"],
        DataType.INT8_VECTOR: ["L2", "IP", "COSINE"],
    }

    return metric_compatibility.get(field_type, [])
545
546
# Validate metric compatibility
vector_fields = [
    (DataType.FLOAT_VECTOR, "L2"),
    (DataType.BINARY_VECTOR, "HAMMING"),
    (DataType.SPARSE_FLOAT_VECTOR, "IP"),
    (DataType.FLOAT_VECTOR, "HAMMING"),  # Invalid combination
]

for field_type, metric in vector_fields:
    compatible_metrics = get_compatible_metrics(field_type)
    is_compatible = metric in compatible_metrics
    status = "✓" if is_compatible else "✗"
    print(f"{status} {field_type.name} + {metric}: {is_compatible}")
559
```
560
561
## Configuration Constants
562
563
### Default Values
564
565
```python { .api }
566
class DefaultConfig:
    """Default values for connections, data limits, indexing and search."""

    # Connection defaults
    DEFAULT_HOST = "localhost"
    DEFAULT_PORT = "19530"
    DEFAULT_URI = f"http://{DEFAULT_HOST}:{DEFAULT_PORT}"
    MILVUS_CONN_ALIAS = "default"
    MILVUS_CONN_TIMEOUT = 10.0

    # Data limits
    MaxVarCharLength = 65535
    MaxArrayCapacity = 4096
    MaxDimension = 32768

    # Encoding
    EncodeProtocol = "utf-8"

    # Index defaults
    DefaultIndexType = IndexType.AUTOINDEX
    DefaultMetricType = "L2"

    # Search defaults
    DefaultSearchLimit = 10
    DefaultQueryLimit = 16384
    DefaultBatchSize = 1000
590
```
591
592
### Environment Configuration
593
594
```python { .api }
595
import os
596
597
# Environment-based configuration
def get_milvus_config():
    """Get Milvus configuration from environment variables.

    Reads ``MILVUS_URI``, ``MILVUS_USER``, ``MILVUS_PASSWORD``,
    ``MILVUS_DB_NAME`` and ``MILVUS_TIMEOUT``. Unset variables fall back to
    ``DefaultConfig.DEFAULT_URI``, empty strings, and
    ``DefaultConfig.MILVUS_CONN_TIMEOUT`` respectively.

    Returns:
        A dict with the keys ``uri``, ``user``, ``password``, ``db_name``
        and ``timeout`` (a float).

    Raises:
        ValueError: If ``MILVUS_TIMEOUT`` is set to a non-numeric value.
    """
    # An unset or blank MILVUS_TIMEOUT uses the default instead of
    # failing inside float("").
    raw_timeout = os.getenv("MILVUS_TIMEOUT", "").strip()
    timeout = float(raw_timeout or DefaultConfig.MILVUS_CONN_TIMEOUT)

    config = {
        "uri": os.getenv("MILVUS_URI", DefaultConfig.DEFAULT_URI),
        "user": os.getenv("MILVUS_USER", ""),
        "password": os.getenv("MILVUS_PASSWORD", ""),
        "db_name": os.getenv("MILVUS_DB_NAME", ""),
        "timeout": timeout,
    }

    return config
610
611
# Use environment configuration
from pymilvus import MilvusClient

config = get_milvus_config()
client = MilvusClient(**config)
614
```
615
616
PyMilvus types and enums provide a comprehensive type system for vector database operations, ensuring type safety and providing clear configuration options for all aspects of data management, indexing, and search operations.