or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-management.md index-management.md index.md milvus-client.md orm-collection.md search-operations.md types-enums.md user-management.md utility-functions.md

docs/types-enums.md

0

# Types and Enums

1

2

PyMilvus provides a comprehensive type system including data types, enums for configuration options, status codes, and complex type definitions. This reference covers all enumeration values, type constants, and data structures used throughout the API.

3

4

## Data Types

5

6

### DataType Enum

7

8

```python { .api }

9

from pymilvus import DataType

10

11

class DataType(IntEnum):
    """Integer codes identifying the type of a collection field.

    Scalar and composite types occupy the low codes; vector types start
    at 100. ``UNKNOWN`` (999) is the catch-all value.
    """

    NONE = 0
    BOOL = 1
    INT8 = 2
    INT16 = 3
    INT32 = 4
    INT64 = 5
    FLOAT = 10
    DOUBLE = 11
    STRING = 20  # Deprecated, use VARCHAR
    VARCHAR = 21
    ARRAY = 22
    JSON = 23
    GEOMETRY = 24  # For geospatial data
    BINARY_VECTOR = 100
    FLOAT_VECTOR = 101
    FLOAT16_VECTOR = 102  # Half precision vectors
    BFLOAT16_VECTOR = 103  # Brain float 16 vectors
    SPARSE_FLOAT_VECTOR = 104  # Sparse vectors for text search
    INT8_VECTOR = 105  # Quantized vectors
    UNKNOWN = 999

32

```

33

34

### Data Type Usage Examples

35

36

```python { .api }

37

from pymilvus import FieldSchema, DataType

38

39

# Scalar data types
bool_field = FieldSchema("active", DataType.BOOL)
int_field = FieldSchema("count", DataType.INT64)
float_field = FieldSchema("score", DataType.DOUBLE)
text_field = FieldSchema("content", DataType.VARCHAR, max_length=1000)

# Vector data types
dense_vector = FieldSchema("embedding", DataType.FLOAT_VECTOR, dim=768)
binary_vector = FieldSchema("hash", DataType.BINARY_VECTOR, dim=128)
sparse_vector = FieldSchema("sparse_embed", DataType.SPARSE_FLOAT_VECTOR)

# Half-precision vectors for memory efficiency
fp16_vector = FieldSchema("fp16_embed", DataType.FLOAT16_VECTOR, dim=512)
bf16_vector = FieldSchema("bf16_embed", DataType.BFLOAT16_VECTOR, dim=512)

# Quantized vectors for storage efficiency
int8_vector = FieldSchema("quantized", DataType.INT8_VECTOR, dim=256)

# Complex data types
json_field = FieldSchema("metadata", DataType.JSON)
array_field = FieldSchema(
    "tags",
    DataType.ARRAY,
    max_capacity=20,
    element_type=DataType.VARCHAR,
)

# Geospatial data (experimental)
geo_field = FieldSchema("location", DataType.GEOMETRY)

64

```

65

66

### Data Type Characteristics

67

68

```python { .api }

69

# Vector type memory usage comparison (per vector)
vector_memory_usage = {
    DataType.FLOAT_VECTOR: "dimension * 4 bytes (32-bit floats)",
    DataType.FLOAT16_VECTOR: "dimension * 2 bytes (16-bit floats)",
    DataType.BFLOAT16_VECTOR: "dimension * 2 bytes (bfloat16)",
    DataType.INT8_VECTOR: "dimension * 1 byte (quantized)",
    DataType.BINARY_VECTOR: "dimension / 8 bytes (packed bits)",
    DataType.SPARSE_FLOAT_VECTOR: "variable (only non-zero values stored)",
}

# Example: 768-dimension vectors
dimension = 768
for vector_type, formula in vector_memory_usage.items():
    if "dimension *" in formula:
        # The bytes-per-dimension factor is the first token after " * ".
        multiplier = float(formula.split(" * ")[1].split(" ")[0])
        memory_bytes = dimension * multiplier
        print(f"{vector_type.name}: {memory_bytes} bytes per vector")
    else:
        # Formulas without a plain multiplier are printed verbatim.
        print(f"{vector_type.name}: {formula}")

88

```

89

90

## Index Types

91

92

### IndexType Enum

93

94

```python { .api }

95

from pymilvus import IndexType

96

97

class IndexType(IntEnum):
    """Integer codes identifying an index algorithm.

    ``IVF_FLAT`` and ``IVF_SQ8_H`` are aliases: they resolve to the same
    members as ``IVFLAT`` and ``IVF_SQ8H`` and do not appear separately
    when iterating over the enum.
    """

    INVALID = 0
    FLAT = 1  # Exact search, 100% recall
    IVFLAT = 2
    IVF_SQ8 = 3  # IVF with scalar quantization
    RNSG = 4  # Refined Navigating Spreading-out Graph (deprecated)
    IVF_SQ8H = 5
    IVF_PQ = 6  # IVF with product quantization
    HNSW = 11  # Hierarchical Navigable Small World
    ANNOY = 12  # Approximate Nearest Neighbors Oh Yeah
    AUTOINDEX = 13  # Automatic index selection
    SPARSE_INVERTED_INDEX = 14  # For sparse vectors
    SPARSE_WAND = 15  # Weak AND for sparse vectors
    TRIE = 16  # For string prefix matching
    STL_SORT = 17  # For numeric range queries
    INVERTED = 18  # For JSON and array fields
    GPU_IVF_FLAT = 19  # GPU-accelerated IVF_FLAT
    GPU_IVF_PQ = 20  # GPU-accelerated IVF_PQ
    GPU_BRUTE_FORCE = 21  # GPU exact search

    # Alternative names. Reusing an existing value makes these enum aliases,
    # so IndexType.IVF_FLAT is IndexType.IVFLAT.
    IVF_FLAT = IVFLAT
    IVF_SQ8_H = IVF_SQ8H

116

```

117

118

### Index Type Usage by Field Type

119

120

```python { .api }

121

# Vector field indexes
vector_indexes = {
    DataType.FLOAT_VECTOR: [
        IndexType.FLAT,  # Exact search
        IndexType.IVF_FLAT,  # Good accuracy/speed balance
        IndexType.IVF_PQ,  # Memory efficient
        IndexType.HNSW,  # Fast search
        IndexType.ANNOY,  # Memory efficient
        IndexType.AUTOINDEX,  # Automatic selection
    ],
    DataType.BINARY_VECTOR: [
        IndexType.FLAT,  # Exact Hamming distance
        IndexType.IVF_FLAT,  # Approximate Hamming search
    ],
    DataType.SPARSE_FLOAT_VECTOR: [
        IndexType.SPARSE_INVERTED_INDEX,  # Standard for sparse vectors
        IndexType.SPARSE_WAND,  # Optimized sparse search
    ],
}

# Scalar field indexes
scalar_indexes = {
    DataType.VARCHAR: [IndexType.TRIE],  # String prefix/equality
    DataType.INT64: [IndexType.STL_SORT],  # Numeric range queries
    DataType.DOUBLE: [IndexType.STL_SORT],  # Numeric range queries
    DataType.JSON: [IndexType.INVERTED],  # Key-value queries
    DataType.ARRAY: [IndexType.INVERTED],  # Array containment queries
}

# GPU-accelerated indexes (requires GPU-enabled Milvus)
gpu_indexes = {
    DataType.FLOAT_VECTOR: [
        IndexType.GPU_IVF_FLAT,
        IndexType.GPU_IVF_PQ,
        IndexType.GPU_BRUTE_FORCE,
    ],
}

158

```

159

160

## Function Types

161

162

### FunctionType Enum

163

164

```python { .api }

165

from pymilvus import FunctionType

166

167

class FunctionType(IntEnum):
    """Integer codes identifying the kind of a schema-level function."""

    UNKNOWN = 0
    BM25 = 1  # Sparse vector generation from text
    TEXTEMBEDDING = 2  # Dense vector generation from text
    RERANK = 3  # Relevance scoring for reranking

172

```

173

174

### Function Type Usage

175

176

```python { .api }

177

from pymilvus import Function, FunctionType

178

179

# BM25 function for sparse text vectors
bm25_function = Function(
    name="text_bm25",
    function_type=FunctionType.BM25,
    input_field_names=["content"],
    output_field_names=["bm25_sparse"],
    params={
        "language": "en",
        "k1": 1.2,  # BM25 parameter
        "b": 0.75,  # BM25 parameter
    },
)

# Text embedding function for dense vectors
embedding_function = Function(
    name="text_embedding",
    function_type=FunctionType.TEXTEMBEDDING,
    input_field_names=["title", "description"],
    output_field_names=["text_vector"],
    params={
        "model_name": "sentence-transformers/all-MiniLM-L6-v2",
        "model_config": {
            "device": "cuda:0",
            "normalize_embeddings": True,
        },
    },
)

# Reranking function for relevance scoring
rerank_function = Function(
    name="cross_encoder_rerank",
    function_type=FunctionType.RERANK,
    input_field_names=["query", "document"],
    output_field_names=["relevance_score"],
    params={
        "model_name": "cross-encoder/ms-marco-MiniLM-L-6-v2",
        "top_k": 100,
    },
)

218

```

219

220

## Metric Types

221

222

### MetricType Constants

223

224

```python { .api }

225

# Distance metrics for vector similarity

226

class MetricType:
    """String constants naming the distance/similarity metrics.

    Each constant's value is identical to its name, so the plain string
    (for example ``"L2"``) can be used interchangeably.
    """

    L2 = "L2"  # Euclidean distance
    IP = "IP"  # Inner Product (cosine for normalized vectors)
    COSINE = "COSINE"  # Cosine similarity (auto-normalized)
    HAMMING = "HAMMING"  # Hamming distance (for binary vectors)
    JACCARD = "JACCARD"  # Jaccard similarity (for binary vectors)
    TANIMOTO = "TANIMOTO"  # Tanimoto coefficient
    SUBSTRUCTURE = "SUBSTRUCTURE"  # Chemical substructure matching
    SUPERSTRUCTURE = "SUPERSTRUCTURE"  # Chemical superstructure matching

235

```

236

237

### Metric Type Usage Guidelines

238

239

```python { .api }

240

# Metric selection by use case
metric_guidelines = {
    "general_embeddings": "L2",  # Most common for embeddings
    "normalized_embeddings": "COSINE",  # For unit vectors
    "dot_product_similarity": "IP",  # When vectors aren't normalized
    "binary_hashes": "HAMMING",  # For binary vectors
    "molecular_fingerprints": "TANIMOTO",  # Chemical similarity
    "sparse_vectors": "IP",  # For BM25/TF-IDF vectors
}

249

250

# Example index creation with different metrics

251

from pymilvus import MilvusClient

252

253

client = MilvusClient()

# L2 distance for general embeddings
client.create_index("documents", "embedding", {
    "index_type": "HNSW",
    "metric_type": "L2",
    "params": {"M": 32, "efConstruction": 400},
})

# Cosine similarity for normalized text embeddings
client.create_index("articles", "text_vector", {
    "index_type": "IVF_FLAT",
    "metric_type": "COSINE",
    "params": {"nlist": 1024},
})

# Inner product for sparse vectors
client.create_index("bm25_collection", "sparse_vector", {
    "index_type": "SPARSE_INVERTED_INDEX",
    "metric_type": "IP",
    "params": {"drop_ratio_build": 0.2},
})

275

```

276

277

## State Enums

278

279

### LoadState

280

281

```python { .api }

282

class LoadState:
    """String constants describing whether a collection is loaded."""

    NotExist = "NotExist"  # Collection doesn't exist
    NotLoad = "NotLoad"  # Collection not loaded into memory
    Loading = "Loading"  # Currently loading
    Loaded = "Loaded"  # Fully loaded and ready

287

```

288

289

### IndexState

290

291

```python { .api }

292

class IndexState:
    """String constants describing the progress of an index build."""

    IndexStateNone = "IndexStateNone"  # No index information
    Unissued = "Unissued"  # Index build not started
    InProgress = "InProgress"  # Index building in progress
    Finished = "Finished"  # Index build completed
    Failed = "Failed"  # Index build failed
    Deleted = "Deleted"  # Index was deleted

299

```

300

301

### ConsistencyLevel

302

303

```python { .api }

304

class ConsistencyLevel:
    """String constants naming the available read-consistency levels."""

    Strong = "Strong"  # Read your own writes immediately
    Bounded = "Bounded"  # Bounded staleness (default)
    Eventually = "Eventually"  # Eventual consistency
    Session = "Session"  # Session consistency
    Customized = "Customized"  # Custom consistency level

310

```

311

312

### State Usage Examples

313

314

```python { .api }

315

from pymilvus import MilvusClient, utility

316

317

client = MilvusClient()

# Check loading state
load_state = client.get_load_state("my_collection")
state = load_state.get("state", "Unknown")

if state == "NotLoad":
    print("Collection not loaded, loading now...")
    client.load_collection("my_collection")
elif state == "Loading":
    print("Collection is currently loading...")
    utility.wait_for_loading_complete("my_collection")
elif state == "Loaded":
    print("Collection ready for search")

# Check index building state
index_progress = utility.index_building_progress("my_collection", "vector_field")
index_state = index_progress.get("index_state", "Unknown")

state_messages = {
    "Unissued": "Index build queued",
    "InProgress": f"Building index: {index_progress.get('progress', 0)}%",
    "Finished": "Index build completed successfully",
    "Failed": "Index build failed - check logs",
}

# States missing from the table fall through to a generic message.
print(state_messages.get(index_state, f"Unknown state: {index_state}"))

# Use different consistency levels
search_results = client.search(
    "my_collection",
    data=[[0.1] * 768],
    limit=10,
    consistency_level="Strong",  # Ensure latest data
)

352

```

353

354

## Complex Type Definitions

355

356

### Status

357

358

```python { .api }

359

from pymilvus.client.types import Status

360

361

class Status:
    """Result of an operation: a numeric status code plus a message.

    The class attributes enumerate the known codes; ``SUCCESS`` (0) is the
    only code for which ``OK()`` returns True.
    """

    SUCCESS = 0
    UNEXPECTED_ERROR = 1
    CONNECT_FAILED = 2
    PERMISSION_DENIED = 3
    COLLECTION_NOT_EXISTS = 4
    ILLEGAL_ARGUMENT = 5
    ILLEGAL_DIMENSION = 7
    ILLEGAL_INDEX_TYPE = 8
    ILLEGAL_COLLECTION_NAME = 9
    ILLEGAL_TOPK = 10
    ILLEGAL_ROWRECORD = 11
    ILLEGAL_VECTOR_ID = 12
    ILLEGAL_SEARCH_RESULT = 13
    FILE_NOT_FOUND = 14
    META_FAILED = 15
    CACHE_FAILED = 16
    CANNOT_CREATE_FOLDER = 17
    CANNOT_CREATE_FILE = 18
    CANNOT_DELETE_FOLDER = 19
    CANNOT_DELETE_FILE = 20
    BUILD_INDEX_ERROR = 21
    ILLEGAL_NLIST = 22
    ILLEGAL_METRIC_TYPE = 23
    OUT_OF_MEMORY = 24

    def __init__(self, code: int = SUCCESS, message: str = ""):
        """Store the status ``code`` (defaults to SUCCESS) and ``message``."""
        self.code = code
        self.message = message

    def OK(self) -> bool:
        """Return True if status indicates success"""
        return self.code == Status.SUCCESS

394

```

395

396

### BulkInsertState

397

398

```python { .api }

399

class BulkInsertState:
    """Attribute container describing a bulk-insert task.

    Every field starts at an empty or zero default.
    """

    def __init__(self):
        self.task_id: int = 0
        self.state: str = ""  # "pending", "importing", "completed", "failed"
        self.row_count: int = 0
        self.id_list: List[int] = []
        self.infos: Dict[str, Any] = {}
        self.create_time: int = 0
        self.progress: float = 0.0

408

```

409

410

### Replica Information

411

412

```python { .api }

413

class Replica:
    """Attribute container describing one replica of a collection.

    Every field starts at an empty or zero default.
    """

    def __init__(self):
        self.id: int = 0
        self.collection_id: int = 0
        self.partition_ids: List[int] = []
        self.shard_replicas: List[Shard] = []
        self.node_ids: List[int] = []
        self.resource_group: str = ""

421

422

class Shard:
    """Attribute container describing one shard of a replica.

    Every field starts at an empty or zero default.
    """

    def __init__(self):
        self.channel_name: str = ""
        self.shard_leader: int = 0
        self.shard_nodes: List[int] = []
        self.dm_channel_name: str = ""

428

```

429

430

### ResourceGroupInfo

431

432

```python { .api }

433

class ResourceGroupInfo:
    """Attribute container describing a resource group.

    Every field starts at an empty or zero default.
    """

    def __init__(self):
        self.name: str = ""
        self.capacity: int = 0
        self.num_available_node: int = 0
        self.num_loaded_replica: Dict[str, int] = {}
        self.num_outgoing_node: Dict[str, int] = {}
        self.num_incoming_node: Dict[str, int] = {}
        self.config: Dict[str, Any] = {}
        self.nodes: List[int] = []

443

```

444

445

## Type Validation Utilities

446

447

### Data Type Validation

448

449

```python { .api }

450

def validate_data_type_compatibility(field_type: DataType, value: Any) -> bool:
    """Validate if a value is compatible with a field type.

    Args:
        field_type: The ``DataType`` of the target field.
        value: The Python value to check.

    Returns:
        True if ``value`` passes the check registered for ``field_type``;
        False if it fails or if no check is registered for that type.
    """

    def is_int(x, bits):
        # bool is a subclass of int; reject it so True/False are not
        # accepted as integer values.
        if isinstance(x, bool) or not isinstance(x, int):
            return False
        limit = 1 << (bits - 1)
        return -limit <= x < limit

    def is_number(x):
        # Same bool exclusion as is_int, for float-typed fields.
        return isinstance(x, (int, float)) and not isinstance(x, bool)

    type_validators = {
        DataType.BOOL: lambda x: isinstance(x, bool),
        DataType.INT8: lambda x: is_int(x, 8),
        DataType.INT16: lambda x: is_int(x, 16),
        DataType.INT32: lambda x: is_int(x, 32),
        # Python ints are unbounded, so the 64-bit range must be checked too.
        DataType.INT64: lambda x: is_int(x, 64),
        DataType.FLOAT: is_number,
        DataType.DOUBLE: is_number,
        DataType.VARCHAR: lambda x: isinstance(x, str),
        DataType.JSON: lambda x: isinstance(x, (dict, list, str, int, float, bool, type(None))),
        DataType.ARRAY: lambda x: isinstance(x, list),
        DataType.FLOAT_VECTOR: lambda x: isinstance(x, list) and all(is_number(v) for v in x),
        DataType.BINARY_VECTOR: lambda x: isinstance(x, (list, bytes)),
        DataType.SPARSE_FLOAT_VECTOR: lambda x: isinstance(x, (dict, list)),
    }

    validator = type_validators.get(field_type)
    if validator is None:
        return False
    return validator(value)

474

475

# Usage examples: (field type, value, expected result)
test_values = [
    (DataType.INT32, 12345, True),
    (DataType.INT32, 3000000000, False),  # Too large for INT32
    (DataType.FLOAT_VECTOR, [0.1, 0.2, 0.3], True),
    (DataType.FLOAT_VECTOR, [1, 2, "3"], False),  # Invalid vector element
    (DataType.VARCHAR, "hello", True),
    (DataType.JSON, {"key": "value"}, True),
]

for field_type, value, expected in test_values:
    result = validate_data_type_compatibility(field_type, value)
    # A check mark means the validator agreed with the expected result.
    status = "✓" if result == expected else "✗"
    print(f"{status} {field_type.name}: {value} -> {result}")

489

```

490

491

### Index Type Compatibility

492

493

```python { .api }

494

def get_compatible_index_types(field_type: DataType) -> List[IndexType]:
    """Get compatible index types for a field type.

    Args:
        field_type: The ``DataType`` of the field to be indexed.

    Returns:
        The index types listed for ``field_type``, or an empty list when
        the type has no entry in the table.
    """
    compatibility_map = {
        DataType.FLOAT_VECTOR: [
            IndexType.FLAT,
            IndexType.IVF_FLAT,
            IndexType.IVF_PQ,
            IndexType.HNSW,
            IndexType.ANNOY,
            IndexType.AUTOINDEX,
        ],
        DataType.BINARY_VECTOR: [
            IndexType.FLAT,
            IndexType.IVF_FLAT,
        ],
        DataType.SPARSE_FLOAT_VECTOR: [
            IndexType.SPARSE_INVERTED_INDEX,
            IndexType.SPARSE_WAND,
        ],
        DataType.VARCHAR: [IndexType.TRIE],
        DataType.INT64: [IndexType.STL_SORT],
        DataType.INT32: [IndexType.STL_SORT],
        DataType.DOUBLE: [IndexType.STL_SORT],
        DataType.FLOAT: [IndexType.STL_SORT],
        DataType.JSON: [IndexType.INVERTED],
        DataType.ARRAY: [IndexType.INVERTED],
    }

    return compatibility_map.get(field_type, [])

518

519

# Check index compatibility
field_types = [DataType.FLOAT_VECTOR, DataType.VARCHAR, DataType.JSON]

for field_type in field_types:
    compatible_indexes = get_compatible_index_types(field_type)
    print(f"{field_type.name} compatible indexes:")
    for index_type in compatible_indexes:
        print(f" - {index_type.name}")

527

```

528

529

### Metric Type Compatibility

530

531

```python { .api }

532

def get_compatible_metrics(field_type: DataType) -> List[str]:
    """Get compatible metric types for a field type.

    Args:
        field_type: The ``DataType`` of a vector field.

    Returns:
        The metric names listed for ``field_type``, or an empty list when
        the type has no entry in the table.
    """
    metric_compatibility = {
        DataType.FLOAT_VECTOR: ["L2", "IP", "COSINE"],
        DataType.BINARY_VECTOR: [
            "HAMMING",
            "JACCARD",
            "TANIMOTO",
            "SUBSTRUCTURE",
            "SUPERSTRUCTURE",
        ],
        DataType.SPARSE_FLOAT_VECTOR: ["IP"],
        DataType.FLOAT16_VECTOR: ["L2", "IP", "COSINE"],
        DataType.BFLOAT16_VECTOR: ["L2", "IP", "COSINE"],
        DataType.INT8_VECTOR: ["L2", "IP", "COSINE"],
    }

    return metric_compatibility.get(field_type, [])

545

546

# Validate metric compatibility
vector_fields = [
    (DataType.FLOAT_VECTOR, "L2"),
    (DataType.BINARY_VECTOR, "HAMMING"),
    (DataType.SPARSE_FLOAT_VECTOR, "IP"),
    (DataType.FLOAT_VECTOR, "HAMMING"),  # Invalid combination
]

for field_type, metric in vector_fields:
    compatible_metrics = get_compatible_metrics(field_type)
    is_compatible = metric in compatible_metrics
    status = "✓" if is_compatible else "✗"
    print(f"{status} {field_type.name} + {metric}: {is_compatible}")

559

```

560

561

## Configuration Constants

562

563

### Default Values

564

565

```python { .api }

566

class DefaultConfig:
    """Default values for connections, data limits, indexing and search."""

    # Connection defaults
    DEFAULT_HOST = "localhost"
    DEFAULT_PORT = "19530"
    # Built from the host and port constants defined just above.
    DEFAULT_URI = f"http://{DEFAULT_HOST}:{DEFAULT_PORT}"
    MILVUS_CONN_ALIAS = "default"
    MILVUS_CONN_TIMEOUT = 10.0

    # Data limits
    MaxVarCharLength = 65535
    MaxArrayCapacity = 4096
    MaxDimension = 32768

    # Encoding
    EncodeProtocol = "utf-8"

    # Index defaults
    DefaultIndexType = IndexType.AUTOINDEX
    DefaultMetricType = "L2"

    # Search defaults
    DefaultSearchLimit = 10
    DefaultQueryLimit = 16384
    DefaultBatchSize = 1000

590

```

591

592

### Environment Configuration

593

594

```python { .api }

595

import os

596

597

# Environment-based configuration

598

def get_milvus_config():
    """Get Milvus configuration from environment variables.

    Reads MILVUS_URI, MILVUS_USER, MILVUS_PASSWORD, MILVUS_DB_NAME and
    MILVUS_TIMEOUT, falling back to ``DefaultConfig`` values or empty
    strings when a variable is not set.

    Returns:
        A dict with the keys ``uri``, ``user``, ``password``, ``db_name``
        and ``timeout`` (a float).

    Raises:
        ValueError: If MILVUS_TIMEOUT is set to a non-empty value that is
            not a valid number.
    """
    # Treat an empty MILVUS_TIMEOUT like an unset one; float("") would
    # otherwise raise ValueError.
    raw_timeout = os.getenv("MILVUS_TIMEOUT") or DefaultConfig.MILVUS_CONN_TIMEOUT

    config = {
        "uri": os.getenv("MILVUS_URI", DefaultConfig.DEFAULT_URI),
        "user": os.getenv("MILVUS_USER", ""),
        "password": os.getenv("MILVUS_PASSWORD", ""),
        "db_name": os.getenv("MILVUS_DB_NAME", ""),
        "timeout": float(raw_timeout),
    }

    return config

610

611

# Use environment configuration
from pymilvus import MilvusClient  # not imported earlier in this snippet

config = get_milvus_config()
client = MilvusClient(**config)

614

```

615

616

PyMilvus types and enums provide a comprehensive type system for vector database operations, ensuring type safety and providing clear configuration options for all aspects of data management, indexing, and search operations.