A lightweight version of Milvus wrapped with Python for vector similarity search in AI applications
—
The primary and recommended way to use milvus-lite is through the pymilvus client, which automatically activates milvus-lite when using local file URIs. This approach provides access to the complete Milvus API surface including collections, vector operations, indexing, and querying.
Create a MilvusClient instance that automatically uses milvus-lite for local database files.
from pymilvus import MilvusClient
# Local file URI activates milvus-lite automatically
client = MilvusClient(uri="./database.db")
# Alternative: specify full path
client = MilvusClient(uri="/path/to/database.db")
Usage Example:
from pymilvus import MilvusClient
# Initialize client - this starts milvus-lite internally
client = MilvusClient("./my_vector_db.db")
# Client is ready for all standard Milvus operations
collection_exists = client.has_collection("test_collection")
Full collection lifecycle management including creation, deletion, listing, and metadata operations.
# Collection creation with schema
client.create_collection(
collection_name: str,
dimension: int,
primary_field_name: str = "id",
id_type: str = "int",
vector_field_name: str = "vector",
metric_type: str = "COSINE",
auto_id: bool = False,
timeout: Optional[float] = None,
**kwargs
) -> None
# Collection existence check
client.has_collection(collection_name: str, timeout: Optional[float] = None) -> bool
# Collection deletion
client.drop_collection(collection_name: str, timeout: Optional[float] = None) -> None
# List all collections
client.list_collections(timeout: Optional[float] = None) -> List[str]
# Describe collection (schema and metadata)
client.describe_collection(collection_name: str, timeout: Optional[float] = None) -> Dict[str, Any]
Usage Example:
# Create collection with 384-dimensional vectors
client.create_collection(
collection_name="embeddings",
dimension=384,
metric_type="COSINE",
auto_id=True
)
# Check if collection exists
if client.has_collection("embeddings"):
    stats = client.describe_collection("embeddings")
    print(f"Collection has {stats['num_entities']} entities")
Insert, upsert, delete, and query operations for vector data with support for batch operations and metadata filtering.
# Insert data
client.insert(
collection_name: str,
data: List[Dict[str, Any]],
partition_name: Optional[str] = None,
timeout: Optional[float] = None
) -> Dict[str, Any]
# Upsert data (insert or update if exists)
client.upsert(
collection_name: str,
data: List[Dict[str, Any]],
partition_name: Optional[str] = None,
timeout: Optional[float] = None
) -> Dict[str, Any]
# Delete data by filter expression
client.delete(
collection_name: str,
filter: str,
partition_name: Optional[str] = None,
timeout: Optional[float] = None
) -> Dict[str, Any]
# Query data by filter
client.query(
collection_name: str,
filter: str,
output_fields: Optional[List[str]] = None,
partition_names: Optional[List[str]] = None,
timeout: Optional[float] = None
) -> List[Dict[str, Any]]
Usage Example:
# Insert vector data with metadata
data = [
{"id": 1, "vector": [0.1, 0.2, 0.3], "category": "document", "title": "Sample Doc"},
{"id": 2, "vector": [0.4, 0.5, 0.6], "category": "image", "title": "Sample Image"}
]
result = client.insert(collection_name="embeddings", data=data)
print(f"Inserted {result['insert_count']} entities")
# Query with filter
results = client.query(
collection_name="embeddings",
filter='category == "document"',
output_fields=["id", "title", "category"]
)
High-performance vector similarity search with support for various distance metrics, filtering, and result limiting.
# Vector similarity search
client.search(
collection_name: str,
data: List[List[float]],
filter: Optional[str] = None,
limit: int = 10,
output_fields: Optional[List[str]] = None,
search_params: Optional[Dict[str, Any]] = None,
partition_names: Optional[List[str]] = None,
timeout: Optional[float] = None
) -> List[List[Dict[str, Any]]]
# Hybrid search (multiple vector fields)
client.hybrid_search(
collection_name: str,
reqs: List[Dict[str, Any]],
ranker: Dict[str, Any],
limit: int = 10,
partition_names: Optional[List[str]] = None,
output_fields: Optional[List[str]] = None,
timeout: Optional[float] = None
) -> List[List[Dict[str, Any]]]
Usage Example:
# Single vector search
query_vector = [0.15, 0.25, 0.35] # Query embedding
results = client.search(
collection_name="embeddings",
data=[query_vector],
filter='category == "document"',
limit=5,
output_fields=["id", "title", "category"]
)
# Process results
for hits in results:
    for hit in hits:
        print(f"ID: {hit['id']}, Score: {hit['distance']}, Title: {hit['entity']['title']}")
Create and manage vector indexes for improved search performance, with support for different index types and parameters.
# Create index on vector field
client.create_index(
collection_name: str,
field_name: str,
index_params: Dict[str, Any],
timeout: Optional[float] = None
) -> None
# Drop index
client.drop_index(
collection_name: str,
field_name: str,
timeout: Optional[float] = None
) -> None
# List indexes
client.list_indexes(
collection_name: str,
timeout: Optional[float] = None
) -> List[str]
# Describe index
client.describe_index(
collection_name: str,
field_name: str,
timeout: Optional[float] = None
) -> Dict[str, Any]
Usage Example:
# Create IVF_FLAT index for better performance on larger datasets
index_params = {
"index_type": "IVF_FLAT",
"metric_type": "COSINE",
"params": {"nlist": 128}
}
client.create_index(
collection_name="embeddings",
field_name="vector",
index_params=index_params
)
# Check index information
index_info = client.describe_index(
collection_name="embeddings",
field_name="vector"
)
print(f"Index type: {index_info['index_type']}")
# Client automatically manages connection lifecycle
# No explicit connect/disconnect needed for milvus-lite
# Client will use file-based connection for local URIs
# Connection is established on first operation
Install with Tessl CLI
npx tessl i tessl/pypi-milvus-lite