Chroma - the open-source embedding database
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Collections are the primary containers for documents and embeddings in ChromaDB. They provide isolation, configuration, and organization for related documents with consistent embedding functions and metadata schemas.
Create new collections with optional configuration, metadata, and custom embedding functions.
def create_collection(
name: str,
configuration: Optional[CollectionConfiguration] = None,
metadata: Optional[CollectionMetadata] = None,
embedding_function: Optional[EmbeddingFunction] = None,
data_loader: Optional[DataLoader] = None,
get_or_create: bool = False
) -> Collection:
"""
Create a new collection.
Args:
name: The name of the collection
configuration: Optional configuration for the collection
metadata: Optional metadata for the collection
embedding_function: Function to generate embeddings
data_loader: Function to load data from URIs
get_or_create: If True, get existing collection instead of failing
Returns:
Collection: The created collection object
Raises:
ValueError: If collection already exists and get_or_create=False
"""

Usage Example:
import chromadb
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
client = chromadb.EphemeralClient()
# Create a simple collection
collection = client.create_collection(name="my_documents")
# Create with custom embedding function
openai_ef = OpenAIEmbeddingFunction(api_key="your-api-key")
collection = client.create_collection(
name="openai_collection",
embedding_function=openai_ef,
metadata={"description": "Documents with OpenAI embeddings"}
)

Retrieve existing collections by name with optional embedding function specification.
def get_collection(
name: str,
embedding_function: Optional[EmbeddingFunction] = None,
data_loader: Optional[DataLoader] = None
) -> Collection:
"""
Get an existing collection by name.
Args:
name: The name of the collection
embedding_function: Function to generate embeddings
data_loader: Function to load data from URIs
Returns:
Collection: The retrieved collection object
Raises:
ValueError: If collection does not exist
"""

Usage Example:
# Get existing collection
collection = client.get_collection("my_documents")
# Get with specific embedding function
collection = client.get_collection(
"openai_collection",
embedding_function=OpenAIEmbeddingFunction(api_key="your-api-key")
)

Retrieve an existing collection or create it if it doesn't exist, providing convenience for idempotent operations.
def get_or_create_collection(
name: str,
configuration: Optional[CollectionConfiguration] = None,
metadata: Optional[CollectionMetadata] = None,
embedding_function: Optional[EmbeddingFunction] = None,
data_loader: Optional[DataLoader] = None
) -> Collection:
"""
Get an existing collection or create it if it doesn't exist.
Args:
name: The name of the collection
configuration: Optional configuration for the collection
metadata: Optional metadata for the collection
embedding_function: Function to generate embeddings
data_loader: Function to load data from URIs
Returns:
Collection: The retrieved or created collection object
"""

Get a list of all collections with optional pagination support.
def list_collections(
limit: Optional[int] = None,
offset: Optional[int] = None
) -> Sequence[Collection]:
"""
List all collections.
Args:
limit: Maximum number of collections to return
offset: Number of collections to skip
Returns:
Sequence[Collection]: List of collection objects
"""

Usage Example:
# List all collections
collections = client.list_collections()
for collection in collections:
print(f"Collection: {collection.name}")
# List with pagination
first_10 = client.list_collections(limit=10)
next_10 = client.list_collections(limit=10, offset=10)

Get the total number of collections in the database.
def count_collections() -> int:
"""
Count the number of collections.
Returns:
int: The number of collections in the database
"""

Remove collections and all their contained documents and embeddings.
def delete_collection(name: str) -> None:
"""
Delete a collection by name.
Args:
name: The name of the collection to delete
Raises:
ValueError: If collection does not exist
"""

Usage Example:
# Delete a collection
client.delete_collection("old_collection")

Access collection metadata and properties.
class Collection:
@property
def name(self) -> str:
"""The name of the collection."""
@property
def id(self) -> UUID:
"""The unique identifier of the collection."""
@property
def metadata(self) -> Optional[CollectionMetadata]:
"""The metadata associated with the collection."""

Update collection properties including name, metadata, and configuration.
def modify(
name: Optional[str] = None,
metadata: Optional[CollectionMetadata] = None,
configuration: Optional[CollectionConfiguration] = None
) -> None:
"""
Modify collection properties.
Args:
name: New name for the collection
metadata: New metadata for the collection
configuration: New configuration for the collection
"""

Usage Example:
collection = client.get_collection("my_collection")
# Update metadata
collection.modify(metadata={"version": "2.0", "updated": "2024-01-01"})
# Rename collection
collection.modify(name="renamed_collection")

Get the number of documents in a collection.
def count(self) -> int:
"""
Count the number of documents in the collection.
Returns:
int: The number of documents in the collection
"""

Create a copy of a collection with a new name while preserving all documents and embeddings.
def fork(self, new_name: str) -> Collection:
"""
Create a copy of the collection with a new name.
Args:
new_name: The name for the new collection
Returns:
Collection: The newly created collection copy
"""

Usage Example:
original = client.get_collection("original_collection")
copy = original.fork("collection_backup")

from typing import Dict, Any, Optional, Sequence
from uuid import UUID
CollectionMetadata = Dict[str, Any]
CollectionConfiguration = Dict[str, Any]
class Collection:
name: str
id: UUID
metadata: Optional[CollectionMetadata]

Install with Tessl CLI
npx tessl i tessl/pypi-chromadb