An integration package connecting Chroma and LangChain for vector database operations.
npx @tessl/cli install tessl/pypi-langchain-chroma@0.2.0
An integration package connecting Chroma and LangChain for vector database operations. This package provides a LangChain-compatible interface to ChromaDB, enabling developers to use ChromaDB as a vector store for embedding-based search and retrieval in AI applications, particularly for semantic search, question-answering systems, and retrieval-augmented generation (RAG) pipelines.
pip install langchain-chroma
Dependencies: chromadb>=1.0.9, langchain-core>=0.3.70, numpy>=1.26.0
from langchain_chroma import Chroma
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
# Initialize the vector store
embeddings = OpenAIEmbeddings()
vector_store = Chroma(
collection_name="my_collection",
embedding_function=embeddings,
persist_directory="./chroma_db"
)
# Add documents
documents = [
Document(page_content="Hello world", metadata={"source": "greeting"}),
Document(page_content="Python is great", metadata={"source": "programming"})
]
vector_store.add_documents(documents)
# Perform similarity search
results = vector_store.similarity_search("programming language", k=2)
for doc in results:
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
The langchain-chroma package implements the LangChain VectorStore interface with ChromaDB as the backend:
Core document operations including adding, updating, and deleting documents in the vector store. Supports batch operations and automatic ID generation.
def add_texts(texts: Iterable[str], metadatas: Optional[list[dict]] = None, ids: Optional[list[str]] = None, **kwargs: Any) -> list[str]
def add_documents(documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any) -> list[str]
def add_images(uris: list[str], metadatas: Optional[list[dict]] = None, ids: Optional[list[str]] = None) -> list[str]
def update_document(document_id: str, document: Document) -> None
def update_documents(ids: list[str], documents: list[Document]) -> None
def delete(ids: Optional[list[str]] = None, **kwargs: Any) -> None
Comprehensive search functionality including similarity search, vector search, and relevance scoring. Supports metadata filtering and document content filtering.
def similarity_search(query: str, k: int = 4, filter: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
def similarity_search_with_score(query: str, k: int = 4, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[tuple[Document, float]]
def similarity_search_by_vector(embedding: list[float], k: int = 4, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
def similarity_search_by_image(uri: str, k: int = 4, filter: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
Advanced search algorithms that optimize for both similarity to query and diversity among results, reducing redundancy in search results.
def max_marginal_relevance_search(query: str, k: int = 4, fetch_k: int = 20, lambda_mult: float = 0.5, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
def max_marginal_relevance_search_by_vector(embedding: list[float], k: int = 4, fetch_k: int = 20, lambda_mult: float = 0.5, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
Collection-level operations for managing the underlying ChromaDB collections, including retrieval, resetting, and deletion.
def get(ids: Optional[Union[str, list[str]]] = None, where: Optional[Where] = None, limit: Optional[int] = None, offset: Optional[int] = None, where_document: Optional[WhereDocument] = None, include: Optional[list[str]] = None) -> dict[str, Any]
def get_by_ids(ids: Sequence[str], /) -> list[Document]
def reset_collection() -> None
def delete_collection() -> None
Class methods and utilities for creating Chroma instances from various data sources and configurations.
@classmethod
def from_texts(cls: type[Chroma], texts: list[str], embedding: Optional[Embeddings] = None, metadatas: Optional[list[dict]] = None, ids: Optional[list[str]] = None, collection_name: str = "langchain", **kwargs: Any) -> Chroma
@classmethod
def from_documents(cls: type[Chroma], documents: list[Document], embedding: Optional[Embeddings] = None, ids: Optional[list[str]] = None, collection_name: str = "langchain", **kwargs: Any) -> Chroma
@staticmethod
def encode_image(uri: str) -> str
from typing import Union, Optional, Any, Callable, Iterable
from collections.abc import Sequence
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from chromadb.api.types import Where, WhereDocument
from chromadb.api import CreateCollectionConfiguration
import chromadb
Matrix = Union[list[list[float]], list[np.ndarray], np.ndarray]
class Chroma(VectorStore):
"""
Chroma vector store integration for LangChain.
Provides a LangChain-compatible interface to ChromaDB for vector storage,
similarity search, and document retrieval operations.
"""
def __init__(
self,
collection_name: str = "langchain",
embedding_function: Optional[Embeddings] = None,
persist_directory: Optional[str] = None,
host: Optional[str] = None,
port: Optional[int] = None,
headers: Optional[dict[str, str]] = None,
chroma_cloud_api_key: Optional[str] = None,
tenant: Optional[str] = None,
database: Optional[str] = None,
client_settings: Optional[chromadb.config.Settings] = None,
collection_metadata: Optional[dict] = None,
collection_configuration: Optional[CreateCollectionConfiguration] = None,
client: Optional[chromadb.ClientAPI] = None,
relevance_score_fn: Optional[Callable[[float], float]] = None,
create_collection_if_not_exists: Optional[bool] = True,
*,
ssl: bool = False,
) -> None