An integration package connecting Chroma and LangChain for vector database operations.
—
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Pending
The risk profile of this skill
An integration package connecting Chroma and LangChain for vector database operations. This package provides a LangChain-compatible interface to ChromaDB, enabling developers to use ChromaDB as a vector store for embedding-based search and retrieval in AI applications, particularly for semantic search, question-answering systems, and retrieval-augmented generation (RAG) pipelines.
pip install langchain-chroma
Dependencies: chromadb>=1.0.9, langchain-core>=0.3.70, numpy>=1.26.0
from langchain_chroma import Chroma
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
# Initialize the vector store
embeddings = OpenAIEmbeddings()
vector_store = Chroma(
collection_name="my_collection",
embedding_function=embeddings,
persist_directory="./chroma_db"
)
# Add documents
documents = [
Document(page_content="Hello world", metadata={"source": "greeting"}),
Document(page_content="Python is great", metadata={"source": "programming"})
]
vector_store.add_documents(documents)
# Perform similarity search
results = vector_store.similarity_search("programming language", k=2)
for doc in results:
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")

The langchain-chroma package implements the LangChain VectorStore interface with ChromaDB as the backend:
Core document operations including adding, updating, and deleting documents in the vector store. Supports batch operations and automatic ID generation.
def add_texts(texts: Iterable[str], metadatas: Optional[list[dict]] = None, ids: Optional[list[str]] = None, **kwargs: Any) -> list[str]
def add_documents(documents: list[Document], ids: Optional[list[str]] = None, **kwargs: Any) -> list[str]
def add_images(uris: list[str], metadatas: Optional[list[dict]] = None, ids: Optional[list[str]] = None) -> list[str]
def update_document(document_id: str, document: Document) -> None
def update_documents(ids: list[str], documents: list[Document]) -> None
def delete(ids: Optional[list[str]] = None, **kwargs: Any) -> None

Comprehensive search functionality including similarity search, vector search, and relevance scoring. Supports metadata filtering and document content filtering.
def similarity_search(query: str, k: int = 4, filter: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
def similarity_search_with_score(query: str, k: int = 4, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[tuple[Document, float]]
def similarity_search_by_vector(embedding: list[float], k: int = 4, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
def similarity_search_by_image(uri: str, k: int = 4, filter: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]

Advanced search algorithms that optimize for both similarity to query and diversity among results, reducing redundancy in search results.
def max_marginal_relevance_search(query: str, k: int = 4, fetch_k: int = 20, lambda_mult: float = 0.5, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]
def max_marginal_relevance_search_by_vector(embedding: list[float], k: int = 4, fetch_k: int = 20, lambda_mult: float = 0.5, filter: Optional[dict[str, str]] = None, where_document: Optional[dict[str, str]] = None, **kwargs: Any) -> list[Document]

Collection-level operations for managing the underlying ChromaDB collections, including retrieval, resetting, and deletion.
def get(ids: Optional[Union[str, list[str]]] = None, where: Optional[Where] = None, limit: Optional[int] = None, offset: Optional[int] = None, where_document: Optional[WhereDocument] = None, include: Optional[list[str]] = None) -> dict[str, Any]
def get_by_ids(ids: Sequence[str], /) -> list[Document]
def reset_collection() -> None
def delete_collection() -> None

Class methods and utilities for creating Chroma instances from various data sources and configurations.
@classmethod
def from_texts(cls: type[Chroma], texts: list[str], embedding: Optional[Embeddings] = None, metadatas: Optional[list[dict]] = None, ids: Optional[list[str]] = None, collection_name: str = "langchain", **kwargs: Any) -> Chroma
@classmethod
def from_documents(cls: type[Chroma], documents: list[Document], embedding: Optional[Embeddings] = None, ids: Optional[list[str]] = None, collection_name: str = "langchain", **kwargs: Any) -> Chroma
@staticmethod
def encode_image(uri: str) -> str

from typing import Union, Optional, Any, Callable, Iterable
from collections.abc import Sequence
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.vectorstores import VectorStore
from chromadb.api.types import Where, WhereDocument
from chromadb.api import CreateCollectionConfiguration
import chromadb
Matrix = Union[list[list[float]], list[np.ndarray], np.ndarray]
class Chroma(VectorStore):
"""
Chroma vector store integration for LangChain.
Provides a LangChain-compatible interface to ChromaDB for vector storage,
similarity search, and document retrieval operations.
"""
def __init__(
self,
collection_name: str = "langchain",
embedding_function: Optional[Embeddings] = None,
persist_directory: Optional[str] = None,
host: Optional[str] = None,
port: Optional[int] = None,
headers: Optional[dict[str, str]] = None,
chroma_cloud_api_key: Optional[str] = None,
tenant: Optional[str] = None,
database: Optional[str] = None,
client_settings: Optional[chromadb.config.Settings] = None,
collection_metadata: Optional[dict] = None,
collection_configuration: Optional[CreateCollectionConfiguration] = None,
client: Optional[chromadb.ClientAPI] = None,
relevance_score_fn: Optional[Callable[[float], float]] = None,
create_collection_if_not_exists: Optional[bool] = True,
*,
ssl: bool = False,
) -> None