Quarkus extension for integrating Chroma vector database as an embedding store with LangChain4j
The ChromaEmbeddingStore bean is the primary interface for interacting with the Chroma vector database. This bean is automatically created and configured by the extension based on your application.properties settings.
The extension creates a synthetic CDI bean that can be injected into your application classes.
import jakarta.inject.Inject;
import dev.langchain4j.store.embedding.chroma.ChromaEmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.data.segment.TextSegment;
// Inject as ChromaEmbeddingStore
@Inject
ChromaEmbeddingStore embeddingStore;
// Inject as EmbeddingStore interface
@Inject
EmbeddingStore<TextSegment> embeddingStore;
Bean Characteristics:
- Scope: @ApplicationScoped (singleton)
- Injectable as: dev.langchain4j.store.embedding.chroma.ChromaEmbeddingStore, dev.langchain4j.store.embedding.EmbeddingStore, dev.langchain4j.store.embedding.EmbeddingStore<TextSegment>
Methods for storing embeddings in the Chroma database.
/**
* Add a single embedding to the store.
* An ID is automatically generated.
*
* @param embedding The embedding to add
* @return The generated ID for the embedding
*/
String add(Embedding embedding);
/**
* Add a single embedding with a specific ID.
*
* @param id The ID to associate with the embedding
* @param embedding The embedding to add
*/
void add(String id, Embedding embedding);
/**
* Add a single embedding with an associated text segment.
* An ID is automatically generated.
*
* @param embedding The embedding to add
* @param textSegment The text segment associated with the embedding
* @return The generated ID for the embedding
*/
String add(Embedding embedding, TextSegment textSegment);
/**
* Add multiple embeddings to the store.
* IDs are automatically generated for each embedding.
*
* @param embeddings List of embeddings to add
* @return List of generated IDs in the same order as input embeddings
*/
List<String> addAll(List<Embedding> embeddings);
/**
* Add multiple embeddings with specific IDs.
*
* @param ids List of IDs to associate with the embeddings
* @param embeddings List of embeddings to add
*/
void addAll(List<String> ids, List<Embedding> embeddings);
/**
* Add multiple embeddings with associated text segments.
* IDs are automatically generated for each embedding.
*
* @param embeddings List of embeddings to add
* @param textSegments List of text segments associated with the embeddings
* @return List of generated IDs in the same order as input embeddings
*/
List<String> addAll(List<Embedding> embeddings, List<TextSegment> textSegments);
Usage Example:
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
// Add a single embedding together with the text segment it was computed from
TextSegment segment = TextSegment.from("The quick brown fox");
Embedding embedding = embeddingModel.embed(segment).content();
String id = embeddingStore.add(embedding, segment);
// Add multiple embeddings with text segments
List<TextSegment> segments = List.of(
        TextSegment.from("First document"),
        TextSegment.from("Second document"),
        TextSegment.from("Third document")
);
// Use the model's bulk API instead of one embed() call per segment;
// the returned embeddings are in the same order as the input segments.
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
// The generated IDs come back in the same order as the embeddings passed in
List<String> ids = embeddingStore.addAll(embeddings, segments);
// Add with custom ID
embeddingStore.add("custom-id-123", embedding);
Methods for performing similarity searches in the Chroma database.
/**
* Search for embeddings similar to the query embedding.
*
* @param request The search request containing query embedding and parameters
* @return Search result containing matching embeddings with similarity scores
*/
EmbeddingSearchResult<TextSegment> search(EmbeddingSearchRequest request);
Required Types:
// Search request builder
class EmbeddingSearchRequest {
static Builder builder();
interface Builder {
Builder queryEmbedding(Embedding queryEmbedding);
Builder maxResults(Integer maxResults);
Builder minScore(Double minScore);
Builder filter(Filter filter); // Optional: metadata-based filtering
EmbeddingSearchRequest build();
}
}
// Search result
class EmbeddingSearchResult<Embedded> {
List<EmbeddingMatch<Embedded>> matches();
}
// Individual match
class EmbeddingMatch<Embedded> {
double score(); // Similarity score (higher is more similar)
String embeddingId(); // ID of the matching embedding
Embedding embedding(); // The matching embedding vector
Embedded embedded(); // The associated object (TextSegment)
}
// Metadata-based filtering (from LangChain4j, package dev.langchain4j.store.embedding.filter)
// Entry point for building a filter on a single metadata key.
// Signatures are simplified: the library declares typed overloads of the
// comparison methods rather than Object/Comparable parameters.
class MetadataFilterBuilder {
    static MetadataFilterBuilder metadataKey(String key);
    Filter isEqualTo(Object value);
    Filter isNotEqualTo(Object value);
    Filter isGreaterThan(Comparable<?> value);
    Filter isGreaterThanOrEqualTo(Comparable<?> value);
    Filter isLessThan(Comparable<?> value);
    Filter isLessThanOrEqualTo(Comparable<?> value);
    Filter isIn(Collection<?> values);
    Filter isNotIn(Collection<?> values);
}
// A built filter, as accepted by EmbeddingSearchRequest.Builder.filter(...)
interface Filter {
    // Logical operators
    static Filter and(Filter left, Filter right);
    static Filter or(Filter left, Filter right);
    static Filter not(Filter expression);
}
Usage Example:
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.EmbeddingMatch;
// Basic similarity search
String query = "What is the capital of France?";
Embedding queryEmbedding = embeddingModel.embed(query).content();
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(5)
.build();
EmbeddingSearchResult<TextSegment> result = embeddingStore.search(request);
List<EmbeddingMatch<TextSegment>> matches = result.matches();
// Process results
for (EmbeddingMatch<TextSegment> match : matches) {
    double score = match.score();
    String id = match.embeddingId();
    // embedded() may be null for embeddings that were stored without a text segment
    // (e.g. via add(Embedding) or add(String, Embedding)), so guard before calling text().
    TextSegment matchedSegment = match.embedded();
    String text = (matchedSegment != null) ? matchedSegment.text() : null;
    System.out.printf("Score: %.4f, ID: %s, Text: %s%n", score, id, text);
}
// Search with minimum score threshold
EmbeddingSearchRequest filteredRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(10)
.minScore(0.7) // Only return results with score >= 0.7
.build();
EmbeddingSearchResult<TextSegment> filteredResult = embeddingStore.search(filteredRequest);
// Search with metadata filtering
// metadataKey(...) is a static factory on MetadataFilterBuilder (not on Filter);
// its comparison methods return the Filter that is passed to the request builder.
import dev.langchain4j.store.embedding.filter.Filter;
import dev.langchain4j.store.embedding.filter.MetadataFilterBuilder;
EmbeddingSearchRequest metadataFilteredRequest = EmbeddingSearchRequest.builder()
        .queryEmbedding(queryEmbedding)
        .maxResults(5)
        .filter(MetadataFilterBuilder.metadataKey("category").isEqualTo("technical"))
        .build();
EmbeddingSearchResult<TextSegment> metadataFilteredResult = embeddingStore.search(metadataFilteredRequest);
// Search with complex metadata filtering: both conditions must hold
Filter complexFilter = Filter.and(
        MetadataFilterBuilder.metadataKey("category").isEqualTo("technical"),
        MetadataFilterBuilder.metadataKey("year").isGreaterThanOrEqualTo(2020)
);
EmbeddingSearchRequest complexRequest = EmbeddingSearchRequest.builder()
        .queryEmbedding(queryEmbedding)
        .maxResults(5)
        .filter(complexFilter)
        .build();
EmbeddingSearchResult<TextSegment> complexResult = embeddingStore.search(complexRequest);
Methods for deleting embeddings from the Chroma database.
/**
* Remove all embeddings from the collection.
* This operation cannot be undone.
*/
void removeAll();
/**
* Remove specific embeddings by their IDs.
*
* @param ids Collection of embedding IDs to remove
*/
void removeAll(Collection<String> ids);
Usage Example:
import java.util.Collection;
import java.util.List;
// Remove specific embeddings by ID
Collection<String> idsToRemove = List.of("id-1", "id-2", "id-3");
embeddingStore.removeAll(idsToRemove);
// Remove all embeddings from the collection
embeddingStore.removeAll();
The ChromaEmbeddingStore implements the EmbeddingStore<TextSegment> interface from LangChain4j, making it compatible with all LangChain4j components that work with embedding stores.
Common Integration Patterns:
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
import dev.langchain4j.rag.content.retriever.ContentRetriever;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.inject.Produces;
@ApplicationScoped
public class RagConfiguration {
/**
 * Produces the application-wide {@link ContentRetriever} used for RAG.
 *
 * @param embeddingStore the embedding store the retriever queries
 * @param embeddingModel the model handed to the retriever builder
 * @return a retriever built with {@code maxResults(5)} and {@code minScore(0.6)}
 */
@Produces
@ApplicationScoped
public ContentRetriever contentRetriever(
        EmbeddingStore<TextSegment> embeddingStore,
        EmbeddingModel embeddingModel) {
    ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
            .embeddingStore(embeddingStore)
            .embeddingModel(embeddingModel)
            .maxResults(5)
            .minScore(0.6)
            .build();
    return retriever;
}
}
Core LangChain4j Types:
// Text segment with metadata
class TextSegment {
static TextSegment from(String text);
static TextSegment from(String text, Metadata metadata);
String text();
Metadata metadata();
}
// Embedding vector
class Embedding {
float[] vector();
int dimension();
}
// Metadata for text segments
class Metadata {
static Metadata from(Map<String, Object> map);
Map<String, Object> toMap();
Object get(String key);
String getString(String key);
Integer getInteger(String key);
Long getLong(String key);
Float getFloat(String key);
Double getDouble(String key);
}
The ChromaEmbeddingStore may throw exceptions in the following scenarios:
Best Practices:
import jakarta.enterprise.context.ApplicationScoped;
import org.jboss.logging.Logger;
@ApplicationScoped
public class SafeEmbeddingService {
private static final Logger LOG = Logger.getLogger(SafeEmbeddingService.class);
@Inject
EmbeddingStore<TextSegment> embeddingStore;
/**
 * Stores an embedding with its text segment without propagating failures.
 *
 * @param embedding the embedding to store
 * @param segment the text segment associated with the embedding
 * @return the generated ID, or an empty {@code Optional} if anything thrown
 *         while adding was caught and logged
 */
public Optional<String> safeAdd(Embedding embedding, TextSegment segment) {
    try {
        return Optional.of(embeddingStore.add(embedding, segment));
    } catch (Exception failure) {
        // Deliberate best-effort: log the cause and signal failure via an empty result.
        LOG.error("Failed to add embedding to Chroma", failure);
        return Optional.empty();
    }
}
}
Install with Tessl CLI:
npx tessl i tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-chroma