LangChain4j integration for the Chroma embedding store, enabling storage, retrieval, and similarity search of vector embeddings, with metadata filtering support for both the Chroma V1 and V2 APIs.
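A minimal usage sketch of the store itself, assuming a local Chroma server on http://localhost:8000 and the in-process AllMiniLmL6V2 embedding model used in the examples below (collection name and sample text are illustrative):
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.chroma.ChromaEmbeddingStore;
public class ChromaQuickStart {
    public static void main(String[] args) {
        EmbeddingModel embeddingModel = new AllMiniLmL6V2EmbeddingModel();
        ChromaEmbeddingStore store = ChromaEmbeddingStore.builder()
                .baseUrl("http://localhost:8000")
                .collectionName("quickstart")
                .build();
        // Store one segment together with its metadata
        TextSegment segment = TextSegment.from(
                "Chroma is an open-source vector database.",
                Metadata.from("category", "databases"));
        store.add(embeddingModel.embed(segment).content(), segment);
        // Similarity search, restricted by a metadata filter
        EmbeddingSearchResult<TextSegment> result = store.search(EmbeddingSearchRequest.builder()
                .queryEmbedding(embeddingModel.embed("vector database").content())
                .maxResults(1)
                .filter(metadataKey("category").isEqualTo("databases"))
                .build());
        result.matches().forEach(m -> System.out.println(m.score() + " - " + m.embedded().text()));
    }
}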
—
Retrieval-Augmented Generation (RAG) implementation using ChromaEmbeddingStore.
RAG enhances LLM responses by retrieving relevant context from a vector database before generating answers.
Flow: (1) index documents as embeddings, (2) embed the user's question, (3) retrieve the most relevant segments from Chroma, (4) assemble the retrieved text into a context block, (5) generate the answer with the chat model.
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.model.embedding.AllMiniLmL6V2EmbeddingModel;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.chroma.ChromaEmbeddingStore;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;
public class BasicRAG {
private final EmbeddingModel embeddingModel;
private final ChromaEmbeddingStore store;
private final ChatLanguageModel chatModel;
public BasicRAG() {
this.embeddingModel = new AllMiniLmL6V2EmbeddingModel();
this.store = ChromaEmbeddingStore.builder()
.baseUrl("http://localhost:8000")
.collectionName("knowledge-base")
.build();
this.chatModel = createChatModel();
}
// Step 1: Index documents
public void indexDocuments(List<String> documents) {
List<Embedding> embeddings = new ArrayList<>();
List<TextSegment> segments = new ArrayList<>();
for (String doc : documents) {
Embedding emb = embeddingModel.embed(doc).content();
embeddings.add(emb);
segments.add(TextSegment.from(doc));
}
store.addAll(embeddings, segments);
}
// Step 2-5: Query with context
public String query(String question) {
// Convert query to embedding
Embedding queryEmb = embeddingModel.embed(question).content();
// Retrieve relevant documents
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmb)
.maxResults(3)
.minScore(0.7)
.build();
EmbeddingSearchResult<TextSegment> result = store.search(request);
// Extract context
String context = result.matches().stream()
.map(match -> match.embedded().text())
.collect(Collectors.joining("\n\n"));
// Generate response with context
String prompt = "Context:\n" + context + "\n\nQuestion: " + question;
return chatModel.generate(prompt);
}
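// Hypothetical helper: any ChatLanguageModel implementation works here; OpenAI is shown as
// one option (assumes the langchain4j-open-ai module is on the classpath and OPENAI_API_KEY is set).
private ChatLanguageModel createChatModel() {
    return dev.langchain4j.model.openai.OpenAiChatModel.builder()
            .apiKey(System.getenv("OPENAI_API_KEY"))
            .modelName("gpt-4o-mini")
            .build();
}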
}

Filtered retrieval: restrict results by metadata (here a category and a minimum year) before building the context.

import dev.langchain4j.store.embedding.filter.Filter;
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;

public class FilteredRAG {
public String queryWithFilter(
String question,
String category,
Integer minYear
) {
Embedding queryEmb = embeddingModel.embed(question).content();
// Build filter
Filter filter = metadataKey("category").isEqualTo(category);
if (minYear != null) {
filter = filter.and(
metadataKey("year").isGreaterThanOrEqualTo(minYear)
);
}
// Search with filter
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmb)
.maxResults(5)
.minScore(0.75)
.filter(filter)
.build();
EmbeddingSearchResult<TextSegment> result = store.search(request);
// Build context with sources
StringBuilder context = new StringBuilder();
for (EmbeddingMatch<TextSegment> match : result.matches()) {
TextSegment seg = match.embedded();
context.append("Source: ")
.append(seg.metadata().getString("source"))
.append("\n")
.append(seg.text())
.append("\n\n");
}
// Generate response
String prompt = String.format(
"Answer based on the following sources:\n\n%s\nQuestion: %s",
context.toString(),
question
);
return chatModel.generate(prompt);
}
}

Document indexing: split a document into overlapping chunks and attach chunk-level metadata before storing.

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.Metadata;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
public class DocumentIndexer {
public void indexDocument(String filePath) {
// Load document
Document document = loadDocument(filePath);
// Split into chunks
DocumentSplitter splitter = DocumentSplitters.recursive(
500, // max chunk size
50 // overlap
);
List<TextSegment> chunks = splitter.split(document);
// Create embeddings and metadata
List<Embedding> embeddings = new ArrayList<>();
List<TextSegment> segments = new ArrayList<>();
for (int i = 0; i < chunks.size(); i++) {
TextSegment chunk = chunks.get(i);
// Add chunk metadata
Metadata metadata = new Metadata()
.put("source", filePath)
.put("chunk_index", i)
.put("total_chunks", chunks.size())
.put("indexed_at", System.currentTimeMillis());
TextSegment segmentWithMeta = TextSegment.from(
chunk.text(),
metadata
);
Embedding emb = embeddingModel.embed(chunk.text()).content();
embeddings.add(emb);
segments.add(segmentWithMeta);
}
// Index all chunks
store.addAll(embeddings, segments);
}
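// Hypothetical helper, assuming langchain4j's FileSystemDocumentLoader with its default parser:
private Document loadDocument(String filePath) {
    return dev.langchain4j.data.document.loader.FileSystemDocumentLoader
            .loadDocument(java.nio.file.Path.of(filePath));
}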
}

Re-ranking: retrieve a larger candidate set with a lower score threshold, then re-score candidates with custom criteria and keep only the best few.

public class ReRankingRAG {
public String queryWithReRanking(String question) {
Embedding queryEmb = embeddingModel.embed(question).content();
// Retrieve more candidates
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmb)
.maxResults(20) // Get more candidates
.minScore(0.6) // Lower threshold
.build();
EmbeddingSearchResult<TextSegment> result = store.search(request);
// Re-rank using different criteria
List<EmbeddingMatch<TextSegment>> reranked = result.matches().stream()
.sorted((a, b) -> {
// Custom scoring logic
double scoreA = calculateRelevanceScore(a, question);
double scoreB = calculateRelevanceScore(b, question);
return Double.compare(scoreB, scoreA);
})
.limit(5) // Top 5 after re-ranking
.collect(Collectors.toList());
// Build context
String context = reranked.stream()
.map(match -> match.embedded().text())
.collect(Collectors.joining("\n\n"));
// Generate response
String prompt = "Context:\n" + context + "\n\nQuestion: " + question;
return chatModel.generate(prompt);
}
private double calculateRelevanceScore(
EmbeddingMatch<TextSegment> match,
String question
) {
double embeddingScore = match.score();
double recencyScore = calculateRecencyScore(match);
double sourceScore = calculateSourceScore(match);
return (embeddingScore * 0.6) +
(recencyScore * 0.2) +
(sourceScore * 0.2);
}
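// Hypothetical scoring helpers: they assume the chunk metadata written at indexing time
// ("indexed_at" timestamp and "source" name); adjust to whatever metadata you actually store.
private double calculateRecencyScore(EmbeddingMatch<TextSegment> match) {
    Long indexedAt = match.embedded().metadata().getLong("indexed_at");
    if (indexedAt == null) return 0.5; // neutral score when no timestamp is present
    double ageDays = (System.currentTimeMillis() - indexedAt) / 86_400_000.0;
    return Math.max(0.0, 1.0 - ageDays / 365.0); // linear decay over one year
}
private double calculateSourceScore(EmbeddingMatch<TextSegment> match) {
    String source = match.embedded().metadata().getString("source");
    return source != null && source.endsWith(".pdf") ? 1.0 : 0.8; // e.g., prefer curated PDFs
}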
}

Multi-query retrieval: ask the LLM for several phrasings of the question, search with each, and merge the unique results.

public class MultiQueryRAG {
public String queryWithMultipleQueries(String originalQuestion) {
// Generate multiple query variations
List<String> queries = generateQueryVariations(originalQuestion);
Set<String> allDocuments = new HashSet<>();
// Search with each query
for (String query : queries) {
Embedding queryEmb = embeddingModel.embed(query).content();
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmb)
.maxResults(3)
.minScore(0.75)
.build();
EmbeddingSearchResult<TextSegment> result = store.search(request);
// Collect unique documents
result.matches().stream()
.map(match -> match.embedded().text())
.forEach(allDocuments::add);
}
// Build context from all unique documents
String context = String.join("\n\n", allDocuments);
// Generate response
String prompt = "Context:\n" + context +
"\n\nQuestion: " + originalQuestion;
return chatModel.generate(prompt);
}
private List<String> generateQueryVariations(String question) {
// Use LLM to generate query variations
String prompt = "Generate 3 different ways to ask: " + question;
String variations = chatModel.generate(prompt);
return parseVariations(variations);
}
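// Hypothetical parser: assumes the model returns one variation per line,
// optionally prefixed with "1." or "2)" style numbering.
private List<String> parseVariations(String variations) {
    return variations.lines()
            .map(String::trim)
            .map(line -> line.replaceFirst("^\\d+[.)]\\s*", ""))
            .filter(line -> !line.isEmpty())
            .collect(Collectors.toList());
}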
}

Conversational RAG: fold recent conversation history into both the retrieval query and the final prompt.

public class ConversationalRAG {
private final List<Message> conversationHistory = new ArrayList<>();
public String queryWithHistory(String question) {
// Include conversation history in query
String contextualQuery = buildContextualQuery(question);
Embedding queryEmb = embeddingModel.embed(contextualQuery).content();
// Retrieve relevant documents
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmb)
.maxResults(3)
.minScore(0.7)
.build();
EmbeddingSearchResult<TextSegment> result = store.search(request);
String context = result.matches().stream()
.map(match -> match.embedded().text())
.collect(Collectors.joining("\n\n"));
// Build prompt with history
String prompt = buildPromptWithHistory(context, question);
String response = chatModel.generate(prompt);
// Update history
conversationHistory.add(new Message("user", question));
conversationHistory.add(new Message("assistant", response));
return response;
}
private String buildContextualQuery(String question) {
if (conversationHistory.isEmpty()) {
return question;
}
// Include recent history for context
StringBuilder query = new StringBuilder();
int recentCount = Math.min(2, conversationHistory.size());
for (int i = conversationHistory.size() - recentCount;
i < conversationHistory.size(); i++) {
query.append(conversationHistory.get(i).content()).append(" ");
}
query.append(question);
return query.toString();
}
private String buildPromptWithHistory(String context, String question) {
StringBuilder prompt = new StringBuilder();
prompt.append("Context:\n").append(context).append("\n\n");
if (!conversationHistory.isEmpty()) {
prompt.append("Previous conversation:\n");
for (Message msg : conversationHistory) {
prompt.append(msg.role()).append(": ")
.append(msg.content()).append("\n");
}
prompt.append("\n");
}
prompt.append("Question: ").append(question);
return prompt.toString();
}
record Message(String role, String content) {}
}

Source attribution: number each retrieved segment, ask the model to cite them, and return the answer together with its sources.

public class AttributedRAG {
public AnswerWithSources queryWithSources(String question) {
Embedding queryEmb = embeddingModel.embed(question).content();
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmb)
.maxResults(5)
.minScore(0.7)
.build();
EmbeddingSearchResult<TextSegment> result = store.search(request);
// Build context with source markers
StringBuilder contextWithMarkers = new StringBuilder();
List<Source> sources = new ArrayList<>();
int sourceNum = 1;
for (EmbeddingMatch<TextSegment> match : result.matches()) {
TextSegment seg = match.embedded();
Metadata meta = seg.metadata();
String marker = "[" + sourceNum + "]";
contextWithMarkers.append(marker).append(" ")
.append(seg.text()).append("\n\n");
sources.add(new Source(
sourceNum++,
meta.getString("source"),
meta.getInteger("page"),
match.score()
));
}
// Generate response
String prompt = "Using the following sources, answer the question. " +
"Cite sources using [1], [2], etc.\n\n" +
contextWithMarkers.toString() +
"\nQuestion: " + question;
String answer = chatModel.generate(prompt);
return new AnswerWithSources(answer, sources);
}
record Source(int number, String file, Integer page, double relevance) {}
record AnswerWithSources(String answer, List<Source> sources) {}
}

Caching: reuse query embeddings for repeated questions instead of re-embedding them.

import java.util.concurrent.ConcurrentHashMap;

public class CachedRAG {
private final Map<String, Embedding> embeddingCache = new ConcurrentHashMap<>();
public String queryWithCache(String question) {
// Check cache
Embedding queryEmb = embeddingCache.computeIfAbsent(
question,
q -> embeddingModel.embed(q).content()
);
// Continue with normal RAG flow
return performRAGQuery(queryEmb, question);
}
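// Hypothetical continuation: the same search-then-generate flow as BasicRAG.query,
// reusing the already computed query embedding.
private String performRAGQuery(Embedding queryEmb, String question) {
    EmbeddingSearchResult<TextSegment> result = store.search(EmbeddingSearchRequest.builder()
            .queryEmbedding(queryEmb)
            .maxResults(3)
            .minScore(0.7)
            .build());
    String context = result.matches().stream()
            .map(match -> match.embedded().text())
            .collect(Collectors.joining("\n\n"));
    return chatModel.generate("Context:\n" + context + "\n\nQuestion: " + question);
}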
}

Batch indexing: process documents in fixed-size batches to keep memory bounded when indexing large collections.

public void indexDocumentsBatch(List<String> documents, int batchSize) {
for (int i = 0; i < documents.size(); i += batchSize) {
int end = Math.min(i + batchSize, documents.size());
List<String> batch = documents.subList(i, end);
List<Embedding> embeddings = new ArrayList<>();
List<TextSegment> segments = new ArrayList<>();
for (String doc : batch) {
embeddings.add(embeddingModel.embed(doc).content());
segments.add(TextSegment.from(doc));
}
store.addAll(embeddings, segments);
System.out.println("Indexed batch " + (i / batchSize + 1));
}
}

Install with Tessl CLI:
npx tessl i tessl/maven-dev-langchain4j--langchain4j-chroma