Core classes and interfaces of LangChain4j providing foundational abstractions for LLM interaction, RAG, embeddings, agents, and observability
Package: dev.langchain4j.rag
Thread-Safety: Implementation-dependent
Use Case: Enhance LLM responses with relevant context from external knowledge bases
RAG (Retrieval Augmented Generation) enhances LLM responses by retrieving relevant information from external sources and injecting it into the prompt, reducing hallucinations and providing up-to-date information.
RAG systems in LangChain4j follow a modular pipeline:
User Query
↓
Query Transformation (optional)
↓
Content Retrieval
↓
Content Aggregation (optional)
↓
Content Injection into Prompt
↓
LLM Generation
↓
Response

package dev.langchain4j.rag;
import dev.langchain4j.rag.content.Content;
import dev.langchain4j.rag.query.Query;
import dev.langchain4j.data.message.ChatMessage;
/**
 * Orchestrates the RAG pipeline: (optional) query transformation, content
 * retrieval, (optional) aggregation, and injection of the retrieved content
 * into the outgoing prompt.
 * <p>
 * Thread-Safety: implementation-dependent; consult the concrete class.
 */
public interface RetrievalAugmentor {
/**
 * Augments the user message with content retrieved from external sources.
 * NOTE(review): UserMessage and Result are not imported in this snippet —
 * presumably dev.langchain4j.data.message.UserMessage and a Result type in
 * this package; confirm the imports (only ChatMessage is imported above).
 *
 * @param userMessage the original user message to augment
 * @return the augmentation result carrying the augmented message and the
 *         retrieved content
 */
Result augment(UserMessage userMessage);
}

package dev.langchain4j.rag.content.retriever;
import dev.langchain4j.rag.content.Content;
import dev.langchain4j.rag.query.Query;
/**
 * Retrieves content relevant to a given query from some backing source
 * (e.g. an embedding store — see EmbeddingStoreContentRetriever below).
 * <p>
 * Thread-Safety: implementation-dependent; consult the concrete class.
 */
public interface ContentRetriever {
/**
 * Retrieves content relevant to the given query.
 * NOTE(review): List is not imported in this snippet — presumably
 * java.util.List.
 *
 * @param query the user query, including any attached metadata
 * @return the retrieved contents; ordering is implementation-defined,
 *         empty when nothing matches
 */
List<Content> retrieve(Query query);
}

package dev.langchain4j.rag.content.retriever;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
/**
 * ContentRetriever that performs semantic (vector-similarity) search over an
 * EmbeddingStore, embedding the incoming query with the configured
 * EmbeddingModel.
 * <p>
 * Thread-Safety: depends on the EmbeddingStore and EmbeddingModel used.
 */
public class EmbeddingStoreContentRetriever implements ContentRetriever {
// Entry point for fluent configuration.
public static Builder builder() { /* ... */ }
/** Fluent builder; setters return {@code this} for chaining. */
public static class Builder {
// Store searched for nearest-neighbour matches.
public Builder embeddingStore(EmbeddingStore<TextSegment> store) { /* ... */ }
// Model used to embed the query text before searching.
public Builder embeddingModel(EmbeddingModel model) { /* ... */ }
// Maximum number of matches to return from the store.
public Builder maxResults(int maxResults) { /* ... */ }
// Minimum similarity score; lower-scoring matches are dropped.
public Builder minScore(double minScore) { /* ... */ }
// Metadata filter applied during the store search.
// NOTE(review): Filter is not imported in this snippet — presumably
// dev.langchain4j.store.embedding.filter.Filter; confirm.
public Builder filter(Filter filter) { /* ... */ }
public EmbeddingStoreContentRetriever build() { /* ... */ }
}
@Override
public List<Content> retrieve(Query query) {
// 1. Embed query
// 2. Search embedding store
// 3. Return content from matches
}
}

package dev.langchain4j.rag;
/**
 * Standard RetrievalAugmentor implementation wiring together the pipeline
 * stages: query transformation, routing, retrieval, aggregation, injection.
 * <p>
 * Thread-Safety: depends on the components supplied to the builder.
 */
public class DefaultRetrievalAugmentor implements RetrievalAugmentor {
// Entry point for fluent configuration.
public static Builder builder() { /* ... */ }
/**
 * Fluent builder for the pipeline stages.
 * NOTE(review): presumably unset stages fall back to library defaults
 * (transformation/aggregation are described as optional above) — confirm.
 */
public static class Builder {
// Optionally rewrites/expands the query before retrieval.
public Builder queryTransformer(QueryTransformer transformer) { /* ... */ }
// Routes each query to one or more content retrievers.
public Builder queryRouter(QueryRouter router) { /* ... */ }
// Retriever used to fetch relevant content.
public Builder contentRetriever(ContentRetriever retriever) { /* ... */ }
// Post-processes retrieved content (dedup, rerank, filter).
public Builder contentAggregator(ContentAggregator aggregator) { /* ... */ }
// Injects the final content into the user message.
public Builder contentInjector(ContentInjector injector) { /* ... */ }
public DefaultRetrievalAugmentor build() { /* ... */ }
}
}

import dev.langchain4j.rag.DefaultRetrievalAugmentor;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.segment.TextSegment;
/**
 * Minimal RAG flow against a single embedding store: retrieve the top
 * matches, splice them into a grounded prompt, then ask the chat model.
 * <p>
 * Thread-Safety: depends on the injected model and store implementations.
 */
public class SimpleRAG {

    private final ChatModel chatModel;
    private final EmbeddingStore<TextSegment> embeddingStore;
    private final EmbeddingModel embeddingModel;
    // Fixed retriever configuration: built once here instead of on every
    // query() call (the retriever holds no per-request state). The original
    // snippet also never initialized its final fields — no constructor.
    private final ContentRetriever retriever;

    public SimpleRAG(ChatModel chatModel,
                     EmbeddingStore<TextSegment> embeddingStore,
                     EmbeddingModel embeddingModel) {
        this.chatModel = chatModel;
        this.embeddingStore = embeddingStore;
        this.embeddingModel = embeddingModel;
        this.retriever = EmbeddingStoreContentRetriever.builder()
                .embeddingStore(embeddingStore)
                .embeddingModel(embeddingModel)
                .maxResults(5)  // keep the context small and focused
                .minScore(0.7)  // drop weakly-related matches
                .build();
    }

    /**
     * Answers the question using retrieved context.
     *
     * @param userQuestion the natural-language question
     * @return the model's answer, or a fallback message when nothing
     *         relevant was retrieved (avoids ungrounded answers)
     */
    public String query(String userQuestion) {
        // 1. Retrieve relevant content for the raw question.
        Query query = Query.from(userQuestion);
        List<Content> retrievedContent = retriever.retrieve(query);

        // 2. Fail fast when retrieval found nothing — generating anyway
        //    would invite hallucination.
        if (retrievedContent.isEmpty()) {
            return "I don't have enough information to answer that question. "
                    + "Please try rephrasing or ask something else.";
        }

        // 3. Concatenate the retrieved segments into one context block.
        String context = retrievedContent.stream()
                .map(Content::textSegment)
                .map(TextSegment::text)
                .collect(Collectors.joining("\n\n"));

        // 4. Build the grounded prompt.
        String prompt = String.format("""
                Answer the question based on the following context:
                Context:
                %s
                Question: %s
                Answer:
                """, context, userQuestion);

        // 5. Generate the response.
        return chatModel.chat(prompt);
    }
}

import dev.langchain4j.rag.DefaultRetrievalAugmentor;
import dev.langchain4j.rag.query.router.DefaultQueryRouter;
import dev.langchain4j.rag.content.aggregator.ReRankingContentAggregator;
import dev.langchain4j.rag.content.injector.DefaultContentInjector;
/**
 * Advanced RAG wiring: routes queries to source-specific retrievers,
 * reranks the merged results with a scoring model, and injects them via a
 * custom prompt template.
 * NOTE(review): techDocsStore, apiDocsStore, embeddingModel, scoringModel,
 * chatModel and createPromptTemplate() are referenced but not declared in
 * this snippet — they must exist in the full class; confirm.
 */
public class AdvancedRAG {
private final RetrievalAugmentor retrievalAugmentor;
public AdvancedRAG() {
// Create multiple retrievers for different sources
// Technical docs: wider net (10 results, 0.6 floor) restricted to
// metadata type == "technical".
ContentRetriever techDocsRetriever = EmbeddingStoreContentRetriever.builder()
.embeddingStore(techDocsStore)
.embeddingModel(embeddingModel)
.maxResults(10)
.minScore(0.6)
.filter(Filter.metadataKey("type").isEqualTo("technical"))
.build();
// API reference docs: same tuning, different store and metadata filter.
ContentRetriever apiDocsRetriever = EmbeddingStoreContentRetriever.builder()
.embeddingStore(apiDocsStore)
.embeddingModel(embeddingModel)
.maxResults(10)
.minScore(0.6)
.filter(Filter.metadataKey("type").isEqualTo("api-reference"))
.build();
// Route queries to appropriate retriever
QueryRouter queryRouter = DefaultQueryRouter.builder()
.retriever("technical", techDocsRetriever)
.retriever("api", apiDocsRetriever)
.classifier(this::classifyQuery) // Custom classifier
.build();
// Rerank and aggregate results
// Reranking applies a stricter 0.7 floor than the 0.6 used at retrieval,
// i.e. retrieve broadly, then keep only strong matches.
ContentAggregator aggregator = ReRankingContentAggregator.builder()
.scoringModel(scoringModel)
.minScore(0.7)
.build();
// Build complete augmentor
this.retrievalAugmentor = DefaultRetrievalAugmentor.builder()
.queryRouter(queryRouter)
.contentAggregator(aggregator)
.contentInjector(DefaultContentInjector.builder()
.promptTemplate(createPromptTemplate())
.build())
.build();
}
// Keyword heuristic mapping a query onto a router key; "technical" is the
// catch-all default.
private String classifyQuery(Query query) {
// Classify query to determine which retriever to use
String text = query.text().toLowerCase();
if (text.contains("api") || text.contains("endpoint")) {
return "api";
}
return "technical";
}
// Entry point: delegates the full transform/route/retrieve/rerank/inject
// pipeline to the augmentor, then generates from the augmented message.
public String query(String userQuestion) {
// Augmentor handles entire RAG pipeline
Result result = retrievalAugmentor.augment(UserMessage.from(userQuestion));
// Generate response with augmented content
return chatModel.chat(result.augmentedUserMessage());
}
}

package dev.langchain4j.rag.query.transformer;
import dev.langchain4j.rag.query.Query;
/**
 * Transforms a query before retrieval — e.g. expansion into multiple
 * variants, compression, or translation.
 */
public interface QueryTransformer {
/**
 * Transforms the query.
 * NOTE(review): Collection is not imported in this snippet — presumably
 * java.util.Collection.
 *
 * @param query the original query
 * @return one or more transformed queries (a transformer may fan a single
 *         query out into several, as QueryExpansion below does)
 */
Collection<Query> transform(Query query);
}

/**
* Expand query with synonyms and related terms
*/
public class QueryExpansion implements QueryTransformer {

    // Model used to produce the reworded variants.
    private final ChatModel chatModel;

    /**
     * Returns the original query plus one variant per non-blank line of the
     * model's output (the prompt requests three rewordings).
     */
    @Override
    public Collection<Query> transform(Query query) {
        String prompt = String.format("""
                Generate 3 variations of this search query using synonyms and related terms:
                Original: %s
                Variations:
                """, query.text());

        List<Query> expanded = new ArrayList<>();
        expanded.add(query); // always keep the untouched original
        for (String line : chatModel.chat(prompt).split("\n")) {
            String candidate = line.trim();
            if (!candidate.isEmpty()) {
                expanded.add(Query.from(candidate));
            }
        }
        return expanded;
    }
}

/**
* Route queries to appropriate retrievers
*/
public class CustomQueryRouter implements QueryRouter {

    // Intent label -> retriever serving that intent.
    private final Map<String, ContentRetriever> retrievers;
    // Chat model used as a zero-shot intent classifier.
    private final ChatModel classifier;

    /**
     * Routes the query to the retriever registered for its LLM-classified
     * intent; yields no retrievers when the label is not in the registry.
     */
    @Override
    public Collection<ContentRetriever> route(Query query) {
        String prompt = String.format("""
                Classify the intent of this query:
                Options: [technical, api, tutorial, troubleshooting]
                Query: %s
                Intent:
                """, query.text());

        // Normalise the label so it matches the lowercase registry keys.
        String intent = classifier.chat(prompt).trim().toLowerCase();

        ContentRetriever match = retrievers.get(intent);
        return match == null ? List.of() : List.of(match);
    }
}

package dev.langchain4j.rag.content.aggregator;
import dev.langchain4j.rag.content.Content;
/**
 * Aggregates content retrieved from one or more sources — e.g.
 * deduplication, reranking, filtering.
 */
public interface ContentAggregator {
/**
 * Aggregates and post-processes the retrieved contents.
 *
 * @param contents the contents as returned by the retriever(s)
 * @param query the original query (available for relevance scoring)
 * @return the processed contents
 */
List<Content> aggregate(List<Content> contents, Query query);
}

import dev.langchain4j.model.scoring.ScoringModel;
/**
 * Reranks retrieved content with a scoring model and drops results whose
 * score falls below {@code minScore}.
 * <p>
 * Thread-Safety: depends on the underlying ScoringModel.
 */
public class ReRankingAggregator implements ContentAggregator {

    private final ScoringModel scoringModel;
    private final double minScore;

    @Override
    public List<Content> aggregate(List<Content> contents, Query query) {
        // Guard: nothing to score. Avoids a pointless (and possibly
        // provider-rejected) scoreAll call on an empty batch.
        if (contents.isEmpty()) {
            return List.of();
        }

        // Extract the raw text of each candidate, preserving input order.
        List<String> texts = contents.stream()
                .map(Content::textSegment)
                .map(TextSegment::text)
                .collect(Collectors.toList());

        // Score all candidates against the query in one batched call.
        // NOTE(review): assumes scoreAll returns exactly one score per input,
        // in input order — confirm against the ScoringModel contract.
        Response<List<Double>> scores = scoringModel.scoreAll(texts, query.text());

        // Pair each content with its score, dropping low-scoring entries.
        List<ScoredContent> scored = new ArrayList<>();
        for (int i = 0; i < contents.size(); i++) {
            double score = scores.content().get(i);
            if (score >= minScore) {
                scored.add(new ScoredContent(contents.get(i), score));
            }
        }

        // Sort best-first and strip the scores back off.
        scored.sort((a, b) -> Double.compare(b.score(), a.score()));
        return scored.stream()
                .map(ScoredContent::content)
                .collect(Collectors.toList());
    }
}

package dev.langchain4j.rag.content.injector;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.rag.content.Content;
/**
 * Injects retrieved content into the user message before it is sent to the
 * model.
 */
public interface ContentInjector {
/**
 * Builds an augmented user message containing the retrieved content.
 * NOTE(review): List is not imported in this snippet — presumably
 * java.util.List.
 *
 * @param userMessage the original user message
 * @param contents the retrieved contents to embed into the message
 * @return the augmented user message
 */
UserMessage inject(UserMessage userMessage, List<Content> contents);
}

// ✅ GOOD: Retrieve more, then filter/rerank
ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
.maxResults(20) // Retrieve more candidates
.minScore(0.5) // Lower threshold
.build();
// Then rerank to get best 5
ContentAggregator aggregator = ReRankingContentAggregator.builder()
.maxResults(5) // Keep top 5 after reranking
.minScore(0.8) // Higher threshold for final results
.build();

// Filter by category, date, language, etc.
Filter filter = Filter.and(
Filter.metadataKey("category").isEqualTo("technical"),
Filter.metadataKey("version").isEqualTo("2.0"),
Filter.metadataKey("language").isEqualTo("en")
);
ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
.filter(filter)
.build();

List<Content> contents = retriever.retrieve(query);
if (contents.isEmpty()) {
return "I don't have enough information to answer that question. " +
"Please try rephrasing or ask something else.";
}
// Proceed with generation

// Don't exceed model's context window
int MAX_CONTEXT_TOKENS = 4000;
String context = buildContext(contents);
int tokens = tokenizer.countTokens(context);
if (tokens > MAX_CONTEXT_TOKENS) {
// Truncate or summarize
context = truncateToTokenLimit(context, MAX_CONTEXT_TOKENS);
}

String prompt = """
Answer based on the following sources. Cite sources in your answer.
%s
Question: %s
Answer with citations:
""";
// Format sources with citations
StringBuilder sourcesBuilder = new StringBuilder();
for (int i = 0; i < contents.size(); i++) {
Content content = contents.get(i);
sourcesBuilder.append(String.format("[%d] %s\n\n",
i + 1,
content.textSegment().text()));
}

Query → Embed → Search → Inject → Generate
Best for: Simple Q&A, single knowledge base

Query → Embed → Search (20 results) → Rerank (top 5) → Inject → Generate
Best for: Improved relevance, larger knowledge bases

Query → Route → [Search DB1, Search DB2, Search Web] → Aggregate → Inject → Generate
Best for: Multiple data sources, heterogeneous content

Query → Agent decides → [RAG | Tool | Direct answer] → Generate
Best for: Complex queries, multi-step reasoning
// Cache embeddings for common queries
LoadingCache<String, Embedding> queryCache = Caffeine.newBuilder()
.maximumSize(1000)
.expireAfterWrite(1, TimeUnit.HOURS)
.build(key -> embeddingModel.embed(key).content());
// Use cached embedding
Embedding queryEmbedding = queryCache.get(userQuery);

// Retrieve from multiple sources in parallel
CompletableFuture<List<Content>> future1 = CompletableFuture.supplyAsync(
() -> retriever1.retrieve(query));
CompletableFuture<List<Content>> future2 = CompletableFuture.supplyAsync(
() -> retriever2.retrieve(query));
// Combine results
List<Content> allContents = new ArrayList<>();
allContents.addAll(future1.join());
allContents.addAll(future2.join());

| Pitfall | Solution |
|---|---|
| Too much context | Limit to top 5-10 relevant chunks |
| Too little context | Increase maxResults, lower minScore |
| No source citations | Add source metadata to prompt |
| Stale content | Implement content refresh strategy |
| Irrelevant results | Use reranking and metadata filters |
| Slow retrieval | Cache embeddings, use parallel retrieval |
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-core@1.11.0