Zero-configuration RAG package that bundles document parsing, embedding, and splitting for easy Retrieval-Augmented Generation in Java applications
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
import dev.langchain4j.service.AiServices;
import java.nio.file.Paths;
// 1. Load and ingest document
Document doc = FileSystemDocumentLoader.loadDocument(Paths.get("document.pdf"));
EmbeddingStore<TextSegment> store = new InMemoryEmbeddingStore<>();
EmbeddingStoreIngestor.ingest(doc, store);
// 2. Create RAG-enabled assistant
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel) // Your ChatModel instance
.contentRetriever(EmbeddingStoreContentRetriever.from(store))
.build();
// 3. Query with RAG
String answer = assistant.chat("What is this document about?");

What happens automatically:
// Load multiple documents from directory
List<Document> docs = FileSystemDocumentLoader.loadDocumentsRecursively(
Paths.get("documents")
);
// Ingest all at once
EmbeddingStore<TextSegment> store = new InMemoryEmbeddingStore<>();
EmbeddingStoreIngestor.ingest(docs, store);

import dev.langchain4j.data.document.splitter.DocumentSplitters;
import dev.langchain4j.model.embedding.EmbeddingModel;
// Create custom ingestor
EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
.documentSplitter(DocumentSplitters.recursive(500, 50)) // Custom chunk size
.embeddingModel(customModel) // Custom embedding model
.embeddingStore(store)
.build();
// Ingest with custom configuration
ingestor.ingest(documents);

import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
// Configure retrieval parameters
ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
.embeddingStore(store)
.maxResults(5) // Return top 5 results
.minScore(0.7) // Minimum similarity score
.displayName("DocRetriever")
.build();
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.contentRetriever(retriever)
.build();

import java.nio.file.FileSystems;
import java.nio.file.PathMatcher;
// Load only PDF files
PathMatcher pdfMatcher = FileSystems.getDefault()
.getPathMatcher("glob:**.pdf");
List<Document> pdfs = FileSystemDocumentLoader.loadDocumentsRecursively(
Paths.get("documents"),
pdfMatcher
);

import dev.langchain4j.store.embedding.IngestionResult;
import dev.langchain4j.model.output.TokenUsage;
IngestionResult result = EmbeddingStoreIngestor.ingest(document, store);
TokenUsage usage = result.tokenUsage();
System.out.println("Input tokens: " + usage.inputTokenCount());
System.out.println("Total tokens: " + usage.totalTokenCount());

Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j-easy-rag@1.11.0