Build LLM-powered applications in Java with support for chatbots, agents, RAG, tools, and much more
LangChain4j is a Java library for building LLM-powered applications with support for chatbots, agents, RAG (Retrieval Augmented Generation), tools, guardrails, and much more. It provides a high-level API for working with chat models, streaming, memory management, document processing, embeddings, and various integrations.
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
<version>1.11.0</version>
</dependency>

// Core AI Services API
import dev.langchain4j.service.AiServices;
import dev.langchain4j.service.SystemMessage;
import dev.langchain4j.service.UserMessage;
import dev.langchain4j.service.V;
import dev.langchain4j.service.MemoryId;
// Memory management
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import dev.langchain4j.memory.chat.TokenWindowChatMemory;
import dev.langchain4j.memory.chat.ChatMemoryProvider;
// Prompts and templates
import dev.langchain4j.model.input.Prompt;
import dev.langchain4j.model.input.PromptTemplate;
import dev.langchain4j.model.input.structured.StructuredPrompt;
// RAG (Retrieval Augmented Generation)
import dev.langchain4j.rag.RetrievalAugmentor;
import dev.langchain4j.rag.DefaultRetrievalAugmentor;
import dev.langchain4j.rag.content.retriever.ContentRetriever;
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
// Document processing
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.parser.TextDocumentParser;
import dev.langchain4j.data.document.splitter.DocumentSplitters;
// Embedding store
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
// Tools
import dev.langchain4j.agent.tool.Tool;

import dev.langchain4j.service.AiServices;
// Define your AI service interface
interface Assistant {
String chat(String message);
}
// Create AI service with a chat model
Assistant assistant = AiServices.create(Assistant.class, chatModel);
// Use the assistant
String response = assistant.chat("Hello, how are you?");
System.out.println(response);

When to use: Basic Q&A, single-user conversations
// Minimal setup - no memory, no tools
Assistant assistant = AiServices.create(Assistant.class, chatModel);
String answer = assistant.chat("What is Java?");

When to use: Multiple users, need conversation history
interface Assistant {
String chat(@MemoryId String userId, String message);
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.chatMemoryProvider(id -> MessageWindowChatMemory.withMaxMessages(20))
.build();
// Each user gets separate conversation history
assistant.chat("user1", "My name is Alice");
assistant.chat("user1", "What's my name?"); // Remembers "Alice"

When to use: Need to answer questions from your documents
// 1. Load and embed documents
EmbeddingStore<TextSegment> store = new InMemoryEmbeddingStore<>();
EmbeddingModel embeddingModel = /* your embedding model */;
List<Document> documents = FileSystemDocumentLoader.loadDocuments("/path/to/docs");
for (Document doc : documents) {
List<TextSegment> segments = DocumentSplitters.recursive(500, 50, tokenizer).split(doc);
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
store.addAll(embeddings, segments);
}
// 2. Create retriever
ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
.embeddingStore(store)
.embeddingModel(embeddingModel)
.maxResults(5)
.minScore(0.7)
.build();
// 3. Build assistant with RAG
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.contentRetriever(retriever)
.build();
String answer = assistant.chat("What does the documentation say about X?");

When to use: Need to call external functions/APIs
class Tools {
@Tool("Get current weather")
String getWeather(String city) {
// Call weather API
return "Sunny, 72°F";
}
@Tool("Search database")
String searchDB(String query) {
// Query database
return "Found 10 results";
}
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.tools(new Tools())
.build();
// LLM will automatically call tools when needed
String answer = assistant.chat("What's the weather in Paris?");

When to use: Extract structured data from text
record Person(String name, int age, String city) {}
interface Extractor {
Person extractPerson(String text);
}
Extractor extractor = AiServices.create(Extractor.class, chatModel);
Person person = extractor.extractPerson("John is 30 years old and lives in NYC");
// Returns: Person[name=John, age=30, city=NYC]

Need conversation history?
├─ NO → Don't configure memory (stateless)
└─ YES → How many users?
├─ Single user → Use MessageWindowChatMemory.withMaxMessages(N)
└─ Multiple users → Use ChatMemoryProvider with @MemoryId
└─ Token limit concern?
├─ YES → TokenWindowChatMemory with TokenCountEstimator
└─ NO → MessageWindowChatMemory with maxMessages

User experience requirement?
├─ Show response token-by-token → StreamingChatModel + TokenStream return type
└─ Get complete response at once → ChatModel + String/POJO return type

Data source?
├─ Documents/knowledge base → EmbeddingStoreContentRetriever
├─ Web search → WebSearchContentRetriever
├─ Custom data source → Implement ContentRetriever interface
└─ Multiple sources → Use QueryRouter with multiple retrievers

LangChain4j is built around several key components:
Core interfaces for interacting with language models, including chat, streaming, and embeddings.
public interface ChatModel {
ChatResponse chat(ChatRequest chatRequest);
String chat(String userMessage);
ChatResponse chat(ChatMessage... messages);
Set<Capability> supportedCapabilities();
}
public interface StreamingChatModel {
void chat(ChatRequest chatRequest, StreamingChatResponseHandler handler);
void chat(String userMessage, StreamingChatResponseHandler handler);
}
public interface EmbeddingModel {
Response<Embedding> embed(String text);
Response<List<Embedding>> embedAll(List<TextSegment> textSegments);
int dimension();
}

Thread Safety: ChatModel and EmbeddingModel implementations are typically thread-safe. StreamingChatModel requires careful handling of concurrent requests to avoid handler confusion.
Common Pitfalls:
- Check supportedCapabilities() before using advanced features like tool calling or JSON response formats

Related APIs: Chat and Language Models
Message types for chat interactions including UserMessage, AiMessage, SystemMessage, and multimodal content support.
public class UserMessage implements ChatMessage {
public UserMessage(String text);
public UserMessage(List<Content> contents);
public String singleText();
public List<Content> contents();
}
public class AiMessage implements ChatMessage {
public AiMessage(String text);
public String text();
public String thinking();
public List<ToolExecutionRequest> toolExecutionRequests();
}
public class SystemMessage implements ChatMessage {
public SystemMessage(String text);
public String text();
}

Thread Safety: Message classes are immutable and thread-safe.
Common Pitfalls:
- Calling singleText() on a multimodal UserMessage throws RuntimeException - use hasSingleText() first
- Do not modify the lists returned by contents() or toolExecutionRequests() - they may be unmodifiable

Edge Cases:
Related APIs: Chat Messages
Request and response types for chat model interactions with comprehensive parameter control.
public class ChatRequest {
public List<ChatMessage> messages();
public ChatRequestParameters parameters();
public static Builder builder();
}
public class ChatResponse {
public AiMessage aiMessage();
public TokenUsage tokenUsage();
public FinishReason finishReason();
}
public interface StreamingChatResponseHandler {
void onPartialResponse(String partialResponse);
void onCompleteResponse(ChatResponse completeResponse);
void onError(Throwable error);
}

Thread Safety: ChatRequest and ChatResponse are immutable. StreamingChatResponseHandler callbacks are invoked sequentially on the same thread.
Common Pitfalls:
Performance Notes:
Related APIs: Chat Requests and Responses
Template system for creating reusable prompts with variable substitution. Supports structured prompts and automatic date/time injection.
/**
* Represents a prompt (input text sent to LLM)
*/
public class Prompt {
public Prompt(String text);
public String text();
public UserMessage toUserMessage();
public SystemMessage toSystemMessage();
public static Prompt from(String text);
}
/**
* Template with {{variable}} placeholders
* Special variables: {{current_date}}, {{current_time}}, {{current_date_time}}
*/
public class PromptTemplate {
public PromptTemplate(String template);
public Prompt apply(Object value);
public Prompt apply(Map<String, Object> variables);
public static PromptTemplate from(String template);
}
/**
* Annotation for structured prompts on Java classes
*/
@Target(TYPE)
@Retention(RUNTIME)
public @interface StructuredPrompt {
String[] value();
String delimiter() default "\n";
}

Thread Safety: PromptTemplate is thread-safe and can be reused across threads. Prompt instances are immutable.
Common Pitfalls:
Performance Notes:
Related APIs: Prompts and Templates
Comprehensive RAG framework for augmenting LLM responses with retrieved information. Supports query transformation, routing, content retrieval, aggregation, and injection.
/**
* Entry point for RAG flow
*/
public interface RetrievalAugmentor {
AugmentationResult augment(AugmentationRequest augmentationRequest);
}
/**
* Default RAG implementation with full pipeline
*/
public class DefaultRetrievalAugmentor implements RetrievalAugmentor {
public static Builder builder();
}
/**
* Retrieves content from data sources using queries
*/
public interface ContentRetriever {
List<Content> retrieve(Query query);
}
/**
* Embedding-based content retrieval
*/
public class EmbeddingStoreContentRetriever implements ContentRetriever {
public static Builder builder();
}
/**
* Web search content retrieval
*/
public class WebSearchContentRetriever implements ContentRetriever {
public WebSearchContentRetriever(WebSearchEngine webSearchEngine);
}

Thread Safety: DefaultRetrievalAugmentor is thread-safe if all configured components are thread-safe. EmbeddingStoreContentRetriever is thread-safe if the underlying EmbeddingStore is thread-safe.
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Related APIs: RAG (Retrieval Augmented Generation), Embedding Store
Core data types for documents, embeddings, tools, and structured data.
public interface Document {
String text();
Metadata metadata();
TextSegment toTextSegment();
}
public class TextSegment {
public String text();
public Metadata metadata();
}
public class Embedding {
public float[] vector();
public int dimension();
}
public class ToolSpecification {
public String name();
public String description();
public JsonObjectSchema parameters();
}

Thread Safety: Document, TextSegment, and Embedding are immutable and thread-safe. Metadata is mutable - avoid concurrent modifications.
Common Pitfalls:
Edge Cases:
Related APIs: Data Types
High-level API for creating AI-powered services by defining Java interfaces. AiServices provides implementations that handle chat models, streaming, memory, RAG, tools, guardrails, and various output types.
/**
* Abstract class for building AI services from Java interfaces.
* Supports system/user message templates, chat memory, RAG, tools, streaming,
* moderation, and various return types.
*/
public abstract class AiServices<T> {
/**
* Create a simple AI service with a chat model
* @param aiService Interface defining the AI service API
* @param chatModel Chat model to use
* @return Implementation of the AI service interface
* @throws IllegalConfigurationException if configuration is invalid
*/
public static <T> T create(Class<T> aiService, ChatModel chatModel);
/**
* Create a simple AI service with a streaming chat model
* @param aiService Interface defining the AI service API
* @param streamingChatModel Streaming chat model to use
* @return Implementation of the AI service interface
* @throws IllegalConfigurationException if configuration is invalid
*/
public static <T> T create(Class<T> aiService, StreamingChatModel streamingChatModel);
/**
* Begin building an AI service with full configuration options
* @param aiService Interface defining the AI service API
* @return Builder for configuring the AI service
*/
public static <T> AiServices<T> builder(Class<T> aiService);
}

Thread Safety: Generated AI service implementations are thread-safe. Multiple threads can call methods concurrently. However, if using ChatMemory without ChatMemoryProvider, memory is shared across threads.
Common Pitfalls:
- Forgetting to call .build() on the builder - it returns the builder, not the service

Edge Cases:
Performance Notes:
Cost Considerations:
Exception Handling:
Testing Patterns:
// Use mock ChatModel for testing
ChatModel mockModel = (request) -> ChatResponse.builder()
.aiMessage(AiMessage.from("Mocked response"))
.build();
Assistant assistant = AiServices.create(Assistant.class, mockModel);
String response = assistant.chat("test");
assertEquals("Mocked response", response);

Related APIs: AI Services, Tools, Memory
Chat memory implementations for maintaining conversation context. Supports message window and token window strategies with optional persistence.
/**
* Provider interface for obtaining ChatMemory instances
*/
public interface ChatMemoryProvider {
/**
* Get ChatMemory for given memory ID (user/conversation)
* @param memoryId Identifier for the memory (can be any type with proper equals/hashCode)
* @return ChatMemory instance for the given ID
*/
ChatMemory get(Object memoryId);
}
/**
* ChatMemory implementation that retains a fixed number of most recent messages
*/
public class MessageWindowChatMemory implements ChatMemory {
/**
* Create with max message limit
* @param maxMessages Maximum number of messages to retain
* @return MessageWindowChatMemory instance
*/
public static MessageWindowChatMemory withMaxMessages(int maxMessages);
/**
* Create builder for full configuration
* @return Builder instance
*/
public static Builder builder();
}
/**
* ChatMemory implementation that retains messages within a token limit
*/
public class TokenWindowChatMemory implements ChatMemory {
/**
* Create with max token limit
* @param maxTokens Maximum number of tokens to retain
* @param tokenizer Token count estimator
* @return TokenWindowChatMemory instance
* @throws IllegalArgumentException if maxTokens <= 0 or tokenizer is null
*/
public static TokenWindowChatMemory withMaxTokens(int maxTokens, TokenCountEstimator tokenizer);
/**
* Create builder for full configuration
* @return Builder instance
*/
public static Builder builder();
}

Thread Safety: MessageWindowChatMemory and TokenWindowChatMemory are NOT thread-safe. Use ChatMemoryProvider with concurrent map for thread-safe multi-user scenarios.
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Testing Patterns:
// Test memory isolation
ChatMemoryProvider provider = memoryId -> MessageWindowChatMemory.withMaxMessages(10);
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(mockModel)
.chatMemoryProvider(provider)
.build();
assistant.chat("user1", "My name is Alice");
assistant.chat("user2", "My name is Bob");
// Verify each user has separate memory

Related APIs: Chat Memory, AI Services
Loaders, parsers, splitters, and sources for working with documents. Supports file system, classpath, and URL sources.
/**
* Document loader for loading documents from the file system
*/
public class FileSystemDocumentLoader {
/**
* Load a single document from path
* @param filePath Path to the document
* @return Loaded document
* @throws RuntimeException if file cannot be read
*/
public static Document loadDocument(Path filePath);
/**
* Load a single document with custom parser
* @param filePath Path to the document
* @param documentParser Parser to use
* @return Loaded document
* @throws RuntimeException if file cannot be read or parsing fails
*/
public static Document loadDocument(Path filePath, DocumentParser documentParser);
/**
* Load all documents from directory (non-recursive)
* @param directoryPath Path to directory
* @return List of loaded documents
* @throws RuntimeException if directory cannot be read
*/
public static List<Document> loadDocuments(Path directoryPath);
/**
* Load documents recursively from directory
* @param directoryPath Path to directory
* @return List of loaded documents
* @throws RuntimeException if directory cannot be read
*/
public static List<Document> loadDocumentsRecursively(Path directoryPath);
}
/**
* Utility class providing factory methods for recommended document splitters
*/
public class DocumentSplitters {
/**
* Create recursive splitter with token limits (recommended for generic text)
* @param maxSegmentSizeInTokens Maximum segment size in tokens
* @param maxOverlapSizeInTokens Maximum overlap size in tokens
* @param tokenCountEstimator Token count estimator
* @return Configured document splitter
* @throws IllegalArgumentException if maxSegmentSize <= 0 or overlap >= maxSegmentSize
*/
public static DocumentSplitter recursive(
int maxSegmentSizeInTokens,
int maxOverlapSizeInTokens,
TokenCountEstimator tokenCountEstimator
);
/**
* Create recursive splitter with character limits
* @param maxSegmentSizeInChars Maximum segment size in characters
* @param maxOverlapSizeInChars Maximum overlap size in characters
* @return Configured document splitter
* @throws IllegalArgumentException if maxSegmentSize <= 0 or overlap >= maxSegmentSize
*/
public static DocumentSplitter recursive(
int maxSegmentSizeInChars,
int maxOverlapSizeInChars
);
}

Thread Safety: Document loaders are stateless and thread-safe. Document splitters are stateless and can be reused across threads.
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Testing Patterns:
// Test with in-memory documents
Document doc = new Document("Test content", Metadata.from("source", "test"));
DocumentSplitter splitter = DocumentSplitters.recursive(100, 10);
List<TextSegment> segments = splitter.split(doc);
assertTrue(segments.size() > 0);

Related APIs: Document Processing, RAG
Automatic conversion of LLM outputs to Java types including primitives, dates, enums, POJOs, and collections.
/**
* Interface for parsing LLM output to desired types
*/
public interface OutputParser<T> {
/**
* Parse LLM output text to target type
* @param text Output text from LLM
* @return Parsed object of type T
* @throws OutputParsingException if parsing fails
*/
T parse(String text);
/**
* Get format instructions to include in prompt
* @return Format instructions string
*/
String formatInstructions();
}

Available output parsers:
- Numeric: BooleanOutputParser, ByteOutputParser, ShortOutputParser, IntegerOutputParser, LongOutputParser, FloatOutputParser, DoubleOutputParser
- Big numbers: BigIntegerOutputParser, BigDecimalOutputParser
- Date/time: DateOutputParser, LocalDateOutputParser, LocalTimeOutputParser, LocalDateTimeOutputParser
- Enums: EnumOutputParser, EnumListOutputParser, EnumSetOutputParser, EnumCollectionOutputParser
- String collections: StringListOutputParser, StringSetOutputParser, StringCollectionOutputParser
- POJOs: PojoOutputParser, PojoListOutputParser, PojoSetOutputParser, PojoCollectionOutputParser

Thread Safety: OutputParser implementations are stateless and thread-safe.
Common Pitfalls:
Edge Cases:
Performance Notes:
Exception Handling:
Testing Patterns:
// Test parser with known output
record Person(String name, int age) {}
OutputParser<Person> parser = new PojoOutputParser<>(Person.class);
String llmOutput = "{\"name\": \"Alice\", \"age\": 30}";
Person person = parser.parse(llmOutput);
assertEquals("Alice", person.name());
assertEquals(30, person.age());

Related APIs: Output Parsing, AI Services
Framework for function calling and tool execution. Allows LLMs to call Java methods as tools with automatic JSON argument parsing.
/**
* Interface for executing tools
*/
public interface ToolExecutor {
/**
* Execute tool with given request
* @param toolExecutionRequest Request containing tool name and arguments
* @param memoryId Memory ID for context
* @return Result string from tool execution
* @throws RuntimeException if tool execution fails and propagation is enabled
*/
String execute(ToolExecutionRequest toolExecutionRequest, Object memoryId);
}
/**
* Interface for providing tools dynamically
*/
public interface ToolProvider {
/**
* Provide tools for the given request
* @param request Request containing context for tool selection
* @return Result with tools to make available
*/
ToolProviderResult provideTools(ToolProviderRequest request);
}
/**
* Context object passed before tool execution
*/
public class BeforeToolExecution {
// Contains tool execution request, memory ID, and context
}
/**
* Represents a tool execution with request and result
*/
public class ToolExecution {
// Contains tool execution request and result
}

Thread Safety: ToolExecutor implementations must be thread-safe if used in concurrent scenarios. Tool objects should be stateless or use proper synchronization.
Common Pitfalls:
Edge Cases:
- Tools may be invoked with empty arguments ({})

Performance Notes:
- Concurrent tool execution (executeToolsConcurrently()) speeds up parallel tool calls

Cost Considerations:
Exception Handling:
Testing Patterns:
// Mock tools for testing
class MockTools {
@Tool("Get weather")
String getWeather(String city) {
return "Mocked: Sunny in " + city;
}
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(mockChatModel)
.tools(new MockTools())
.build();
// Test tool execution tracking
List<ToolExecution> executions = new ArrayList<>();
assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.tools(new MockTools())
.afterToolExecution(executions::add)
.build();
assistant.chat("What's the weather?");
assertFalse(executions.isEmpty());

Related APIs: Tools, AI Services
Input and output validation and filtering for AI services.
/**
* Annotation for configuring input guardrails at class level
*/
@Target(TYPE)
@Retention(RUNTIME)
public @interface InputGuardrails {
// Configuration for input guardrails
}
/**
* Annotation for configuring output guardrails at class level
*/
@Target(TYPE)
@Retention(RUNTIME)
public @interface OutputGuardrails {
// Configuration for output guardrails
}

Thread Safety: Guardrail implementations must be thread-safe as they're shared across all invocations.
Common Pitfalls:
Edge Cases:
Performance Notes:
Testing Patterns:
// Test guardrail in isolation
class TestGuardrail implements Guardrail {
public String validate(String input) {
if (input.contains("bad")) {
return "Input rejected";
}
return input;
}
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(mockModel)
.inputGuardrails(new TestGuardrail())
.build();

Related APIs: Guardrails, AI Services
In-memory implementation of embedding store for vector similarity search.
/**
* In-memory implementation of EmbeddingStore
* Stores embeddings in memory without persistence
*/
public class InMemoryEmbeddingStore<Embedded> implements EmbeddingStore<Embedded> {
/**
* Default constructor
*/
public InMemoryEmbeddingStore();
/**
* Load from file
* @param file Path to file
* @return InMemoryEmbeddingStore instance
* @throws RuntimeException if file cannot be read or parsed
*/
public static <Embedded> InMemoryEmbeddingStore<Embedded> fromFile(Path file);
/**
* Add embedding
* @param embedding Embedding to add
* @return Generated ID
*/
public String add(Embedding embedding);
/**
* Add embedding with embedded object
* @param embedding Embedding to add
* @param embedded Embedded object to associate
* @return Generated ID
*/
public String add(Embedding embedding, Embedded embedded);
/**
* Find relevant embeddings
* @param referenceEmbedding Reference embedding for similarity search
* @param maxResults Maximum number of results
* @param minScore Minimum similarity score (0.0 to 1.0)
* @return List of embedding matches sorted by score descending
*/
public List<EmbeddingMatch<Embedded>> findRelevant(
Embedding referenceEmbedding,
int maxResults,
double minScore
);
/**
* Serialize to file
* @param file Path to file
* @throws RuntimeException if file cannot be written
*/
public void serializeToFile(Path file);
}

Thread Safety: InMemoryEmbeddingStore is thread-safe. All methods are synchronized for concurrent access.
Common Pitfalls:
Edge Cases:
Performance Notes:
Memory Considerations:
Testing Patterns:
// Test with small in-memory store
InMemoryEmbeddingStore<TextSegment> store = new InMemoryEmbeddingStore<>();
Embedding emb1 = embeddingModel.embed("test").content();
TextSegment seg1 = TextSegment.from("test");
String id = store.add(emb1, seg1);
// Test similarity search
List<EmbeddingMatch<TextSegment>> results = store.findRelevant(emb1, 5, 0.7);
assertEquals(1, results.size());
assertEquals(1.0, results.get(0).score(), 0.01); // Perfect match

Related APIs: Embedding Store, RAG
Text classification using embedding-based similarity with labeled examples.
/**
* Interface for classifying text based on a set of labels
* Can return zero, one, or multiple labels for each classification
*/
public interface TextClassifier<L> {
/**
* Classify text
* @param text Text to classify
* @return List of labels (may be empty)
*/
List<L> classify(String text);
/**
* Classify text with scores
* @param text Text to classify
* @return Classification result with scored labels
*/
ClassificationResult<L> classifyWithScores(String text);
}
/**
* TextClassifier implementation using EmbeddingModel and predefined examples
* Classification performed by computing similarity between input embedding
* and embeddings of labeled example texts
*/
public class EmbeddingModelTextClassifier<L> implements TextClassifier<L> {
/**
* Constructor with default values
* @param embeddingModel Embedding model to use
* @param examplesByLabel Map of labels to example texts
* @throws IllegalArgumentException if embeddingModel is null or examplesByLabel is empty
*/
public EmbeddingModelTextClassifier(
EmbeddingModel embeddingModel,
Map<L, ? extends Collection<String>> examplesByLabel
);
/**
* Full constructor with configuration
* @param embeddingModel Embedding model to use
* @param examplesByLabel Map of labels to example texts
* @param maxResults Maximum number of labels to return (default: 1)
* @param minScore Minimum score threshold (default: 0.0)
* @param meanToMaxScoreRatio Ratio for filtering results (default: 0.0)
* @throws IllegalArgumentException if embeddingModel is null, examplesByLabel is empty,
* maxResults < 1, minScore < 0, or meanToMaxScoreRatio < 0
*/
public EmbeddingModelTextClassifier(
EmbeddingModel embeddingModel,
Map<L, ? extends Collection<String>> examplesByLabel,
int maxResults,
double minScore,
double meanToMaxScoreRatio
);
}
/**
* Represents the result of classification with scored labels
*/
public class ClassificationResult<L> {
/**
* Constructor
* @param scoredLabels List of scored labels
*/
public ClassificationResult(List<ScoredLabel<L>> scoredLabels);
/**
* Get scored labels
* @return List of scored labels sorted by score descending
*/
public List<ScoredLabel<L>> scoredLabels();
}
/**
* Represents a classification label with associated score
*/
public class ScoredLabel<L> {
/**
* Constructor
* @param label The label
* @param score The score (0.0 to 1.0)
*/
public ScoredLabel(L label, double score);
/**
* Get the label
* @return The label
*/
public L label();
/**
* Get the score
* @return The score (0.0 to 1.0)
*/
public double score();
}

Thread Safety: EmbeddingModelTextClassifier is thread-safe if the EmbeddingModel is thread-safe. Classification operations can be performed concurrently.
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Testing Patterns:
// Test with known examples
Map<String, List<String>> examples = Map.of(
"positive", List.of("great", "excellent", "wonderful"),
"negative", List.of("bad", "terrible", "awful")
);
TextClassifier<String> classifier = new EmbeddingModelTextClassifier<>(
embeddingModel,
examples
);
ClassificationResult<String> result = classifier.classifyWithScores("amazing product");
assertEquals("positive", result.scoredLabels().get(0).label());
assertTrue(result.scoredLabels().get(0).score() > 0.7);

Related APIs: Text Classification, Embedding Models
Legacy chain API for sequential processing. Deprecated in favor of AiServices.
/**
* Functional interface representing a chain step
* Deprecated in favor of AiServices
*/
@Deprecated
public interface Chain<Input, Output> {
/**
* Execute the chain step
* @param input Input to process
* @return Output from processing
*/
Output execute(Input input);
}
/**
* A chain for conversing with a ChatModel while maintaining memory
* Deprecated in favor of AiServices
*/
@Deprecated
public class ConversationalChain implements Chain<String, String> {
/**
* Create builder
* @return Builder instance
*/
public static ConversationalChainBuilder builder();
/**
* Execute chain with user message
* @param userMessage User message
* @return Response from chat model
*/
public String execute(String userMessage);
}
/**
* A chain for conversing with a ChatModel based on retrieved information
* Supports RAG with RetrievalAugmentor
* Deprecated in favor of AiServices
*/
@Deprecated
public class ConversationalRetrievalChain implements Chain<String, String> {
/**
* Create builder
* @return Builder instance
*/
public static Builder builder();
/**
* Execute chain with query
* @param query Query to process
* @return Response from chat model
*/
public String execute(String query);
}

Migration Guide:
Related APIs: Chains, AI Services
SPI interfaces for customization and framework integration.
/**
* SPI factory interface for creating AI service contexts
*/
public interface AiServiceContextFactory {
// Factory methods for creating AI service contexts
}
/**
* SPI factory interface for creating AI services
*/
public interface AiServicesFactory {
// Factory methods for creating AI services
}
/**
* SPI adapter interface for token streams
*/
public interface TokenStreamAdapter {
// Adapter methods for token streams
}
/**
* SPI factory interface for creating guardrail service builders
*/
public interface GuardrailServiceBuilderFactory {
// Factory methods for guardrail service builders
}
/**
* SPI factory interface for creating JSON codecs for in-memory embedding store
*/
public interface InMemoryEmbeddingStoreJsonCodecFactory {
// Factory methods for JSON codecs
}

Thread Safety: SPI implementations must be thread-safe as they're typically singletons.
Common Pitfalls:
Related APIs: Service Provider Interfaces
| Scenario | Recommended Approach | Reason |
|---|---|---|
| Single user, short conversations | MessageWindowChatMemory(10-20) | Simple and fast |
| Multiple users, short conversations | ChatMemoryProvider + MessageWindowChatMemory | User isolation |
| Long conversations near token limit | TokenWindowChatMemory | Precise token control |
| Need persistence across restarts | ChatMemoryStore integration | Data durability |
| Extremely high concurrency | Distributed cache + ChatMemoryStore | Scalability |
| Data Source | Retriever Type | When to Use |
|---|---|---|
| Document database | EmbeddingStoreContentRetriever | Semantic search on internal documents |
| Web content | WebSearchContentRetriever | Real-time information from web |
| SQL database | Custom ContentRetriever | Structured data queries |
| Multiple sources | QueryRouter + multiple retrievers | Hybrid search across systems |
| Graph database | Custom ContentRetriever | Relationship-based retrieval |
| Return Type | Parser | Notes |
|---|---|---|
| Simple types (String, int, boolean) | Automatic primitive parsers | No configuration needed |
| Enum | EnumOutputParser | Case-sensitive by default |
| Single POJO | PojoOutputParser | Requires public fields or setters |
| List of POJOs | PojoListOutputParser | May fail on malformed JSON |
| Date/Time | Date/LocalDate/LocalTime parsers | Format awareness needed |
| Complex nested objects | PojoOutputParser with nested classes | Validate schema carefully |
// Step 1: Load documents
List<Document> documents = FileSystemDocumentLoader
    .loadDocumentsRecursively(Paths.get("/path/to/docs"));
// Step 2: Split into chunks (max 500 tokens per segment, 50-token overlap)
DocumentSplitter splitter = DocumentSplitters.recursive(500, 50, tokenizer);
List<TextSegment> segments = new ArrayList<>();
for (Document doc : documents) {
    segments.addAll(splitter.split(doc));
}
// Step 3: Embed and store — embedAll() batches the embedding requests,
// far fewer round-trips than embedding one segment at a time
EmbeddingStore<TextSegment> store = new InMemoryEmbeddingStore<>();
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
store.addAll(embeddings, segments);
// Step 4: Create RAG retriever
ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
    .embeddingStore(store)
    .embeddingModel(embeddingModel)
    .maxResults(5)   // top-5 most similar segments per query
    .minScore(0.7)   // drop weakly related matches
    .build();
// Step 5: Build AI service
interface KnowledgeBot {
    // With more than one parameter, the user-message parameter must be
    // annotated with @UserMessage so AiServices can distinguish it from
    // the @MemoryId parameter.
    String chat(@MemoryId String userId, @UserMessage String question);
}
KnowledgeBot bot = AiServices.builder(KnowledgeBot.class)
    .chatModel(chatModel)
    .chatMemoryProvider(id -> MessageWindowChatMemory.withMaxMessages(10))
    .contentRetriever(retriever)
    .build();
// Step 6: Use the bot
String answer = bot.chat("user123", "How do I configure X?");// Step 1: Define tools
class BusinessTools {

    /** Looks up a customer record; stubbed to echo the id in this example. */
    @Tool("Get customer information by ID")
    String getCustomer(String customerId) {
        // A real implementation would delegate to the customer service.
        String reply = "Customer: " + customerId;
        return reply;
    }

    /** Reports shipping state for an order; hard-coded in this example. */
    @Tool("Get order status by order ID")
    String getOrderStatus(String orderId) {
        // A real implementation would delegate to the order service.
        return "Order status: Shipped";
    }

    /** Opens a support ticket; returns a fixed ticket id in this example. */
    @Tool("Create support ticket")
    String createTicket(String customerId, String issue) {
        // A real implementation would create the ticket in a backing system.
        return "Ticket created: #12345";
    }
}
// Step 2: Configure AI service with tools
interface SupportAgent {
    // @UserMessage marks which parameter carries the user's text; it is
    // required because the method has more than one parameter (the other
    // being the @MemoryId session key).
    String chat(@MemoryId String sessionId, @UserMessage String message);
}
SupportAgent agent = AiServices.builder(SupportAgent.class)
    .chatModel(chatModel)
    .chatMemoryProvider(id -> MessageWindowChatMemory.withMaxMessages(20))
    .tools(new BusinessTools())
    // NOTE(review): confirm the builder exposes systemMessage(String) in the
    // targeted release — some versions only offer systemMessageProvider(...)
    .systemMessage("You are a customer support agent. Use the available tools to help customers.")
    // Observability hooks: log each tool invocation and its result
    .beforeToolExecution(ctx ->
        log.info("Executing tool: " + ctx.toolExecutionRequest().name()))
    .afterToolExecution(exec ->
        log.info("Tool result: " + exec.result()))
    .build();
// Step 3: Handle customer requests
String response = agent.chat("session456",
    "What's the status of my order #67890?");
// Agent will automatically call getOrderStatus tool// Step 1: Define data structure
// Step 1: Define data structure
// Immutable product record. The compact constructor snapshots the features
// list so callers cannot mutate the record's state after construction
// (defensive copy — Effective Java, Item 50). Note: List.copyOf also
// rejects a null list or null elements with NullPointerException.
record Product(
String name,
String category,
double price,
List<String> features
) {
Product {
features = List.copyOf(features);
}
}
// Step 2: Create extractor
// Step 2: Create extractor
// AI service interface: AiServices parses the model's reply into a
// List<Product> (structured output) — no manual JSON handling needed.
interface ProductExtractor {
List<Product> extractProducts(String text);
}
ProductExtractor extractor = AiServices.create(ProductExtractor.class, chatModel);
// Step 3: Process documents in parallel
List<Document> documents = FileSystemDocumentLoader.loadDocuments(inputPath);
ExecutorService executor = Executors.newFixedThreadPool(4);
List<Product> allProducts = new ArrayList<>();
try {
    List<Future<List<Product>>> futures = documents.stream()
        .map(doc -> executor.submit(() -> extractor.extractProducts(doc.text())))
        .toList();
    // Step 4: Collect results
    for (Future<List<Product>> future : futures) {
        try {
            allProducts.addAll(future.get());
        } catch (InterruptedException e) {
            // Restore the interrupt flag and stop waiting on remaining futures
            Thread.currentThread().interrupt();
            break;
        } catch (ExecutionException e) {
            // Log the real failure, not the Future wrapper
            log.error("Extraction failed", e.getCause());
        }
    }
} finally {
    // Always release the pool, even if collection fails part-way through
    executor.shutdown();
}
// Step 5: Save results
saveToDatabase(allProducts);// Reuse chat model instances - they typically maintain connection pools
ChatModel chatModel = OpenAiChatModel.builder()
.apiKey(System.getenv("OPENAI_API_KEY")) // NOTE(review): getenv returns null if unset — consider failing fast at startup
.modelName("gpt-4")
.build();
// Don't create new instance per request
// BAD: new OpenAiChatModel(...) in hot path
// GOOD: Singleton or application-scoped instance// For long-running applications, periodically clean up memory
// Per-id memory provider that evicts entries untouched for over an hour,
// bounding memory growth in long-running applications.
ChatMemoryProvider provider = new ChatMemoryProvider() {
private final Map<Object, ChatMemory> memories = new ConcurrentHashMap<>();
private final ScheduledExecutorService cleaner =
Executors.newSingleThreadScheduledExecutor();
{
// Clean up stale memories every hour
// NOTE(review): the cleaner executor is never shut down — use a daemon
// thread factory or an explicit shutdown hook in production.
cleaner.scheduleAtFixedRate(() -> {
long cutoff = System.currentTimeMillis() - TimeUnit.HOURS.toMillis(1);
// NOTE(review): assumes every stored value is a TimestampedMemory (a
// ChatMemory wrapper tracking lastAccessTime) — the cast fails otherwise.
memories.entrySet().removeIf(entry ->
((TimestampedMemory) entry.getValue()).lastAccessTime() < cutoff
);
}, 1, 1, TimeUnit.HOURS);
}
@Override
public ChatMemory get(Object memoryId) {
// Lazily create one memory per id; computeIfAbsent is atomic on
// ConcurrentHashMap, so concurrent callers share one instance.
return memories.computeIfAbsent(memoryId,
id -> new TimestampedMemory(MessageWindowChatMemory.withMaxMessages(20))
);
}
};// Retry with exponential backoff for transient failures
// Service interface intended to be backed by the retrying model below.
interface ResilientAssistant {
@SystemMessage("You are a helpful assistant")
String chat(String message);
}
// Decorator that retries failed chat calls with exponential backoff
// (2s, then 4s) before surfacing the last failure to the caller.
ChatModel resilientModel = new ChatModel() {
private final ChatModel delegate = actualChatModel;
private final int maxRetries = 3;
@Override
public ChatResponse chat(ChatRequest request) {
int attempt = 0;
while (attempt < maxRetries) {
try {
return delegate.chat(request);
} catch (Exception e) {
// NOTE(review): retrying every Exception also retries non-transient
// errors (e.g. bad requests) — consider filtering by exception type.
attempt++;
// Out of retries: rethrow the last failure.
if (attempt >= maxRetries) throw e;
// Backoff grows as 2^attempt seconds: 2s after the first failure, 4s after the second.
long delay = (long) Math.pow(2, attempt) * 1000;
try {
Thread.sleep(delay);
} catch (InterruptedException ie) {
// Restore the interrupt flag and abort the retry loop.
Thread.currentThread().interrupt();
throw new RuntimeException(ie);
}
}
}
// Unreachable: the loop either returns or rethrows on the final attempt.
throw new IllegalStateException("Should not reach here");
}
// Implement other methods...
};Possible causes:
Solutions:
// Ensure model is configured
// A model must be supplied to the builder before build(); without it the
// resulting service has nothing to call.
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel) // Must be set
.build();
// Check return type is supported (String, POJOs, enums, primitives, etc.)Possible causes:
Solutions:
// Make descriptions very specific
// A precise @Tool description plus @P parameter hints gives the model the
// best chance of selecting this tool and filling its arguments correctly.
@Tool("Get the current weather forecast for a specific city. " +
"Returns temperature, conditions, and humidity.")
String getWeather(
@P("The city name, e.g., 'San Francisco' or 'New York'") String city
) {
// ...
}
// Check model capabilities
// NOTE(review): verify that Capability.TOOLS exists in the targeted release —
// some versions of the Capability enum only define response-format values.
if (chatModel.supportedCapabilities().contains(Capability.TOOLS)) {
// Model supports tools
}Possible causes:
Solutions:
// Work through the document list in fixed-size slices so memory stays bounded
int batchSize = 100;
for (int start = 0; start < documents.size(); start += batchSize) {
    int end = Math.min(start + batchSize, documents.size());
    processDocuments(documents.subList(start, end));
}
// Use persistent embedding store instead of in-memory
// Consider vector databases like Pinecone, Weaviate, etc.Possible causes:
Solutions:
// Tune retrieval parameters
// Fewer, higher-scoring matches reduce irrelevant context in the prompt.
ContentRetriever retriever = EmbeddingStoreContentRetriever.builder()
.embeddingStore(store)
.embeddingModel(embeddingModel)
.maxResults(3) // Start small
.minScore(0.75) // Higher threshold
.build();
// Use better chunking strategy
// recursive(maxSegmentSize, maxOverlap, tokenizer): smaller segments keep
// each chunk focused; overlap preserves context across chunk boundaries.
DocumentSplitter splitter = DocumentSplitters.recursive(
300, // Smaller chunks
50, // More overlap
tokenizer
);Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j