Build LLM-powered applications in Java with support for chatbots, agents, RAG, tools, and much more
Core model interfaces for interacting with language models. These interfaces provide the foundation for all LLM interactions in LangChain4j, supporting both synchronous and streaming chat, embeddings, and simple text generation.
Main interface for synchronous chat interactions with language models. Supports single-turn and multi-turn conversations with full message history.
package dev.langchain4j.model.chat;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.chat.listener.ChatModelListener;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.request.ChatRequestParameters;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.model.ModelProvider;
import java.util.List;
import java.util.Set;
/**
 * Represents a language model that has a chat API.
 * All chat(...) overloads here are synchronous (blocking); a streaming counterpart
 * delivers tokens incrementally instead.
 * NOTE(review): thread-safety is implementation-specific - confirm with the concrete provider.
 */
public interface ChatModel {
/**
 * Main API to interact with the chat model.
 * @param chatRequest Request containing all inputs to the LLM (messages and parameters)
 * @return Response containing all outputs from the LLM (AI message, finish reason, token usage)
 */
ChatResponse chat(ChatRequest chatRequest);
/**
 * Convenience method for simple text-based chat.
 * Prefer {@link #chat(ChatRequest)} when you need structured responses or metadata.
 * @param userMessage User message text
 * @return Response text from the LLM
 */
String chat(String userMessage);
/**
 * Chat with a variable number of messages.
 * @param messages Messages to send, in conversation order
 * @return Response from the LLM
 */
ChatResponse chat(ChatMessage... messages);
/**
 * Chat with a list of messages.
 * @param messages List of messages to send, in conversation order
 * @return Response from the LLM
 */
ChatResponse chat(List<ChatMessage> messages);
/**
 * Get supported capabilities (e.g. JSON-schema structured output).
 * Check this before relying on advanced features.
 * @return Set of capabilities this model supports
 */
Set<Capability> supportedCapabilities();
/**
 * Get default request parameters, applied when a request does not override them.
 * @return Default parameters for requests
 */
ChatRequestParameters defaultRequestParameters();
/**
 * Get registered listeners, notified about requests, responses and errors.
 * @return List of chat model listeners
 */
List<ChatModelListener> listeners();
/**
 * Get model provider.
 * @return Provider of this model
 */
ModelProvider provider();
}
Thread Safety:
Common Pitfalls:
- Prefer chat(ChatRequest) over the chat(String) method if you need structured responses or metadata.
- Check supportedCapabilities() before using advanced features.
Edge Cases:
Performance Notes:
Cost Considerations:
- Each chat() call consumes tokens (input + output); see ChatResponse.tokenUsage() for actual usage.
- Use estimateTokenCount() (if the model implements TokenCountEstimator) before expensive calls.
Exception Handling:
- RuntimeException or subclasses for network errors, API errors, rate limiting.
- IOException wrapped in a runtime exception for network failures.
- IllegalArgumentException for invalid inputs (null messages, empty text).
- Check ChatResponse.finishReason() - may indicate truncation or content filtering.
Usage Example (Basic):
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.message.SystemMessage;
// Simple text chat
String response = chatModel.chat("Hello, how are you?");
// Structured chat with request
ChatRequest request = ChatRequest.builder()
.messages(
SystemMessage.from("You are a helpful assistant."),
UserMessage.from("What is the capital of France?")
)
.build();
ChatResponse chatResponse = chatModel.chat(request);
String answer = chatResponse.aiMessage().text();Usage Example (Production-Ready with Error Handling):
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.model.output.FinishReason;
import java.util.concurrent.TimeUnit;
/**
 * Example of a production-oriented chat wrapper with retries and backoff.
 *
 * Fixes over the previous version:
 * - the final ChatModel field was never assigned (would not compile); a constructor is added
 * - LENGTH/CONTENT_FILTER outcomes used to throw a plain RuntimeException that was then
 *   caught by the retry catch below and retried - retrying cannot fix truncation or
 *   moderation, and each retry still costs tokens; they are now terminal
 * - an unrecognized finish reason used to fall through silently and trigger a paid retry;
 *   it now fails fast with a descriptive message
 */
public class SafeChatExample {

    private final ChatModel chatModel;

    public SafeChatExample(ChatModel chatModel) {
        this.chatModel = chatModel;
    }

    /**
     * Sends {@code userMessage} to the model, retrying transient failures with
     * exponential backoff (1s, 2s, 4s, ...).
     *
     * @param userMessage text of the user's message
     * @param maxRetries  maximum number of attempts; must be positive
     * @return the model's reply text
     * @throws IllegalArgumentException for invalid input (not retried)
     * @throws IllegalStateException    for terminal model outcomes (truncation, moderation)
     * @throws RuntimeException         after all retries are exhausted
     */
    public String chatWithRetry(String userMessage, int maxRetries) {
        if (maxRetries <= 0) {
            throw new IllegalArgumentException("maxRetries must be positive: " + maxRetries);
        }
        for (int attempt = 0; attempt < maxRetries; attempt++) {
            try {
                ChatRequest request = ChatRequest.builder()
                        .messages(
                                SystemMessage.from("You are a helpful assistant."),
                                UserMessage.from(userMessage)
                        )
                        .build();
                ChatResponse response = chatModel.chat(request);
                FinishReason reason = response.finishReason();
                if (reason == FinishReason.STOP) {
                    // Log token usage for cost tracking
                    System.out.println("Tokens used: " + response.tokenUsage());
                    return response.aiMessage().text();
                }
                // Terminal outcomes: retrying will not help and still consumes tokens.
                if (reason == FinishReason.LENGTH) {
                    throw new IllegalStateException("Response truncated due to length");
                }
                if (reason == FinishReason.CONTENT_FILTER) {
                    throw new IllegalStateException("Content filtered by moderation");
                }
                throw new IllegalStateException("Unexpected finish reason: " + reason);
            } catch (IllegalArgumentException e) {
                // Invalid input - retrying cannot succeed
                throw e;
            } catch (IllegalStateException e) {
                // Terminal model outcome - do not retry
                throw e;
            } catch (RuntimeException e) {
                if (attempt == maxRetries - 1) {
                    throw new RuntimeException("Failed after " + maxRetries + " attempts", e);
                }
                // Exponential backoff for rate limiting / transient network errors
                try {
                    TimeUnit.SECONDS.sleep((long) Math.pow(2, attempt));
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("Interrupted during retry", ie);
                }
            }
        }
        // Unreachable: every loop path either returns or throws.
        throw new RuntimeException("Should not reach here");
    }
}Capability Checking Pattern:
import dev.langchain4j.model.chat.Capability;
// Check if model supports JSON schema responses before using
if (chatModel.supportedCapabilities().contains(Capability.RESPONSE_FORMAT_JSON_SCHEMA)) {
// Use structured output
ChatRequestParameters params = ChatRequestParameters.builder()
.responseFormat(ResponseFormat.JSON)
.build();
} else {
// Fallback to parsing text responses
System.out.println("Model doesn't support JSON schema, using text parsing");
}Related APIs:
- StreamingChatModel - For streaming responses
- ChatMessage - Message types (System, User, AI, Tool)
- ChatRequest - Request builder with parameters
- ChatResponse - Response with metadata
- ChatModelListener - For monitoring and logging
- TokenCountEstimator - For estimating costs before calls

Interface for streaming chat interactions where responses are delivered token-by-token in real-time.
package dev.langchain4j.model.chat;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.model.chat.listener.ChatModelListener;
import dev.langchain4j.model.chat.request.ChatRequest;
import dev.langchain4j.model.chat.request.ChatRequestParameters;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import dev.langchain4j.model.ModelProvider;
import java.util.List;
import java.util.Set;
/**
 * Represents a language model that can stream responses token-by-token.
 * Results are delivered via callbacks on the supplied handler: partial responses,
 * then one complete response, or an error.
 * NOTE(review): which thread invokes the handler callbacks is implementation-specific - confirm per provider.
 */
public interface StreamingChatModel {
/**
 * Main API to interact with the streaming chat model.
 * @param chatRequest Request containing all inputs to the LLM
 * @param handler Handler receiving partial responses, the complete response, and errors
 */
void chat(ChatRequest chatRequest, StreamingChatResponseHandler handler);
/**
 * Convenience method for simple text-based streaming chat.
 * @param userMessage User message text
 * @param handler Handler receiving partial responses, the complete response, and errors
 */
void chat(String userMessage, StreamingChatResponseHandler handler);
/**
 * Stream chat with a list of messages.
 * @param messages List of messages to send, in conversation order
 * @param handler Handler receiving partial responses, the complete response, and errors
 */
void chat(List<ChatMessage> messages, StreamingChatResponseHandler handler);
/**
 * Get supported capabilities (e.g. JSON-schema structured output).
 * @return Set of capabilities this model supports
 */
Set<Capability> supportedCapabilities();
/**
 * Get default request parameters, applied when a request does not override them.
 * @return Default parameters for requests
 */
ChatRequestParameters defaultRequestParameters();
/**
 * Get registered listeners, notified about requests, responses and errors.
 * @return List of chat model listeners
 */
List<ChatModelListener> listeners();
/**
 * Get model provider.
 * @return Provider of this model
 */
ModelProvider provider();
}
Thread Safety:
Common Pitfalls:
- Avoid slow work inside onPartialResponse() - it delays subsequent tokens.
- Do not ignore onError() - silent failures are hard to debug.
- onCompleteResponse() is called AFTER all partial responses.
Edge Cases:
- onPartialResponse() may be called only once if the response is very short.
- A stream may deliver onCompleteResponse() without any onPartialResponse() calls.
- A stream may terminate with onError() mid-stream.
Performance Notes:
- Accumulate partial tokens, or use onCompleteResponse() to get complete text.
Cost Considerations:
- Token usage is available on the final ChatResponse in onCompleteResponse().
Exception Handling:
- onError() is called for all errors during streaming.
- Always implement onError() - never leave it empty.
Usage Example (Basic):
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import dev.langchain4j.model.chat.response.ChatResponse;
StreamingChatResponseHandler handler = new StreamingChatResponseHandler() {
@Override
public void onPartialResponse(String partialResponse) {
System.out.print(partialResponse); // Print each token as it arrives
}
@Override
public void onCompleteResponse(ChatResponse completeResponse) {
System.out.println("\nComplete!");
}
@Override
public void onError(Throwable error) {
error.printStackTrace();
}
};
streamingChatModel.chat("Tell me a story", handler);Usage Example (Production-Ready with CompletableFuture):
import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import dev.langchain4j.model.output.FinishReason;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
/**
 * Bridges the callback-based streaming API to a CompletableFuture that resolves
 * to the full response text, with an overall timeout.
 */
public class StreamingChatHelper {
// Streams a chat response and completes the returned future with the accumulated
// text on success (FinishReason.STOP), or exceptionally on any other outcome.
public CompletableFuture<String> chatAsync(
StreamingChatModel model,
String message,
int timeoutSeconds) {
CompletableFuture<String> future = new CompletableFuture<>();
StreamingChatResponseHandler handler = new StreamingChatResponseHandler() {
// Accumulates partial tokens; synchronized because callbacks may arrive
// on a provider-owned thread (NOTE(review): callback threading is
// provider-specific - confirm before removing the lock).
private final StringBuilder accumulated = new StringBuilder();
@Override
public void onPartialResponse(String token) {
synchronized (accumulated) {
accumulated.append(token);
// Optional: Publish progress
System.out.print(token);
}
}
@Override
public void onCompleteResponse(ChatResponse response) {
// Validate finish reason
if (response.finishReason() == FinishReason.STOP) {
future.complete(accumulated.toString());
} else {
// Truncation / content filtering etc. are surfaced as failures
// rather than returning partial text.
future.completeExceptionally(
new RuntimeException("Unexpected finish reason: " +
response.finishReason())
);
}
}
@Override
public void onError(Throwable error) {
future.completeExceptionally(error);
}
};
try {
model.chat(message, handler);
} catch (Exception e) {
// Synchronous failures (e.g. invalid input) also resolve the future,
// so callers never hang waiting on a call that was never started.
future.completeExceptionally(e);
}
// Apply timeout
return future.orTimeout(timeoutSeconds, TimeUnit.SECONDS);
}
}Usage Example (Rate Limiting Handler):
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Streaming handler that stops accumulating output after a fixed number of chunks.
 * NOTE(review): "maxTokens" counts onPartialResponse() invocations, not model tokens -
 * a chunk may carry more or less than one token depending on the provider.
 */
public class RateLimitingHandler implements StreamingChatResponseHandler {
// Maximum number of partial-response chunks to accept.
private final int maxTokens;
private final AtomicInteger tokenCount = new AtomicInteger(0);
// Text accumulated before the limit was reached.
private final StringBuilder result = new StringBuilder();
// volatile so a limit tripped by one callback is visible to later callbacks.
private volatile boolean stopped = false;
public RateLimitingHandler(int maxTokens) {
this.maxTokens = maxTokens;
}
@Override
public void onPartialResponse(String token) {
if (stopped) return;
if (tokenCount.incrementAndGet() > maxTokens) {
stopped = true;
// NOTE(review): this only stops accumulating locally - it cannot cancel the
// underlying stream, so the provider may keep generating (and billing) tokens.
System.err.println("Token limit exceeded, stopping stream");
return;
}
result.append(token);
System.out.print(token);
}
@Override
public void onCompleteResponse(ChatResponse response) {
if (!stopped) {
System.out.println("\nCompleted: " + response.tokenUsage());
}
}
@Override
public void onError(Throwable error) {
// Best-effort logging; the partial result (if any) remains readable via getResult().
System.err.println("Error during streaming: " + error.getMessage());
}
// Returns whatever text was accumulated before the limit (or an error) was hit.
public String getResult() {
return result.toString();
}
}Related APIs:
ChatModel - Non-streaming equivalentStreamingChatResponseHandler - Handler interfaceChatRequest - Request configurationChatResponse - Final response with metadataCompletableFuture - For async patternsInterface for converting text into vector embeddings for semantic search and similarity comparisons.
package dev.langchain4j.model.embedding;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.listener.EmbeddingModelListener;
import dev.langchain4j.model.output.Response;
import java.util.List;
/**
 * Represents a model that can convert text into embeddings (vector representations).
 * Prefer embedAll(...) for batches: one request instead of N.
 */
public interface EmbeddingModel {
/**
 * Embed a single text string.
 * @param text Text to embed
 * @return Response containing the embedding
 */
Response<Embedding> embed(String text);
/**
 * Embed the text content of a TextSegment.
 * NOTE(review): whether segment metadata influences the embedding is provider-specific - confirm.
 * @param textSegment Text segment to embed
 * @return Response containing the embedding
 */
Response<Embedding> embed(TextSegment textSegment);
/**
 * Embed multiple text segments in a single batch request.
 * @param textSegments Text segments to embed
 * @return Response containing list of embeddings
 */
Response<List<Embedding>> embedAll(List<TextSegment> textSegments);
/**
 * Get the dimension (vector length) of embeddings produced by this model.
 * @return Embedding dimension
 */
int dimension();
/**
 * Get the name of the underlying embedding model.
 * @return Model name or "unknown" if not provided
 */
String modelName();
/**
 * Add a listener for embedding operations.
 * @param listener Listener to add
 * @return EmbeddingModel with listener attached
 */
EmbeddingModel addListener(EmbeddingModelListener listener);
/**
 * Add multiple listeners for embedding operations.
 * @param listeners Listeners to add
 * @return EmbeddingModel with listeners attached
 */
EmbeddingModel addListeners(List<EmbeddingModelListener> listeners);
}
Thread Safety:
- Prefer embedAll() for batch processing - more efficient than parallel embed() calls.
Common Pitfalls:
- Prefer embedAll() over many individual embed() calls (much faster).
Edge Cases:
Performance Notes:
- embedAll() is 5-10x faster than multiple embed() calls for batches.
Cost Considerations:
- Batch requests (embedAll()) cost the same as individual calls but are faster; see Response.tokenUsage() for actual token counts.
Exception Handling:
- IllegalArgumentException for null or invalid input.
Usage Example (Basic):
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
// Embed a single text
Response<Embedding> response = embeddingModel.embed("Hello world");
Embedding embedding = response.content();
float[] vector = embedding.vector();
int dimension = embedding.dimension();
// Embed multiple texts
List<TextSegment> segments = List.of(
TextSegment.from("First text"),
TextSegment.from("Second text")
);
Response<List<Embedding>> multiResponse = embeddingModel.embedAll(segments);
List<Embedding> embeddings = multiResponse.content();Usage Example (Production-Ready with Caching):
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
/**
 * Wraps an EmbeddingModel with an in-memory, text-keyed embedding cache.
 * Cache hits skip the model entirely; all misses are embedded in one batched call.
 * NOTE(review): this snippet uses Response, which its import block does not show.
 */
public class CachedEmbeddingService {
private final EmbeddingModel model;
// text -> embedding; ConcurrentHashMap so concurrent lookups are safe.
private final Map<String, Embedding> cache = new ConcurrentHashMap<>();
private final int maxCacheSize;
public CachedEmbeddingService(EmbeddingModel model, int maxCacheSize) {
this.model = model;
this.maxCacheSize = maxCacheSize;
}
// Returns one embedding per input text, in input order.
public List<Embedding> embedWithCache(List<String> texts) {
// Separate cached and uncached
List<String> uncached = new ArrayList<>();
List<Embedding> results = new ArrayList<>(texts.size());
for (String text : texts) {
Embedding cached = cache.get(text);
if (cached != null) {
results.add(cached);
} else {
uncached.add(text);
results.add(null); // Placeholder
}
}
// Batch embed uncached texts
if (!uncached.isEmpty()) {
try {
List<TextSegment> segments = uncached.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
Response<List<Embedding>> response = model.embedAll(segments);
List<Embedding> newEmbeddings = response.content();
// Cache new embeddings
for (int i = 0; i < uncached.size(); i++) {
Embedding embedding = newEmbeddings.get(i);
cache.put(uncached.get(i), embedding);
// Evict if cache too large (simple LRU would be better)
// NOTE(review): iteration order is arbitrary, so this may evict an entry
// that was just inserted; results are unaffected because they are filled
// from newEmbeddings below, not from the cache.
if (cache.size() > maxCacheSize) {
String firstKey = cache.keySet().iterator().next();
cache.remove(firstKey);
}
}
// Fill in results
// Placeholders were appended in input order, so walking results and
// newEmbeddings in parallel lines them up correctly.
int uncachedIdx = 0;
for (int i = 0; i < results.size(); i++) {
if (results.get(i) == null) {
results.set(i, newEmbeddings.get(uncachedIdx++));
}
}
} catch (RuntimeException e) {
throw new RuntimeException("Failed to embed texts: " + e.getMessage(), e);
}
}
return results;
}
// Drops all cached embeddings.
public void clearCache() {
cache.clear();
}
// Current number of cached entries.
public int getCacheSize() {
return cache.size();
}
}Usage Example (Similarity Search):
import dev.langchain4j.data.embedding.Embedding;
/**
 * Helpers for comparing embeddings and finding the most similar documents.
 * NOTE(review): findMostSimilar also needs java.util and Collectors imports
 * that this snippet's import block does not show.
 */
public class SimilarityHelper {

    /**
     * Cosine similarity between two embeddings, in [-1, 1].
     * Vectors do NOT need to be pre-normalized - both norms are computed here.
     * (The previous version's comment claimed normalization was assumed, which
     * contradicted the code; it also returned NaN for zero-magnitude vectors.)
     *
     * @param e1 first embedding
     * @param e2 second embedding
     * @return similarity score, or 0.0 when either vector has zero magnitude
     * @throws IllegalArgumentException if the embedding dimensions differ
     */
    public static double cosineSimilarity(Embedding e1, Embedding e2) {
        float[] v1 = e1.vector();
        float[] v2 = e2.vector();
        if (v1.length != v2.length) {
            throw new IllegalArgumentException("Embedding dimensions don't match");
        }
        double dotProduct = 0.0;
        double norm1 = 0.0;
        double norm2 = 0.0;
        for (int i = 0; i < v1.length; i++) {
            dotProduct += v1[i] * v2[i];
            norm1 += v1[i] * v1[i];
            norm2 += v2[i] * v2[i];
        }
        double denominator = Math.sqrt(norm1) * Math.sqrt(norm2);
        // Guard the zero-vector case instead of dividing by zero (NaN).
        if (denominator == 0.0) {
            return 0.0;
        }
        return dotProduct / denominator;
    }

    /**
     * Returns the {@code topK} documents most similar to {@code query}.
     * The query and all documents are embedded in a single batched call,
     * which is far cheaper than per-document embed() calls.
     *
     * @param query     query text
     * @param documents candidate document texts
     * @param model     embedding model to use
     * @param topK      number of documents to return
     * @return top-K documents, most similar first
     */
    public static List<String> findMostSimilar(
            String query,
            List<String> documents,
            EmbeddingModel model,
            int topK) {
        // Embed all at once for efficiency; the query is at index 0.
        List<String> allTexts = new ArrayList<>();
        allTexts.add(query);
        allTexts.addAll(documents);
        List<TextSegment> segments = allTexts.stream()
                .map(TextSegment::from)
                .collect(Collectors.toList());
        List<Embedding> embeddings = model.embedAll(segments).content();
        Embedding queryEmbedding = embeddings.get(0);
        // Score each document against the query (document i is at embedding i + 1).
        List<Map.Entry<String, Double>> similarities = new ArrayList<>();
        for (int i = 0; i < documents.size(); i++) {
            double similarity = cosineSimilarity(queryEmbedding, embeddings.get(i + 1));
            similarities.add(Map.entry(documents.get(i), similarity));
        }
        // Sort by similarity descending and keep the top K.
        return similarities.stream()
                .sorted(Map.Entry.<String, Double>comparingByValue().reversed())
                .limit(topK)
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
    }
}Related APIs:
Embedding - Vector representationTextSegment - Structured text with metadataEmbeddingStore - For persisting embeddingsEmbeddingModelListener - For monitoringSimple text generation interface without chat message structure. Recommended to use ChatModel instead for more features.
package dev.langchain4j.model.language;
import dev.langchain4j.model.input.Prompt;
import dev.langchain4j.model.output.Response;
/**
 * Represents a language model with a simple text-in / text-out interface.
 * Unlike ChatModel there is no message structure (roles, history);
 * ChatModel is recommended for most use cases.
 */
public interface LanguageModel {
/**
 * Generate a completion for the given prompt text.
 * @param prompt Prompt text
 * @return Response containing generated text (plus metadata such as token usage)
 */
Response<String> generate(String prompt);
/**
 * Generate a completion for the given prompt object (e.g. built from a template).
 * @param prompt Prompt object
 * @return Response containing generated text (plus metadata such as token usage)
 */
Response<String> generate(Prompt prompt);
}Thread Safety:
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Exception Handling:
Related APIs:
ChatModel - Recommended alternativeStreamingLanguageModel - Streaming versionPrompt - Prompt template supportEnum representing capabilities that models can support.
package dev.langchain4j.model.chat;
/**
 * Represents a capability of a ChatModel or StreamingChatModel.
 * Used by low-level APIs to communicate supported features to high-level APIs;
 * check membership via supportedCapabilities() before relying on a feature.
 */
public enum Capability {
/**
 * Indicates model supports responding in JSON format according to a specified JSON schema
 */
RESPONSE_FORMAT_JSON_SCHEMA
}Thread Safety:
Common Pitfalls:
supportedCapabilities() before using advanced featuresUsage Example:
import dev.langchain4j.model.chat.Capability;
/** Small helpers for feature-detecting a model before using structured output. */
public class CapabilityChecker {

    /** Reports whether the model advertises JSON-schema structured output. */
    public static boolean supportsJsonSchema(ChatModel model) {
        boolean supported = model.supportedCapabilities()
                .contains(Capability.RESPONSE_FORMAT_JSON_SCHEMA);
        return supported;
    }

    /** Prints which output mode would be chosen for the given model. */
    public static void useJsonSchemaIfSupported(ChatModel model) {
        // Structured output when available, otherwise plain-text parsing.
        String mode = supportsJsonSchema(model)
                ? "Using JSON schema mode"
                : "Falling back to text parsing";
        System.out.println(mode);
    }
}Related APIs:
ChatModel.supportedCapabilities() - Check model capabilitiesChatRequestParameters - Configure request based on capabilitiesSimple streaming text generation interface. Recommended to use StreamingChatModel instead for more features.
package dev.langchain4j.model.language;
import dev.langchain4j.model.StreamingResponseHandler;
import dev.langchain4j.model.input.Prompt;
/**
 * Represents a language model that streams a plain-text completion token-by-token.
 * StreamingChatModel is recommended for most use cases.
 */
public interface StreamingLanguageModel {
/**
 * Stream a response to the given prompt text.
 * @param prompt Prompt text
 * @param handler Handler receiving partial results, the final result, and errors
 */
void generate(String prompt, StreamingResponseHandler<String> handler);
/**
 * Stream a response to the given prompt object (e.g. built from a template).
 * @param prompt Prompt object
 * @param handler Handler receiving partial results, the final result, and errors
 */
void generate(Prompt prompt, StreamingResponseHandler<String> handler);
}Thread Safety:
Common Pitfalls:
Performance Notes:
Related APIs:
StreamingChatModel - Recommended alternativeStreamingResponseHandler - Handler interfaceInterface for content moderation to detect harmful, unsafe, or policy-violating content.
package dev.langchain4j.model.moderation;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.input.Prompt;
import dev.langchain4j.model.output.Response;
import java.util.List;
/**
 * Represents a model that can moderate text content.
 * Used for detecting harmful, unsafe, or policy-violating content
 * before it is sent to (or after it is produced by) an LLM.
 */
public interface ModerationModel {
/**
 * Moderate a raw text string.
 * @param text Text to moderate
 * @return Moderation response (flagged status and details)
 */
Response<Moderation> moderate(String text);
/**
 * Moderate the text of a prompt.
 * @param prompt Prompt to moderate
 * @return Moderation response (flagged status and details)
 */
Response<Moderation> moderate(Prompt prompt);
/**
 * Moderate a single chat message.
 * @param message Chat message to moderate
 * @return Moderation response (flagged status and details)
 */
Response<Moderation> moderate(ChatMessage message);
/**
 * Moderate a list of chat messages (e.g. a whole conversation).
 * @param messages Chat messages to moderate
 * @return Moderation response (flagged status and details)
 */
Response<Moderation> moderate(List<ChatMessage> messages);
/**
 * Moderate the text content of a segment.
 * @param textSegment Text segment to moderate
 * @return Moderation response (flagged status and details)
 */
Response<Moderation> moderate(TextSegment textSegment);
}Thread Safety:
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Exception Handling:
Usage Example (Basic):
import dev.langchain4j.model.moderation.ModerationModel;
import dev.langchain4j.model.moderation.Moderation;
import dev.langchain4j.model.output.Response;
Response<Moderation> response = moderationModel.moderate("Some text to check");
Moderation moderation = response.content();
if (moderation.flagged()) {
System.out.println("Content flagged: " + moderation.flaggedText());
}Usage Example (Production-Ready with Detailed Checks):
import dev.langchain4j.model.moderation.ModerationModel;
import dev.langchain4j.model.moderation.Moderation;
import dev.langchain4j.data.message.UserMessage;
import java.util.logging.Logger;
/**
 * Wraps a ModerationModel with logging and an explicit allow/block result.
 * Policy: fail OPEN - if the moderation service itself errors, content is allowed
 * (logged); swap the catch branch to fail closed if your compliance rules require it.
 * NOTE(review): moderationModel is a final field that is never assigned in this
 * snippet - a constructor is needed for it to compile.
 */
public class ContentModerator {
private static final Logger log = Logger.getLogger(ContentModerator.class.getName());
private final ModerationModel moderationModel;
// Checks user input against the moderation model and returns an allow/block decision.
public ModerationResult moderateUserInput(String userInput) {
try {
Response<Moderation> response = moderationModel.moderate(userInput);
Moderation moderation = response.content();
if (moderation.flagged()) {
// Log for compliance and review
log.warning("Content flagged: categories=" +
moderation.flaggedCategories() +
", user input length=" + userInput.length());
return ModerationResult.blocked(
"Your message contains content that violates our policies: " +
moderation.flaggedCategories()
);
}
return ModerationResult.allowed();
} catch (RuntimeException e) {
// Fail open: allow content but log error
log.severe("Moderation service error: " + e.getMessage());
return ModerationResult.allowed(); // Or fail closed with .blocked()
}
}
// Immutable allow/block outcome; reason is null when allowed.
public static class ModerationResult {
private final boolean allowed;
private final String reason;
private ModerationResult(boolean allowed, String reason) {
this.allowed = allowed;
this.reason = reason;
}
public static ModerationResult allowed() {
return new ModerationResult(true, null);
}
public static ModerationResult blocked(String reason) {
return new ModerationResult(false, reason);
}
public boolean isAllowed() { return allowed; }
public String getReason() { return reason; }
}
}Usage Example (Async Moderation for Non-Blocking):
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
 * Runs moderation calls on a small background pool so callers are not blocked.
 * Policy: fail open - any moderation error is treated as "allowed".
 */
public class AsyncModerator {

    private final ModerationModel moderationModel;
    private final ExecutorService executor = Executors.newFixedThreadPool(5);

    /**
     * Moderates {@code content} asynchronously.
     * @return a future resolving to {@code true} when the content is acceptable
     */
    public CompletableFuture<Boolean> moderateAsync(String content) {
        return CompletableFuture.supplyAsync(() -> isAcceptable(content), executor);
    }

    /** Performs the blocking moderation call; failures are treated as acceptable. */
    private boolean isAcceptable(String content) {
        try {
            Response<Moderation> result = moderationModel.moderate(content);
            return !result.content().flagged();
        } catch (Exception e) {
            // Log and fail open
            return true;
        }
    }

    /** Stops accepting new work and shuts the pool down. */
    public void shutdown() {
        executor.shutdown();
    }
}Related APIs:
Moderation - Moderation result with categoriesChatMessage - For moderating messagesResponse - Wrapper with metadataInterface for scoring/reranking text segments against a query. Useful for re-ranking retrieved documents.
package dev.langchain4j.model.scoring;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import java.util.List;
/**
 * Represents a model capable of scoring text against a query.
 * Useful for re-ranking retrieved documents by relevance
 * (e.g. as a second stage after embedding-based retrieval).
 */
public interface ScoringModel {
/**
 * Score a single text against the query.
 * @param text Text to score
 * @param query Query to score against
 * @return Response containing relevance score (higher = more relevant)
 */
Response<Double> score(String text, String query);
/**
 * Score a single segment against the query.
 * @param segment Text segment to score
 * @param query Query to score against
 * @return Response containing relevance score (higher = more relevant)
 */
Response<Double> score(TextSegment segment, String query);
/**
 * Score multiple segments against the query in one request.
 * @param segments Text segments to score
 * @param query Query to score against
 * @return Response containing list of scores (same order as input)
 */
Response<List<Double>> scoreAll(List<TextSegment> segments, String query);
}Thread Safety:
Common Pitfalls:
Edge Cases:
Performance Notes:
scoreAll() is more efficientCost Considerations:
Exception Handling:
Usage Example (Basic):
import dev.langchain4j.model.scoring.ScoringModel;
import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
// Score multiple documents for re-ranking
List<TextSegment> documents = List.of(
TextSegment.from("Document about Java programming"),
TextSegment.from("Document about Python"),
TextSegment.from("Document about web development")
);
String query = "How to program in Java?";
Response<List<Double>> scores = scoringModel.scoreAll(documents, query);
// Use scores to re-rank documentsUsage Example (Production-Ready Reranking Pipeline):
import dev.langchain4j.model.scoring.ScoringModel;
import dev.langchain4j.data.segment.TextSegment;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Reranks candidate documents against a query using a ScoringModel.
 * On scoring failure, falls back to the original order with score 0.0 -
 * the failure is now logged instead of being silently swallowed as before.
 */
public class RerankerService {

    private final ScoringModel scoringModel;
    // Upper bound on how many candidates are sent to the scoring model,
    // since cross-encoder style scoring is comparatively slow and billed per document.
    private final int maxRerank;

    public RerankerService(ScoringModel scoringModel, int maxRerank) {
        this.scoringModel = scoringModel;
        this.maxRerank = maxRerank;
    }

    /**
     * Scores up to {@code maxRerank} candidates against {@code query} and
     * returns them sorted by descending relevance.
     *
     * @param query      query text
     * @param candidates candidate documents, best-first from the previous stage
     * @return scored documents, highest score first (original order on failure)
     */
    public List<ScoredDocument> rerank(String query, List<String> candidates) {
        // Limit candidates to maxRerank for performance
        List<String> toRerank = candidates.stream()
                .limit(maxRerank)
                .collect(Collectors.toList());
        try {
            List<TextSegment> segments = toRerank.stream()
                    .map(TextSegment::from)
                    .collect(Collectors.toList());
            Response<List<Double>> response = scoringModel.scoreAll(segments, query);
            List<Double> scores = response.content();
            // Pair each document with its score (scoreAll preserves input order).
            List<ScoredDocument> scored = new ArrayList<>();
            for (int i = 0; i < toRerank.size(); i++) {
                scored.add(new ScoredDocument(toRerank.get(i), scores.get(i)));
            }
            // Sort by score descending
            scored.sort(Comparator.comparingDouble(ScoredDocument::getScore).reversed());
            return scored;
        } catch (RuntimeException e) {
            // Fallback: keep original order, but surface the failure for diagnosis
            // instead of swallowing it silently.
            System.err.println("Reranking failed, returning original order: " + e);
            return toRerank.stream()
                    .map(doc -> new ScoredDocument(doc, 0.0))
                    .collect(Collectors.toList());
        }
    }

    /** Immutable pairing of a document with its relevance score. */
    public static class ScoredDocument {
        private final String document;
        private final double score;

        public ScoredDocument(String document, double score) {
            this.document = document;
            this.score = score;
        }

        public String getDocument() { return document; }

        public double getScore() { return score; }
    }
}Usage Example (Two-Stage Retrieval with Reranking):
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.scoring.ScoringModel;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Two-stage retrieval: a fast embedding-similarity pass narrows the corpus,
 * then a scoring model reranks the survivors for accuracy.
 * NOTE(review): this snippet uses ArrayList and Comparator, which its import
 * block does not show.
 */
public class TwoStageRetriever {
private final EmbeddingModel embeddingModel;
private final ScoringModel scoringModel;
private final List<String> documentCorpus;
// Parallel to documentCorpus: documentEmbeddings.get(i) embeds documentCorpus.get(i).
private final List<Embedding> documentEmbeddings;
public TwoStageRetriever(
EmbeddingModel embeddingModel,
ScoringModel scoringModel,
List<String> documentCorpus) {
this.embeddingModel = embeddingModel;
this.scoringModel = scoringModel;
this.documentCorpus = documentCorpus;
// Pre-compute embeddings
List<TextSegment> segments = documentCorpus.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
this.documentEmbeddings = embeddingModel.embedAll(segments).content();
}
// Returns the topK most relevant documents for the query.
public List<String> retrieve(String query, int topK) {
// Stage 1: Fast embedding-based retrieval
Embedding queryEmbedding = embeddingModel.embed(query).content();
List<ScoredDoc> candidates = new ArrayList<>();
for (int i = 0; i < documentCorpus.size(); i++) {
double similarity = cosineSimilarity(
queryEmbedding,
documentEmbeddings.get(i)
);
candidates.add(new ScoredDoc(i, documentCorpus.get(i), similarity));
}
// Get top 50 by embedding similarity
// NOTE(review): the 50-candidate cutoff is hard-coded; consider making it configurable.
List<String> topCandidates = candidates.stream()
.sorted(Comparator.comparingDouble(ScoredDoc::getScore).reversed())
.limit(50)
.map(ScoredDoc::getDoc)
.collect(Collectors.toList());
// Stage 2: Accurate reranking with scoring model
List<TextSegment> toRerank = topCandidates.stream()
.map(TextSegment::from)
.collect(Collectors.toList());
List<Double> scores = scoringModel.scoreAll(toRerank, query).content();
List<ScoredDoc> reranked = new ArrayList<>();
for (int i = 0; i < topCandidates.size(); i++) {
reranked.add(new ScoredDoc(i, topCandidates.get(i), scores.get(i)));
}
return reranked.stream()
.sorted(Comparator.comparingDouble(ScoredDoc::getScore).reversed())
.limit(topK)
.map(ScoredDoc::getDoc)
.collect(Collectors.toList());
}
// Internal (index, document, score) triple; index is informational only.
private static class ScoredDoc {
int index;
String doc;
double score;
ScoredDoc(int index, String doc, double score) {
this.index = index;
this.doc = doc;
this.score = score;
}
String getDoc() { return doc; }
double getScore() { return score; }
}
// Plain cosine similarity; note this returns NaN if either vector has zero magnitude.
private double cosineSimilarity(Embedding e1, Embedding e2) {
float[] v1 = e1.vector();
float[] v2 = e2.vector();
double dot = 0.0, norm1 = 0.0, norm2 = 0.0;
for (int i = 0; i < v1.length; i++) {
dot += v1[i] * v2[i];
norm1 += v1[i] * v1[i];
norm2 += v2[i] * v2[i];
}
return dot / (Math.sqrt(norm1) * Math.sqrt(norm2));
}
}Related APIs:
EmbeddingModel - For first-stage retrievalTextSegment - Document representationResponse - Score wrapperInterface for generating images from text prompts.
package dev.langchain4j.model.image;
import dev.langchain4j.data.image.Image;
import dev.langchain4j.model.output.Response;
/**
 * Represents a model that can generate images from text prompts.
 */
public interface ImageModel {
/**
 * Generate an image from a text prompt.
 * @param prompt Text prompt describing the desired image
 * @return Response containing the generated image
 */
Response<Image> generate(String prompt);
}
Thread Safety:
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Exception Handling:
Usage Example (Basic):
import dev.langchain4j.model.image.ImageModel;
import dev.langchain4j.data.image.Image;
import dev.langchain4j.model.output.Response;
Response<Image> response = imageModel.generate(
"A serene landscape with mountains and a lake"
);
Image image = response.content();
String url = image.url();
Usage Example (Production-Ready with Async):
import dev.langchain4j.model.image.ImageModel;
import dev.langchain4j.data.image.Image;
import java.util.concurrent.*;
/**
 * Generates images asynchronously on a bounded worker pool with a hard
 * 60-second timeout per request.
 * <p>
 * Fixes over the original: (1) {@code imageModel} was a final field with no
 * constructor, so the class did not compile; (2)
 * {@code new CompletionException(String, Throwable)} does not exist as a
 * public constructor — only {@code CompletionException(Throwable)} is public.
 */
public class AsyncImageGenerator {
    private final ImageModel imageModel;
    // Small fixed pool: image generation is slow, provider-rate-limited work.
    private final ExecutorService executor = Executors.newFixedThreadPool(3);

    /** @param imageModel the provider-backed image model to delegate to */
    public AsyncImageGenerator(ImageModel imageModel) {
        this.imageModel = imageModel;
    }

    /**
     * Submits a generation request to the pool.
     * @param prompt text prompt describing the desired image
     * @return future completing with the image, or failing after 60 seconds
     */
    public CompletableFuture<Image> generateAsync(String prompt) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                Response<Image> response = imageModel.generate(prompt);
                return response.content();
            } catch (RuntimeException e) {
                // CompletionException has no public (String, Throwable) constructor,
                // so carry the message and cause via an intermediate exception.
                throw new CompletionException(
                        new RuntimeException("Image generation failed: " + e.getMessage(), e)
                );
            }
        }, executor).orTimeout(60, TimeUnit.SECONDS);
    }

    /** Stops accepting new tasks; already-submitted generations finish. */
    public void shutdown() {
        executor.shutdown();
    }
}
Related APIs:
Image - Generated image dataResponse - Wrapper with metadataInterface for estimating token counts without making API calls to the model.
package dev.langchain4j.model;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.agent.tool.ToolSpecification;
import java.util.List;
/**
 * Interface for estimating token counts without making a model API call.
 * Useful for staying within model token limits.
 *
 * NOTE(review): counts are estimates — exact tokenization is provider-specific,
 * so leave headroom rather than filling the context window exactly.
 */
public interface TokenCountEstimator {
/**
 * Estimates the number of tokens in a plain text string.
 * @param text Text to estimate
 * @return Estimated token count
 */
int estimateTokenCount(String text);
/**
 * Estimates the number of tokens in a list of chat messages.
 * @param messages Chat messages to estimate
 * @return Estimated token count
 */
int estimateTokenCount(List<ChatMessage> messages);
/**
 * Estimates the number of tokens in messages plus tool specifications
 * (tool definitions add token overhead to a request).
 * @param messages Chat messages to estimate
 * @param toolSpecifications Tool specifications
 * @return Estimated token count
 */
int estimateTokenCount(List<ChatMessage> messages,
List<ToolSpecification> toolSpecifications);
}Thread Safety:
Common Pitfalls:
Edge Cases:
Performance Notes:
Cost Considerations:
Exception Handling:
Usage Example (Basic):
import dev.langchain4j.model.TokenCountEstimator;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.UserMessage;
import java.util.List;
TokenCountEstimator estimator = (TokenCountEstimator) chatModel;
int textTokens = estimator.estimateTokenCount("Hello, world!");
List<ChatMessage> messages = List.of(
UserMessage.from("What is AI?")
);
int messageTokens = estimator.estimateTokenCount(messages);Usage Example (Token Budget Management):
import dev.langchain4j.model.TokenCountEstimator;
import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.data.message.SystemMessage;
import java.util.ArrayList;
import java.util.List;
/**
 * Trims conversation history so a request fits within a model's context
 * window, always keeping the system message and the current user message
 * and retaining as much recent history as the budget allows.
 */
public class TokenBudgetManager {
    private final TokenCountEstimator estimator;
    private final int maxContextTokens;     // total window size of the target model
    private final int reservedForResponse;  // tokens held back for the model's reply

    public TokenBudgetManager(
            TokenCountEstimator estimator,
            int maxContextTokens,
            int reservedForResponse) {
        this.estimator = estimator;
        this.maxContextTokens = maxContextTokens;
        this.reservedForResponse = reservedForResponse;
    }

    /**
     * Builds [system, …kept history…, current] in chronological order.
     * History is admitted newest-first until a message no longer fits.
     *
     * @throws IllegalArgumentException if the system and current messages
     *         alone already exceed the budget
     */
    public List<ChatMessage> fitToContext(
            SystemMessage systemMessage,
            List<ChatMessage> history,
            UserMessage currentMessage) {
        int remaining = maxContextTokens - reservedForResponse
                - estimator.estimateTokenCount(List.of(systemMessage))
                - estimator.estimateTokenCount(List.of(currentMessage));
        if (remaining < 0) {
            throw new IllegalArgumentException(
                    "System message and current message exceed token budget"
            );
        }
        // Walk history newest-to-oldest; stop at the first message that no longer fits.
        int firstKept = history.size();
        while (firstKept > 0) {
            int cost = estimator.estimateTokenCount(List.of(history.get(firstKept - 1)));
            if (cost > remaining) {
                break;
            }
            remaining -= cost;
            firstKept--;
        }
        List<ChatMessage> assembled = new ArrayList<>();
        assembled.add(systemMessage);
        assembled.addAll(history.subList(firstKept, history.size()));
        assembled.add(currentMessage);
        return assembled;
    }

    /** @return tokens still free after the given messages and the response reservation */
    public int estimateRemainingBudget(List<ChatMessage> messages) {
        int used = estimator.estimateTokenCount(messages);
        return maxContextTokens - used - reservedForResponse;
    }
}Usage Example (Pre-validation):
/**
 * Fails fast when a message list would exceed the model's input-token limit,
 * avoiding a wasted (and billed) API call.
 */
public class TokenValidator {
    private final TokenCountEstimator estimator;
    private final int maxInputTokens;

    // Constructor was missing in the original: the final fields could never be
    // initialized, so the class did not compile.
    public TokenValidator(TokenCountEstimator estimator, int maxInputTokens) {
        this.estimator = estimator;
        this.maxInputTokens = maxInputTokens;
    }

    /**
     * Estimates the token count and rejects oversized inputs before the call.
     * @throws IllegalArgumentException if the estimate exceeds the limit
     */
    public void validateBeforeCall(List<ChatMessage> messages) {
        int estimatedTokens = estimator.estimateTokenCount(messages);
        if (estimatedTokens > maxInputTokens) {
            throw new IllegalArgumentException(
                    String.format(
                            "Message too long: %d tokens (max %d)",
                            estimatedTokens,
                            maxInputTokens
                    )
            );
        }
    }
}Related APIs:
ChatModel - Often implements TokenCountEstimatorChatMessage - Messages to estimateToolSpecification - Tools add token overheadEnum identifying the provider of a model.
package dev.langchain4j.model;
/**
 * Identifies the provider of a model.
 * Non-exhaustive: OTHER covers providers without a dedicated constant, so
 * switch statements over this enum should keep a default branch.
 */
public enum ModelProvider {
OPENAI,
ANTHROPIC,
GOOGLE,
AZURE,
OLLAMA,
// ... and other providers
OTHER
}Thread Safety:
Usage Example:
import dev.langchain4j.model.ModelProvider;
import dev.langchain4j.model.chat.ChatModel;
/**
 * Chooses client-side defaults based on which provider backs a ChatModel.
 */
public class ProviderSpecificLogic {

    /** @return recommended request timeout in seconds for the model's provider */
    public int getDefaultTimeout(ChatModel model) {
        ModelProvider provider = model.provider();
        if (provider == ModelProvider.OPENAI) {
            return 30;
        }
        if (provider == ModelProvider.ANTHROPIC) {
            return 60;
        }
        if (provider == ModelProvider.OLLAMA) {
            return 120; // Local, may be slower
        }
        return 45; // conservative default for all other providers
    }

    /** @return true when the model also implements the streaming interface */
    public boolean supportsStreaming(ChatModel model) {
        // Check if model is also a StreamingChatModel
        return model instanceof StreamingChatModel;
    }
}Related APIs:
ChatModel.provider() - Get model providerimport dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.output.FinishReason;
import dev.langchain4j.model.output.TokenUsage;
import org.mockito.Mockito;
/**
 * Factory methods producing Mockito-backed ChatModel stubs for unit tests.
 */
public class ChatModelTestHelper {

    /**
     * Builds a stub that answers both chat overloads with canned content.
     * @param response text the stub returns for every call
     * @return a mock covering the String and ChatRequest overloads
     */
    public static ChatModel createMockChatModel(String response) {
        ChatModel mock = Mockito.mock(ChatModel.class);
        Mockito.when(mock.chat(Mockito.anyString()))
                .thenReturn(response);
        ChatResponse stubbedResponse = ChatResponse.builder()
                .aiMessage(AiMessage.from(response))
                .finishReason(FinishReason.STOP)
                .tokenUsage(new TokenUsage(10, 20))
                .build();
        Mockito.when(mock.chat(Mockito.any(ChatRequest.class)))
                .thenReturn(stubbedResponse);
        return mock;
    }

    /** @return a mock whose String chat overload always throws a RuntimeException */
    public static ChatModel createErrorMock() {
        ChatModel failing = Mockito.mock(ChatModel.class);
        Mockito.when(failing.chat(Mockito.anyString()))
                .thenThrow(new RuntimeException("API Error"));
        return failing;
    }
}import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.openai.OpenAiChatModel;
/**
 * Demonstrates constructor injection of ChatModel so tests can substitute a
 * mock for the real provider-backed model.
 */
public class TestableService {
// Injected dependency; production code passes a real model, tests pass a mock.
private final ChatModel chatModel;
/** @param chatModel the model implementation (real or mocked) to delegate to */
public TestableService(ChatModel chatModel) {
this.chatModel = chatModel;
}
// Business logic here
// In tests:
/** Example test wiring: builds the service against a canned-response mock. */
public static void testWithMock() {
ChatModel testModel = ChatModelTestHelper.createMockChatModel(
"Test response"
);
TestableService service = new TestableService(testModel);
// Test service...
}
}import dev.langchain4j.model.chat.StreamingChatModel;
import dev.langchain4j.model.chat.response.StreamingChatResponseHandler;
import java.util.concurrent.CompletableFuture;
/**
 * Bridges the callback-based streaming chat API to a CompletableFuture so
 * tests can await and inspect the full streamed response.
 */
public class StreamingTestHelper {

    /**
     * Sends a message and resolves the future with the concatenation of all
     * streamed tokens once the response completes, or fails it on error.
     *
     * @param model   streaming model under test
     * @param message user message to send
     * @return future yielding the full concatenated response text
     */
    public static CompletableFuture<String> captureStreamingResponse(
            StreamingChatModel model,
            String message) {
        CompletableFuture<String> result = new CompletableFuture<>();
        StringBuilder buffer = new StringBuilder();
        model.chat(message, new StreamingChatResponseHandler() {
            @Override
            public void onPartialResponse(String token) {
                buffer.append(token);
            }

            @Override
            public void onCompleteResponse(ChatResponse response) {
                result.complete(buffer.toString());
            }

            @Override
            public void onError(Throwable error) {
                result.completeExceptionally(error);
            }
        });
        return result;
    }
}import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;
import org.mockito.Mockito;
/**
 * Builds a Mockito-backed EmbeddingModel stub for unit tests.
 * <p>
 * Fix over the original: vectors were produced with Math.random(), so the
 * same text embedded to a different vector on every call — making any
 * similarity-based assertion flaky. Vectors are now pseudo-random but
 * deterministic per input text (seeded by the text's hashCode).
 */
public class EmbeddingModelTestHelper {
    /**
     * @param dimension length of the fake embedding vectors
     * @return a mock whose embed(String) is deterministic per input text
     */
    public static EmbeddingModel createMockEmbedding(int dimension) {
        EmbeddingModel mock = Mockito.mock(EmbeddingModel.class);
        Mockito.when(mock.dimension()).thenReturn(dimension);
        Mockito.when(mock.embed(Mockito.anyString()))
                .thenAnswer(invocation -> {
                    String text = invocation.getArgument(0);
                    // Fully-qualified so the example needs no extra import.
                    java.util.Random rng = new java.util.Random(text.hashCode());
                    float[] vector = new float[dimension];
                    for (int i = 0; i < dimension; i++) {
                        vector[i] = rng.nextFloat();
                    }
                    return Response.from(new Embedding(vector));
                });
        return mock;
    }
}import java.util.concurrent.TimeUnit;
public class RetryHelper {
/**
 * Runs an operation with exponential backoff: the delay doubles after each
 * failed attempt (initialDelayMs, 2x, 4x, ...).
 *
 * @param operation      work to attempt
 * @param maxRetries     total number of attempts before giving up
 * @param initialDelayMs delay before the second attempt, in milliseconds
 * @return the first successful result
 * @throws RuntimeException wrapping the last failure once attempts are
 *         exhausted, or immediately if the backoff sleep is interrupted
 *         (the interrupt flag is restored first)
 */
public static <T> T retryWithBackoff(
        Supplier<T> operation,
        int maxRetries,
        long initialDelayMs) {
    Exception mostRecentFailure = null;
    int attempt = 0;
    while (attempt < maxRetries) {
        try {
            return operation.get();
        } catch (Exception e) {
            mostRecentFailure = e;
            boolean retriesRemain = attempt < maxRetries - 1;
            if (retriesRemain) {
                long delay = initialDelayMs * (long) Math.pow(2, attempt);
                try {
                    TimeUnit.MILLISECONDS.sleep(delay);
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("Interrupted during retry", ie);
                }
            }
        }
        attempt++;
    }
    throw new RuntimeException(
            "Operation failed after " + maxRetries + " attempts",
            mostRecentFailure
    );
}
}import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Minimal circuit breaker: opens after a configurable number of failures and
 * rejects calls until a cool-down period has elapsed, then lets a trial
 * ("half-open") call through.
 *
 * NOTE(review): state transitions are not atomic — the check on {@code open}
 * and the threshold comparison are separate operations on separate fields, so
 * concurrent callers can race (e.g. several trial calls may pass at once, and
 * incrementAndGet followed by get() can observe another thread's update).
 * Acceptable for a best-effort breaker; confirm if strict single-trial
 * semantics are required.
 */
public class CircuitBreaker {
// Number of failures that trips the breaker.
private final int failureThreshold;
// Cool-down in milliseconds before a trial call is allowed after tripping.
private final long timeoutMs;
private final AtomicInteger failureCount = new AtomicInteger(0);
// Wall-clock time (epoch millis) of the most recent failure.
private final AtomicLong lastFailureTime = new AtomicLong(0);
// volatile: guarantees visibility only; not coordinated with failureCount.
private volatile boolean open = false;
public CircuitBreaker(int failureThreshold, long timeoutMs) {
this.failureThreshold = failureThreshold;
this.timeoutMs = timeoutMs;
}
/**
 * Runs the operation through the breaker.
 * @throws RuntimeException immediately while the breaker is open and the
 *         cool-down has not elapsed; otherwise rethrows the operation's failure
 */
public <T> T execute(Supplier<T> operation) {
if (open) {
long elapsed = System.currentTimeMillis() - lastFailureTime.get();
if (elapsed < timeoutMs) {
throw new RuntimeException("Circuit breaker is OPEN");
} else {
// Try half-open state
open = false;
failureCount.set(0);
}
}
try {
T result = operation.get();
failureCount.set(0); // Reset on success
return result;
} catch (Exception e) {
failureCount.incrementAndGet();
lastFailureTime.set(System.currentTimeMillis());
if (failureCount.get() >= failureThreshold) {
open = true;
}
throw e;
}
}
/** @return whether the breaker is currently rejecting calls */
public boolean isOpen() {
return open;
}
}import dev.langchain4j.model.chat.ChatModel;
/**
 * Routes chat calls to a primary model and falls back to a secondary model
 * when the primary throws.
 * <p>
 * Fixes over the original: (1) the final fields had no constructor, so the
 * class did not compile; (2) the primary failure was silently discarded when
 * both models failed — it is now attached as a suppressed exception.
 */
public class FallbackChatService {
    private final ChatModel primaryModel;
    private final ChatModel fallbackModel;

    public FallbackChatService(ChatModel primaryModel, ChatModel fallbackModel) {
        this.primaryModel = primaryModel;
        this.fallbackModel = fallbackModel;
    }

    /**
     * @return the primary model's reply, or the fallback's if the primary fails
     * @throws RuntimeException if both models fail; the fallback error is the
     *         cause and the primary error is attached as a suppressed exception
     */
    public String chatWithFallback(String message) {
        try {
            return primaryModel.chat(message);
        } catch (RuntimeException e) {
            System.err.println("Primary model failed, using fallback: " +
                    e.getMessage());
            try {
                return fallbackModel.chat(message);
            } catch (RuntimeException fallbackError) {
                RuntimeException combined = new RuntimeException(
                        "Both primary and fallback models failed",
                        fallbackError
                );
                combined.addSuppressed(e); // keep the primary failure for diagnostics
                throw combined;
            }
        }
    }
}import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
/**
 * Decorator bounding the number of in-flight chat calls to the delegate.
 *
 * NOTE(review): a Semaphore limits *concurrency*, not request rate — despite
 * the class name this does not enforce requests-per-second (compare the
 * Guava-based RateLimitedService example).
 */
public class RateLimitedChatModel {
private final ChatModel delegate;
// Permits = maximum simultaneous calls allowed through to the delegate.
private final Semaphore rateLimiter;
public RateLimitedChatModel(ChatModel delegate, int maxConcurrent) {
this.delegate = delegate;
this.rateLimiter = new Semaphore(maxConcurrent);
}
/**
 * Sends a message once a permit is available.
 * @throws RuntimeException if no permit frees up within the timeout
 * @throws InterruptedException if interrupted while waiting for a permit
 */
public String chat(String message, long timeoutSeconds) throws InterruptedException {
if (!rateLimiter.tryAcquire(timeoutSeconds, TimeUnit.SECONDS)) {
throw new RuntimeException("Rate limit timeout");
}
try {
return delegate.chat(message);
} finally {
// Always release, even when the delegate throws.
rateLimiter.release();
}
}
}import com.google.common.util.concurrent.RateLimiter;
/**
 * Throttles chat calls to a fixed requests-per-second rate using Guava's
 * RateLimiter.
 */
public class RateLimitedService {
private final RateLimiter rateLimiter;
private final ChatModel chatModel;
/** @param requestsPerSecond steady-state permitted request rate */
public RateLimitedService(ChatModel chatModel, double requestsPerSecond) {
this.chatModel = chatModel;
this.rateLimiter = RateLimiter.create(requestsPerSecond);
}
/** Blocks until the limiter grants a permit, then delegates the call. */
public String chat(String message) {
rateLimiter.acquire(); // Blocks until permit available
return chatModel.chat(message);
}
}public class RateLimitHandler {
/**
 * Retries an operation when it fails with a rate-limit error (message
 * containing "429" or "rate limit"), sleeping between attempts; any other
 * failure — or exhaustion of the 5 attempts — propagates immediately.
 *
 * @param operation work to attempt
 * @return the first successful result
 */
public static <T> T handleRateLimits(Supplier<T> operation) {
    int maxRetries = 5;
    int attempt = 0;
    while (attempt < maxRetries) {
        try {
            return operation.get();
        } catch (RuntimeException e) {
            boolean shouldRetry = isRateLimitError(e) && attempt < maxRetries - 1;
            if (!shouldRetry) {
                throw e;
            }
            long waitTime = calculateWaitTime(e, attempt);
            try {
                TimeUnit.MILLISECONDS.sleep(waitTime);
            } catch (InterruptedException ie) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted", ie);
            }
        }
        attempt++;
    }
    // Unreachable: the last loop iteration either returns or rethrows.
    throw new RuntimeException("Should not reach here");
}
/** Heuristic: treats messages mentioning "429" or "rate limit" as rate-limit errors. */
private static boolean isRateLimitError(RuntimeException e) {
    String message = e.getMessage();
    if (message == null) {
        return false;
    }
    return message.contains("429") || message.contains("rate limit");
}
// NOTE(review): despite the exception parameter, no Retry-After header is
// actually parsed here — only exponential backoff is applied. Extend if a
// provider supplies retry hints in the exception message.
private static long calculateWaitTime(RuntimeException e, int attempt) {
// Exponential backoff: 1s, 2s, 4s, ...
return 1000 * (long) Math.pow(2, attempt);
}
}import dev.langchain4j.model.chat.ChatModel;
import dev.langchain4j.model.chat.Capability;
/**
 * Probes a ChatModel at runtime for optional capabilities so callers can
 * adapt behavior per model.
 */
public class FeatureDetector {
/** Snapshot of the capabilities detected for one model instance. */
public static class ModelCapabilities {
public final boolean supportsJsonSchema;
public final boolean supportsStreaming;
public final boolean supportsTokenEstimation;
public final boolean supportsFunctionCalling;
public ModelCapabilities(ChatModel model) {
this.supportsJsonSchema = model.supportedCapabilities()
.contains(Capability.RESPONSE_FORMAT_JSON_SCHEMA);
this.supportsStreaming = model instanceof StreamingChatModel;
this.supportsTokenEstimation = model instanceof TokenCountEstimator;
// NOTE(review): this issues a real chat call (billed, slow) just to probe
// tool support — confirm that is acceptable at construction time.
this.supportsFunctionCalling = checkFunctionCalling(model);
}
/** Probes tool support by sending a minimal request with an empty tool list. */
private boolean checkFunctionCalling(ChatModel model) {
// Check if model accepts tool specifications
try {
model.chat(ChatRequest.builder()
.messages(UserMessage.from("test"))
.toolSpecifications(List.of())
.build());
return true;
} catch (UnsupportedOperationException e) {
return false;
} catch (Exception e) {
// Other error, assume supported
return true;
}
}
}
/** @return detected capabilities for the given model */
public static ModelCapabilities detect(ChatModel model) {
return new ModelCapabilities(model);
}
}public class AdaptiveService {
// Model under use plus its probed capabilities, detected once at construction.
private final ChatModel model;
private final ModelCapabilities capabilities;
public AdaptiveService(ChatModel model) {
this.model = model;
this.capabilities = FeatureDetector.detect(model);
}
/**
 * Produces structured output via the best mechanism the model supports:
 * native JSON response format when available, prompt engineering otherwise.
 */
public String processWithStructuredOutput(String prompt) {
if (capabilities.supportsJsonSchema) {
// Use native JSON schema support
return processWithNativeJson(prompt);
} else {
// Fallback to prompt engineering
return processWithPromptEngineering(prompt);
}
}
// NOTE(review): the capability checked above is RESPONSE_FORMAT_JSON_SCHEMA,
// but this request uses ResponseFormat.JSON (plain JSON mode) — confirm the
// mismatch is intentional.
private String processWithNativeJson(String prompt) {
ChatRequest request = ChatRequest.builder()
.messages(UserMessage.from(prompt))
.responseFormat(ResponseFormat.JSON)
.build();
return model.chat(request).aiMessage().text();
}
// Falls back to asking for JSON in the prompt; output is not guaranteed valid JSON.
private String processWithPromptEngineering(String prompt) {
String enhancedPrompt = prompt +
"\n\nPlease respond in valid JSON format.";
return model.chat(enhancedPrompt);
}
}This enhanced documentation provides production-grade guidance for coding agents, covering all aspects of using LangChain4j models safely and efficiently in real-world applications.
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j