Build LLM-powered applications in Java with support for chatbots, agents, RAG, tools, and much more
High-level API for creating AI-powered services by defining Java interfaces. AiServices provides implementations that automatically handle chat models, streaming, memory management, RAG, tools, guardrails, and various output types.
The primary entry point for building AI services from Java interfaces.
package dev.langchain4j.service;
/**
 * Abstract class for building AI services from Java interfaces.
 * Supports system/user message templates, chat memory, RAG, tools, streaming,
 * moderation, and various return types.
 * <p>
 * Proxies returned by {@code create(...)} and {@code builder(...).build()} are thread-safe.
 */
public abstract class AiServices<T> {
/**
 * Create a simple AI service with a chat model
 * @param aiService Interface defining the AI service API
 * @param chatModel Chat model to use
 * @return Implementation of the AI service interface (thread-safe proxy)
 * @throws IllegalArgumentException if {@code aiService} is null
 */
public static <T> T create(Class<T> aiService, ChatModel chatModel);
/**
 * Create a simple AI service with a streaming chat model
 * @param aiService Interface defining the AI service API
 * @param streamingChatModel Streaming chat model to use
 * @return Implementation of the AI service interface (thread-safe proxy)
 * @throws IllegalArgumentException if {@code aiService} is null
 */
public static <T> T create(Class<T> aiService, StreamingChatModel streamingChatModel);
/**
 * Begin building an AI service with full configuration options
 * <p>
 * Remember to call {@code build()} on the returned builder - the builder itself
 * is not an AI service instance. Configuration is validated at {@code build()}
 * time, not during individual setter calls.
 * @param aiService Interface defining the AI service API
 * @return Builder for configuring the AI service
 */
public static <T> AiServices<T> builder(Class<T> aiService);
}

Thread safety:
- AiServices.create() and AiServices.builder().build() methods return thread-safe proxy instances.
- When using chatMemory(), ensure the ChatMemory implementation is thread-safe (MessageWindowChatMemory is thread-safe).
- When using chatMemoryProvider(), the provider itself must be thread-safe and return thread-safe memory instances per memoryId.
- Tools configured via tools() must be thread-safe if concurrent invocations are expected.

Common pitfalls:
- Do not pass null for the interface class - throws IllegalArgumentException.
- Do not call create() with both chatModel and streamingChatModel null - at least one must be provided.
- Remember to call build() on the builder - the builder itself is not an AI service instance.
- Configuration is validated at build() time, not during individual setter calls.

Cost considerations:
- Using chatMemory() or chatMemoryProvider() increases token usage with each additional message in history.
- RAG (contentRetriever()) adds retrieved content tokens to each request.

Error handling:
- Tool execution errors are routed to toolExecutionErrorHandler() if configured.
- ModerationException may be thrown if @Moderate annotation is used.

Complete builder API for configuring AI services with all available options.
/**
 * Builder for configuring AI services
 * <p>
 * Mutually exclusive options (set only one of each pair):
 * chatModel()/streamingChatModel(), chatMemory()/chatMemoryProvider(),
 * contentRetriever()/retrievalAugmentor().
 * <p>
 * Configuration is validated when build() is called, not during individual
 * setter calls; build() throws IllegalConfigurationException if the
 * configuration is inconsistent or incomplete. Do not call build() multiple
 * times on the same builder - behavior is undefined.
 */
public class Builder<T> {
// --- Model configuration (set only one of the two) ---
/**
 * Configure chat model
 * @param chatModel Chat model to use
 * @return Builder instance
 */
public Builder<T> chatModel(ChatModel chatModel);
/**
 * Configure streaming chat model
 * @param streamingChatModel Streaming chat model to use
 * @return Builder instance
 */
public Builder<T> streamingChatModel(StreamingChatModel streamingChatModel);
// --- Prompt configuration ---
/**
 * Set system message for all invocations
 * @param systemMessage System message text
 * @return Builder instance
 */
public Builder<T> systemMessage(String systemMessage);
/**
 * Set system message provider function
 * @param systemMessageProvider Function to provide system message
 * @return Builder instance
 */
public Builder<T> systemMessageProvider(Function<Object, String> systemMessageProvider);
/**
 * Set user message for all invocations
 * @param userMessage User message text
 * @return Builder instance
 */
public Builder<T> userMessage(String userMessage);
/**
 * Set user message provider function
 * @param userMessageProvider Function to provide user message
 * @return Builder instance
 */
public Builder<T> userMessageProvider(Function<Object, String> userMessageProvider);
// --- Memory configuration (set only one of the two) ---
/**
 * Set shared chat memory
 * @param chatMemory Chat memory instance
 * @return Builder instance
 */
public Builder<T> chatMemory(ChatMemory chatMemory);
/**
 * Set chat memory provider for per-user/conversation memory
 * @param chatMemoryProvider Chat memory provider
 * @return Builder instance
 */
public Builder<T> chatMemoryProvider(ChatMemoryProvider chatMemoryProvider);
// --- Request transformation ---
/**
 * Set chat request transformer
 * @param chatRequestTransformer Transformer to modify requests
 * @return Builder instance
 */
public Builder<T> chatRequestTransformer(UnaryOperator<ChatRequest> chatRequestTransformer);
/**
 * Set chat request transformer with memory ID
 * @param chatRequestTransformer Transformer with memory ID parameter
 * @return Builder instance
 */
public Builder<T> chatRequestTransformer(
BiFunction<ChatRequest, Object, ChatRequest> chatRequestTransformer
);
// --- Moderation ---
/**
 * Set moderation model for content moderation
 * Required when any service method is annotated with @Moderate.
 * @param moderationModel Moderation model to use
 * @return Builder instance
 */
public Builder<T> moderationModel(ModerationModel moderationModel);
// --- Tool configuration ---
/**
 * Configure tools (objects with @Tool annotated methods)
 * @param objectsWithTools Objects containing tool methods
 * @return Builder instance
 */
public Builder<T> tools(Object... objectsWithTools);
/**
 * Configure tools from collection
 * @param objectsWithTools Collection of objects containing tool methods
 * @return Builder instance
 */
public Builder<T> tools(Collection<Object> objectsWithTools);
/**
 * Configure tools programmatically
 * @param tools Map of tool specifications to executors
 * @return Builder instance
 */
public Builder<T> tools(Map<ToolSpecification, ToolExecutor> tools);
/**
 * Configure tools with immediate return names
 * @param tools Map of tool specifications to executors
 * @param immediateReturnToolNames Set of tool names that return immediately
 * @return Builder instance
 */
public Builder<T> tools(
Map<ToolSpecification, ToolExecutor> tools,
Set<String> immediateReturnToolNames
);
/**
 * Configure tool provider for dynamic tool selection
 * @param toolProvider Tool provider instance
 * @return Builder instance
 */
public Builder<T> toolProvider(ToolProvider toolProvider);
/**
 * Enable concurrent tool execution with default executor
 * Note: this only parallelizes tool execution; it does not reduce API calls.
 * @return Builder instance
 */
public Builder<T> executeToolsConcurrently();
/**
 * Enable concurrent tool execution with custom executor
 * A null executor falls back to ForkJoinPool.commonPool().
 * @param executor Executor for concurrent tool execution
 * @return Builder instance
 */
public Builder<T> executeToolsConcurrently(Executor executor);
/**
 * Set max sequential tool invocations (default: 100)
 * A value of 0 effectively disables tool execution; negative values throw
 * IllegalArgumentException at build time. Higher values allow more tool
 * rounds but increase API calls.
 * @param maxSequentialToolsInvocations Maximum number of sequential tool invocations
 * @return Builder instance
 */
public Builder<T> maxSequentialToolsInvocations(int maxSequentialToolsInvocations);
/**
 * Set before tool execution callback
 * @param beforeToolExecution Callback to invoke before tool execution
 * @return Builder instance
 */
public Builder<T> beforeToolExecution(Consumer<BeforeToolExecution> beforeToolExecution);
/**
 * Set after tool execution callback
 * @param afterToolExecution Callback to invoke after tool execution
 * @return Builder instance
 */
public Builder<T> afterToolExecution(Consumer<ToolExecution> afterToolExecution);
/**
 * Set strategy for handling hallucinated tool names
 * @param hallucinatedToolNameStrategy Strategy function
 * @return Builder instance
 */
public Builder<T> hallucinatedToolNameStrategy(
Function<ToolExecutionRequest, ToolExecutionResultMessage> hallucinatedToolNameStrategy
);
/**
 * Set handler for tool argument errors (JSON parsing, type mismatches)
 * @param handler Tool arguments error handler
 * @return Builder instance
 */
public Builder<T> toolArgumentsErrorHandler(ToolArgumentsErrorHandler handler);
/**
 * Set handler for tool execution errors
 * @param handler Tool execution error handler
 * @return Builder instance
 */
public Builder<T> toolExecutionErrorHandler(ToolExecutionErrorHandler handler);
// --- RAG configuration (set only one of the two) ---
/**
 * Configure content retriever for RAG
 * @param contentRetriever Content retriever instance
 * @return Builder instance
 */
public Builder<T> contentRetriever(ContentRetriever contentRetriever);
/**
 * Configure retrieval augmentor for RAG
 * @param retrievalAugmentor Retrieval augmentor instance
 * @return Builder instance
 */
public Builder<T> retrievalAugmentor(RetrievalAugmentor retrievalAugmentor);
// --- Listeners ---
/**
 * Register AI service listener
 * @param listener Listener to register
 * @return Builder instance
 */
public <I> Builder<T> registerListener(AiServiceListener<I> listener);
/**
 * Register multiple AI service listeners
 * @param listeners Listeners to register
 * @return Builder instance
 */
public Builder<T> registerListeners(AiServiceListener<?>... listeners);
/**
 * Register listener collection
 * @param listeners Collection of listeners to register
 * @return Builder instance
 */
public Builder<T> registerListeners(Collection<? extends AiServiceListener<?>> listeners);
/**
 * Unregister AI service listener
 * @param listener Listener to unregister
 * @return Builder instance
 */
public <I> Builder<T> unregisterListener(AiServiceListener<I> listener);
/**
 * Unregister multiple listeners
 * @param listeners Listeners to unregister
 * @return Builder instance
 */
public Builder<T> unregisterListeners(AiServiceListener<?>... listeners);
// --- Guardrails ---
/**
 * Configure input guardrails
 * @param inputGuardrailsConfig Input guardrails configuration
 * @return Builder instance
 */
public Builder<T> inputGuardrailsConfig(InputGuardrailsConfig inputGuardrailsConfig);
/**
 * Configure output guardrails
 * @param outputGuardrailsConfig Output guardrails configuration
 * @return Builder instance
 */
public Builder<T> outputGuardrailsConfig(OutputGuardrailsConfig outputGuardrailsConfig);
/**
 * Set input guardrail classes
 * @param guardrailClasses List of guardrail classes
 * @return Builder instance
 */
public <I> Builder<T> inputGuardrailClasses(List<Class<? extends I>> guardrailClasses);
/**
 * Set input guardrail classes (varargs)
 * @param guardrailClasses Guardrail classes
 * @return Builder instance
 */
public <I> Builder<T> inputGuardrailClasses(Class<? extends I>... guardrailClasses);
/**
 * Set input guardrails
 * @param guardrails List of guardrails
 * @return Builder instance
 */
public <I> Builder<T> inputGuardrails(List<I> guardrails);
/**
 * Set input guardrails (varargs)
 * @param guardrails Guardrails
 * @return Builder instance
 */
public <I> Builder<T> inputGuardrails(I... guardrails);
/**
 * Set output guardrail classes
 * @param guardrailClasses List of guardrail classes
 * @return Builder instance
 */
public <O> Builder<T> outputGuardrailClasses(List<Class<? extends O>> guardrailClasses);
/**
 * Set output guardrail classes (varargs)
 * @param guardrailClasses Guardrail classes
 * @return Builder instance
 */
public <O> Builder<T> outputGuardrailClasses(Class<? extends O>... guardrailClasses);
/**
 * Set output guardrails
 * @param guardrails List of guardrails
 * @return Builder instance
 */
public <O> Builder<T> outputGuardrails(List<O> guardrails);
/**
 * Set output guardrails (varargs)
 * @param guardrails Guardrails
 * @return Builder instance
 */
public <O> Builder<T> outputGuardrails(O... guardrails);
// --- RAG memory behavior ---
/**
 * Configure whether to store RAG-augmented messages in chat memory
 * Default is true
 * @param storeRetrievedContentInChatMemory Whether to store retrieved content
 * @return Builder instance
 */
public Builder<T> storeRetrievedContentInChatMemory(
boolean storeRetrievedContentInChatMemory
);
/**
 * Build the AI service
 * @return Implementation of the AI service interface (thread-safe proxy)
 * @throws IllegalConfigurationException if configuration is inconsistent or incomplete
 */
public T build();
}

Thread safety:
- Once build() is called, the resulting AI service proxy is thread-safe.

Common pitfalls:
- Do not call build() multiple times on the same builder - behavior is undefined.
- Do not set both chatModel() and streamingChatModel() - only one should be set.
- Do not set both chatMemory() and chatMemoryProvider() - only one should be set.
- Do not set both contentRetriever() and retrievalAugmentor() - only one should be set.
- Do not combine systemMessage()/systemMessageProvider() and use @SystemMessage annotation - method annotation takes precedence.
- Configuration is validated at build().

Edge cases:
- maxSequentialToolsInvocations(0) effectively disables tool execution.
- Negative values for maxSequentialToolsInvocations throw IllegalArgumentException at build time.
- executeToolsConcurrently() with null executor uses ForkJoinPool.commonPool().
- Inconsistent configuration is detected at build() time.

Cost considerations:
- Higher maxSequentialToolsInvocations allows more tool rounds but increases API calls.
- executeToolsConcurrently() does not reduce API calls - it only parallelizes tool execution.

Error handling:
- IllegalConfigurationException is thrown by build() if configuration is inconsistent or incomplete.
- Misconfiguration is reported at build() time when possible.

See also:
- chatModel() configuration
- streamingChatModel() configuration
- chatMemory() and chatMemoryProvider() configuration
- tools() and related tool configuration
- contentRetriever() and retrievalAugmentor() configuration

Annotations for configuring AI service methods and parameters.
package dev.langchain4j.service;
/**
 * Specifies complete system message or template to be used on each invocation
 * Can contain template variables resolved with values from @V annotated parameters
 * Takes precedence over systemMessageProvider
 * <p>
 * Note: a method-level @SystemMessage completely overrides a type-level one
 * (no concatenation). If both value() and fromResource() are set,
 * fromResource takes precedence.
 */
@Target({TYPE, METHOD})
@Retention(RUNTIME)
public @interface SystemMessage {
/**
 * Prompt template (single or multiple lines)
 * An empty array results in an empty message.
 * @return Template lines
 */
String[] value();
/**
 * Delimiter for joining multiple lines (default: "\n")
 * @return Delimiter string
 */
String delimiter() default "\n";
/**
 * Resource path to read prompt template
 * A non-existent resource throws IllegalConfigurationException at build time;
 * the template read from the resource is cached after first access.
 * @return Resource path
 */
String fromResource() default "";
}
/**
 * Specifies complete user message or template to be used on each invocation
 * Can contain template variables resolved with values from @V annotated parameters
 * Can be used on methods or parameters
 * Takes precedence over userMessageProvider
 * <p>
 * Note: @UserMessage on a parameter overrides a method-level @UserMessage for
 * that invocation. If both value() and fromResource() are set,
 * fromResource takes precedence.
 */
@Target({METHOD, PARAMETER})
@Retention(RUNTIME)
public @interface UserMessage {
/**
 * Prompt template (single or multiple lines)
 * An empty array results in an empty message.
 * @return Template lines
 */
String[] value();
/**
 * Delimiter for joining multiple lines (default: "\n")
 * @return Delimiter string
 */
String delimiter() default "\n";
/**
 * Resource path to read prompt template
 * A non-existent resource throws IllegalConfigurationException at build time;
 * the template read from the resource is cached after first access.
 * @return Resource path
 */
String fromResource() default "";
}
/**
 * Annotation for method parameters to mark them as prompt template variables
 * Value will be injected into templates defined in @UserMessage, @SystemMessage,
 * and systemMessageProvider
 * Not necessary when "-parameters" compilation option is enabled or when using
 * Quarkus/Spring Boot
 * <p>
 * Note: without this annotation and without "-parameters", the variable name
 * is unavailable at runtime.
 */
@Target(PARAMETER)
@Retention(RUNTIME)
public @interface V {
/**
 * Name of variable/placeholder in prompt template
 * @return Variable name
 */
String value();
}
/**
 * Annotation for method parameters to specify memory ID for finding memory
 * belonging to user/conversation
 * Parameter can be of any type with proper equals()/hashCode() implementation
 * <p>
 * Note: if multiple parameters in the same method carry @MemoryId, only the
 * first one is used.
 */
@Target(PARAMETER)
@Retention(RUNTIME)
public @interface MemoryId {
}
/**
 * Annotation for method parameters to inject value into 'name' field of UserMessage
 * <p>
 * Note: if multiple parameters carry @UserName, only the first one is used.
 */
@Target(PARAMETER)
@Retention(RUNTIME)
public @interface UserName {
}
/**
 * Annotation for methods to enable automatic content moderation
 * When annotated, method invocation will call both LLM and moderation model in parallel
 * If content is flagged, ModerationException is thrown
 * <p>
 * Requires moderationModel() to be configured on the builder; otherwise
 * IllegalConfigurationException is thrown at build time. The extra moderation
 * call doubles API costs (one chat model call + one moderation model call).
 */
@Target(METHOD)
@Retention(RUNTIME)
public @interface Moderate {
}

Common pitfalls:
- Do not set both value() and fromResource() in same annotation - fromResource takes precedence.
- Do not omit the @V annotation when not using -parameters compiler flag - parameter names won't be available.
- Do not use @MemoryId on multiple parameters in the same method - only first one is used.
- Do not use @UserName on multiple parameters - only first one is used.
- Do not use @SystemMessage at both type and method level expecting concatenation - method-level overrides completely.

Edge cases:
- Empty value() array results in empty message.
- An empty string in the value() array contributes empty line (or delimiter if multiple).
- fromResource() with non-existent resource throws IllegalConfigurationException at build time.
- @UserMessage on parameter overrides method-level @UserMessage for that invocation.
- @Moderate with no configured moderationModel() throws IllegalConfigurationException at build time.
- Template loaded via fromResource() is cached after first access.

Cost considerations:
- @Moderate adds parallel API call to moderation model - expect 2x latency for single-threaded usage.
- Longer @SystemMessage templates consume more input tokens on every invocation.
- @Moderate doubles API costs (one chat model call + one moderation model call).

Error handling:
- IllegalConfigurationException is thrown if fromResource() path is invalid.
- ModerationException is thrown when @Moderate flags content.
- Missing @V annotation causes variable name to be unavailable unless using -parameters compiler flag.

See also:
- @MemoryId annotation
- @Moderate annotation

The Result type provides access to additional information from AI service invocations.
package dev.langchain4j.service;
/**
 * Represents the result of AI Service invocation containing actual content
 * and additional information (token usage, finish reason, sources from RAG,
 * tool executions, intermediate/final responses)
 * <p>
 * Result instances are immutable and fully thread-safe.
 */
public class Result<T> {
/**
 * Constructor
 * @param content The actual content/result
 * @param tokenUsage Aggregate token usage
 * @param sources Sources from RAG retrieval
 * @param finishReason Finish reason from model
 * @param toolExecutions All tool executions that occurred
 */
public Result(
T content,
TokenUsage tokenUsage,
List<Content> sources,
FinishReason finishReason,
List<ToolExecution> toolExecutions
);
/**
 * Create builder
 * @return Builder instance
 */
public static <T> ResultBuilder<T> builder();
/**
 * Get content
 * @return The actual content/result; can be null if the model returned no content
 */
public T content();
/**
 * Get aggregate token usage
 * Aggregated across all requests in the invocation, including tool rounds.
 * @return Token usage across all requests; may be null - some models don't report token usage
 */
public TokenUsage tokenUsage();
/**
 * Get sources from RAG
 * @return List of retrieved content sources; empty (not null) when RAG is not used
 */
public List<Content> sources();
/**
 * Get finish reason
 * STOP vs LENGTH vs CONTENT_FILTER indicate different outcomes.
 * @return Finish reason from model
 */
public FinishReason finishReason();
/**
 * Get all tool executions
 * The number of tool invocations equals toolExecutions().size().
 * @return List of all tool executions; empty (not null) when no tools were executed
 */
public List<ToolExecution> toolExecutions();
/**
 * Get intermediate responses (with tool execution requests)
 * Empty when no tools are used; its size indicates the number of model calls
 * beyond the final one.
 * @return List of intermediate chat responses
 */
public List<ChatResponse> intermediateResponses();
/**
 * Get final response (without tool execution requests)
 * When no tools are used, this is the only response.
 * @return Final chat response
 */
public ChatResponse finalResponse();
}

Thread safety:
- Result instances are immutable and fully thread-safe.

Common pitfalls:
- Do not assume tokenUsage() is non-null - some models don't report token usage.
- Do not assume sources() is non-empty - only populated when using RAG.
- Do not assume toolExecutions() is non-empty - only populated when tools are used.
- Do not assume content() is non-null - it can be null if the model returned no content.
- Check finishReason() - STOP vs LENGTH vs CONTENT_FILTER indicate different outcomes.

Edge cases:
- When no tools were executed, toolExecutions() returns empty list (not null).
- When RAG is not used, sources() returns empty list (not null).
- tokenUsage() may return null.
- intermediateResponses() includes all responses with tool execution requests.
- finalResponse() is the last response without tool execution requests.
- Without tool calls, intermediateResponses() is empty and finalResponse() is the only response.

Cost considerations:
- tokenUsage() shows aggregate cost across all requests in the invocation (including tool rounds).
- The number of tool invocations equals toolExecutions().size().
- intermediateResponses() size indicates number of model calls beyond the final one.

Interface for streaming responses from AI services.
package dev.langchain4j.service;
/**
 * Represents token stream from model to subscribe and receive updates
 * when new partial response is available, when streaming finishes, or when error occurs
 * Intended as return type in AI Service
 * <p>
 * Register all handlers before calling start(); registering handlers after
 * start() has undefined behavior, and calling start() more than once throws
 * IllegalStateException. Once started, handlers may be invoked from
 * different threads.
 */
public interface TokenStream {
/**
 * Handle partial text responses
 * If no handler is registered, partial responses are discarded.
 * @param partialResponseHandler Consumer for partial response strings
 * @return TokenStream instance for chaining
 */
TokenStream onPartialResponse(Consumer<String> partialResponseHandler);
/**
 * Handle partial responses with context (experimental)
 * @param handler BiConsumer for partial response with context
 * @return TokenStream instance for chaining
 */
TokenStream onPartialResponseWithContext(
BiConsumer<PartialResponse, PartialResponseContext> handler
);
/**
 * Handle partial thinking/reasoning text (experimental)
 * @param partialThinkingHandler Consumer for partial thinking
 * @return TokenStream instance for chaining
 */
TokenStream onPartialThinking(Consumer<PartialThinking> partialThinkingHandler);
/**
 * Handle partial thinking with context (experimental)
 * @param handler BiConsumer for partial thinking with context
 * @return TokenStream instance for chaining
 */
TokenStream onPartialThinkingWithContext(
BiConsumer<PartialThinking, PartialThinkingContext> handler
);
/**
 * Handle partial tool calls (experimental)
 * @param partialToolCallHandler Consumer for partial tool calls
 * @return TokenStream instance for chaining
 */
TokenStream onPartialToolCall(Consumer<PartialToolCall> partialToolCallHandler);
/**
 * Handle partial tool calls with context (experimental)
 * @param handler BiConsumer for partial tool calls with context
 * @return TokenStream instance for chaining
 */
TokenStream onPartialToolCallWithContext(
BiConsumer<PartialToolCall, PartialToolCallContext> handler
);
/**
 * Handle retrieved contents from RAG
 * Called before the first partial response.
 * @param contentHandler Consumer for retrieved content list
 * @return TokenStream instance for chaining
 */
TokenStream onRetrieved(Consumer<List<Content>> contentHandler);
/**
 * Handle intermediate chat responses (with tool execution requests)
 * @param intermediateResponseHandler Consumer for intermediate responses
 * @return TokenStream instance for chaining
 */
TokenStream onIntermediateResponse(Consumer<ChatResponse> intermediateResponseHandler);
/**
 * Handle before tool execution
 * @param beforeToolExecutionHandler Consumer for before tool execution context
 * @return TokenStream instance for chaining
 */
TokenStream beforeToolExecution(Consumer<BeforeToolExecution> beforeToolExecutionHandler);
/**
 * Handle after tool execution
 * @param toolExecuteHandler Consumer for tool execution results
 * @return TokenStream instance for chaining
 */
TokenStream onToolExecuted(Consumer<ToolExecution> toolExecuteHandler);
/**
 * Handle final chat response
 * Always called after all partial responses (unless an error occurs).
 * @param completeResponseHandler Consumer for complete response
 * @return TokenStream instance for chaining
 */
TokenStream onCompleteResponse(Consumer<ChatResponse> completeResponseHandler);
/**
 * Handle errors
 * If no onError() handler is registered and ignoreErrors() is not called,
 * errors propagate to the caller. When combined with ignoreErrors(),
 * ignoreErrors takes precedence.
 * @param errorHandler Consumer for throwable errors
 * @return TokenStream instance for chaining
 */
TokenStream onError(Consumer<Throwable> errorHandler);
/**
 * Ignore all errors (logged as WARN)
 * @return TokenStream instance for chaining
 */
TokenStream ignoreErrors();
/**
 * Start processing and send request to LLM
 * Must be called after registering handlers - handlers won't execute without it.
 * Calling start() more than once throws IllegalStateException.
 */
void start();
}

Thread safety:
- Handlers must be registered before calling start().
- After start() is called, handlers may be invoked from different threads.

Common pitfalls:
- Do not forget to call start() - handlers won't execute without it.
- Do not register handlers after start() - behavior is undefined.
- Do not call start() multiple times - throws IllegalStateException.
- Do not combine onError() and ignoreErrors() - ignoreErrors takes precedence.

Edge cases:
- If no onPartialResponse() handler is registered, partial responses are discarded.
- If no onError() handler is registered and ignoreErrors() is not called, errors propagate to caller.
- onCompleteResponse() is always called after all partial responses (unless error occurs).
- onRetrieved() is called before first partial response.

Error handling:
- IllegalStateException is thrown if start() is called multiple times.
- Streaming errors are delivered to the onError() handler.
- If no onError() handler is registered, exceptions propagate to caller.
- ignoreErrors() suppresses all errors (logged at WARN level).

Exceptions thrown by AI services.
package dev.langchain4j.service;
/**
 * Exception thrown when AI service is misconfigured
 * Thrown by AiServices.Builder.build() when configuration is inconsistent or
 * incomplete, e.g. an invalid fromResource() path or @Moderate without a
 * configured moderation model.
 */
public class IllegalConfigurationException extends LangChain4jException {
/**
 * Constructor
 * @param message Error message
 */
public IllegalConfigurationException(String message);
/**
 * Constructor with cause
 * @param message Error message
 * @param cause Underlying cause
 */
public IllegalConfigurationException(String message, Throwable cause);
}
/**
 * Exception thrown when moderation model flags content
 * Thrown during invocation of methods annotated with @Moderate.
 */
public class ModerationException extends LangChain4jException {
/**
 * Constructor
 * @param message Error message
 */
public ModerationException(String message);
/**
 * Constructor with cause
 * @param message Error message
 * @param cause Underlying cause
 */
public ModerationException(String message, Throwable cause);
}

import dev.langchain4j.service.AiServices;
// Example: minimal AI service backed by a (non-streaming) chat model.
interface Assistant {
String chat(String message);
}
// Create simple AI service
Assistant assistant = AiServices.create(Assistant.class, chatModel);
try {
String response = assistant.chat("What is the capital of France?");
System.out.println(response);
} catch (RuntimeException e) {
System.err.println("AI service invocation failed: " + e.getMessage());
throw e;
}

import dev.langchain4j.service.AiServices;
import dev.langchain4j.service.SystemMessage;
import dev.langchain4j.service.UserMessage;
import dev.langchain4j.service.V;
// Example: prompt templates with @SystemMessage/@UserMessage and @V variables.
interface Chef {
@SystemMessage("You are a professional chef with expertise in {{cuisine}} cuisine.")
@UserMessage("Create a recipe for {{dish}} using {{ingredient}}.")
String createRecipe(@V("cuisine") String cuisine,
@V("dish") String dish,
@V("ingredient") String ingredient);
}
Chef chef = AiServices.create(Chef.class, chatModel);
try {
String recipe = chef.createRecipe("Italian", "pasta", "tomatoes");
System.out.println(recipe);
} catch (IllegalArgumentException e) {
System.err.println("Invalid template variables: " + e.getMessage());
}

import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import dev.langchain4j.service.AiServices;
import dev.langchain4j.service.MemoryId;
// Example: per-user conversation memory via @MemoryId and a ChatMemoryProvider.
interface Assistant {
String chat(@MemoryId String userId, String message);
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.chatMemoryProvider(memoryId -> MessageWindowChatMemory.withMaxMessages(10))
.build();
try {
// Different conversations for different users
String response1 = assistant.chat("user1", "My name is Alice");
String response2 = assistant.chat("user2", "My name is Bob");
String response3 = assistant.chat("user1", "What is my name?"); // Will respond "Alice"
System.out.println("User1 response: " + response3);
} catch (RuntimeException e) {
System.err.println("Conversation failed: " + e.getMessage());
// Handle error appropriately
}

import dev.langchain4j.agent.tool.Tool;
import dev.langchain4j.service.AiServices;
// Example: tool use with @Tool methods and a toolExecutionErrorHandler.
class WeatherService {
@Tool("Get current weather for a location")
String getWeather(String location) {
// Implementation with error handling
try {
// Call weather API
return "Sunny, 72°F";
} catch (Exception e) {
return "Weather data unavailable for " + location;
}
}
}
interface Assistant {
String chat(String message);
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.tools(new WeatherService())
.toolExecutionErrorHandler((toolExecutionRequest, throwable) -> {
System.err.println("Tool execution failed: " + throwable.getMessage());
return "Tool execution failed: " + throwable.getMessage();
})
.build();
try {
String response = assistant.chat("What's the weather in New York?");
System.out.println(response);
} catch (RuntimeException e) {
System.err.println("AI service with tools failed: " + e.getMessage());
}

import dev.langchain4j.service.AiServices;
import dev.langchain4j.service.TokenStream;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicReference;
// Example: streaming responses via TokenStream.
interface Assistant {
TokenStream chat(String message);
}
Assistant assistant = AiServices.create(Assistant.class, streamingChatModel);
CompletableFuture<String> future = new CompletableFuture<>();
AtomicReference<StringBuilder> responseBuilder = new AtomicReference<>(new StringBuilder());
// Register all handlers first, then call start() - handlers won't run without it.
assistant.chat("Tell me a story")
.onPartialResponse(token -> {
System.out.print(token);
responseBuilder.get().append(token);
})
.onCompleteResponse(response -> {
System.out.println("\nDone!");
future.complete(responseBuilder.get().toString());
})
.onError(throwable -> {
System.err.println("\nError: " + throwable.getMessage());
throwable.printStackTrace();
future.completeExceptionally(throwable);
})
.start();
try {
// NOTE(review): future.get() has no timeout - consider a bounded get in production.
String fullResponse = future.get(); // Wait for completion
} catch (Exception e) {
System.err.println("Streaming failed: " + e.getMessage());
}

import dev.langchain4j.service.AiServices;
import dev.langchain4j.service.Result;
interface Assistant {
Result<String> chat(String message);
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.tools(new WeatherService())
.build();
try {
Result<String> result = assistant.chat("What's the weather?");
System.out.println("Content: " + result.content());
if (result.tokenUsage() != null) {
System.out.println("Token usage: " + result.tokenUsage());
System.out.println("Input tokens: " + result.tokenUsage().inputTokenCount());
System.out.println("Output tokens: " + result.tokenUsage().outputTokenCount());
}
if (!result.toolExecutions().isEmpty()) {
System.out.println("Tool executions: " + result.toolExecutions().size());
result.toolExecutions().forEach(te ->
System.out.println(" - " + te.toolName() + ": " + te.result())
);
}
System.out.println("Finish reason: " + result.finishReason());
} catch (RuntimeException e) {
System.err.println("AI service invocation failed: " + e.getMessage());
}

import dev.langchain4j.service.AiServices;
// Example: RAG - retrieved content is added to each request.
interface Assistant {
String chat(String message);
}
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.contentRetriever(contentRetriever)
.build();
try {
String response = assistant.chat("What does the documentation say about X?");
System.out.println(response);
} catch (RuntimeException e) {
System.err.println("RAG query failed: " + e.getMessage());
}

import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import static org.mockito.ArgumentMatchers.*;
import static org.junit.jupiter.api.Assertions.*;
// Example: unit-testing callers by mocking the AI service interface itself.
interface Assistant {
String chat(String message);
}
@Test
void testWithMockedAssistant() {
// Create mock
Assistant assistant = Mockito.mock(Assistant.class);
// Define behavior
Mockito.when(assistant.chat(anyString()))
.thenReturn("Mocked response");
// Test
String response = assistant.chat("Hello");
assertEquals("Mocked response", response);
// Verify
Mockito.verify(assistant, Mockito.times(1)).chat("Hello");
}

import dev.langchain4j.model.chat.ChatLanguageModel;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.output.Response;
import org.junit.jupiter.api.Test;
// Example: deterministic testing with a fake model returning a fixed response.
// NOTE(review): this snippet uses the legacy ChatLanguageModel/Response/generate()
// API, while the API reference above uses ChatModel - confirm which version applies.
class FakeChatModel implements ChatLanguageModel {
private final String fixedResponse;
public FakeChatModel(String fixedResponse) {
this.fixedResponse = fixedResponse;
}
@Override
public Response<AiMessage> generate(List<ChatMessage> messages) {
return Response.from(AiMessage.from(fixedResponse));
}
}
@Test
void testWithFakeChatModel() {
ChatLanguageModel model = new FakeChatModel("Test response");
Assistant assistant = AiServices.create(Assistant.class, model);
String response = assistant.chat("Any message");
assertEquals("Test response", response);
}

import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
// Example: verifying per-user memory isolation (uses FakeChatModel from above).
interface Assistant {
String chat(@MemoryId String userId, String message);
}
@Test
void testMemoryIsolation() {
ChatLanguageModel model = new FakeChatModel("Echo");
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(model)
.chatMemoryProvider(id -> MessageWindowChatMemory.withMaxMessages(10))
.build();
// Verify different users have isolated memory
assistant.chat("user1", "Message from user1");
assistant.chat("user2", "Message from user2");
// Memory should be separate per user
// Add assertions based on your implementation
}

import dev.langchain4j.agent.tool.Tool;
import org.junit.jupiter.api.Test;
import org.mockito.Mockito;
import static org.mockito.Mockito.*;
// Example: verifying tool invocation with a Mockito spy.
class WeatherService {
@Tool("Get weather")
String getWeather(String location) {
return "Sunny";
}
}
@Test
void testToolExecution() {
WeatherService weatherService = Mockito.spy(new WeatherService());
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.tools(weatherService)
.build();
assistant.chat("What's the weather in Paris?");
// Verify tool was called
verify(weatherService, atLeastOnce()).getWeather(anyString());
}

import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
// Example: asserting that toolExecutionErrorHandler is invoked on tool failure.
// NOTE(review): AtomicBoolean needs java.util.concurrent.atomic.AtomicBoolean,
// which is not among the imports shown for this snippet.
@Test
void testToolExecutionError() {
class FailingTool {
@Tool("Failing tool")
String fail(String input) {
throw new RuntimeException("Tool failure");
}
}
AtomicBoolean errorHandlerCalled = new AtomicBoolean(false);
Assistant assistant = AiServices.builder(Assistant.class)
.chatModel(chatModel)
.tools(new FailingTool())
.toolExecutionErrorHandler((request, throwable) -> {
errorHandlerCalled.set(true);
return "Error handled: " + throwable.getMessage();
})
.build();
String response = assistant.chat("Trigger tool");
assertTrue(errorHandlerCalled.get(), "Error handler should be called");
}import org.junit.jupiter.api.Test;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
@Testcontainers
class AiServiceIntegrationTest {

    // Static container: started once and shared by all tests in the class.
    // NOTE(review): no model is pulled into the container here — "llama2" must
    // already be present in the image or fetched in a setup step, or the chat
    // call below will fail at runtime.
    @Container
    private static GenericContainer<?> ollama = new GenericContainer<>("ollama/ollama:latest")
            .withExposedPorts(11434);

    @Test
    void testWithRealModel() {
        // Resolve the host/port Testcontainers mapped for the exposed port.
        String baseUrl = "http://" + ollama.getHost() + ":" + ollama.getFirstMappedPort();
        ChatLanguageModel model = OllamaChatModel.builder()
                .baseUrl(baseUrl)
                .modelName("llama2")
                .build();
        Assistant assistant = AiServices.create(Assistant.class, model);
        String response = assistant.chat("Hello");
        // Real-model output is nondeterministic, so only presence is asserted.
        assertNotNull(response);
        assertFalse(response.isEmpty());
    }
}
import dev.langchain4j.service.AiServices;
import java.time.Duration;
/**
 * Single-conversation assistant contract (no @MemoryId) used by the
 * resilience-pattern examples that follow (retry, circuit breaker, timeout...).
 */
interface Assistant {
    String chat(String message);
}
/**
 * Decorator that retries a failed AI service call with exponential backoff:
 * the delay starts at {@code initialDelay} and doubles after each failure.
 */
class RetryableAiService {

    private final Assistant assistant;
    private final int maxRetries;
    private final Duration initialDelay;

    /**
     * @param assistant    delegate AI service, must not be null
     * @param maxRetries   total number of attempts, must be at least 1
     * @param initialDelay delay before the first retry, must not be null
     * @throws IllegalArgumentException if an argument is null or maxRetries < 1
     */
    public RetryableAiService(Assistant assistant, int maxRetries, Duration initialDelay) {
        if (assistant == null || initialDelay == null) {
            throw new IllegalArgumentException("assistant and initialDelay must not be null");
        }
        if (maxRetries < 1) {
            // Previously maxRetries <= 0 skipped the retry loop entirely and every
            // call failed at runtime with a misleading "Should not reach here".
            throw new IllegalArgumentException("maxRetries must be >= 1, got " + maxRetries);
        }
        this.assistant = assistant;
        this.maxRetries = maxRetries;
        this.initialDelay = initialDelay;
    }

    /**
     * Calls the assistant, retrying up to {@code maxRetries} attempts in total.
     *
     * @throws RuntimeException wrapping the last failure once attempts are
     *         exhausted, or wrapping an InterruptedException if the backoff
     *         sleep is interrupted (the interrupt flag is restored first)
     */
    public String chatWithRetry(String message) {
        int attempt = 0;
        Duration delay = initialDelay;
        while (attempt < maxRetries) {
            try {
                return assistant.chat(message);
            } catch (RuntimeException e) {
                attempt++;
                if (attempt >= maxRetries) {
                    throw new RuntimeException("Failed after " + maxRetries + " attempts", e);
                }
                System.err.println("Attempt " + attempt + " failed, retrying in " + delay.toMillis() + "ms");
                try {
                    Thread.sleep(delay.toMillis());
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("Interrupted during retry", ie);
                }
                delay = delay.multipliedBy(2); // Exponential backoff
            }
        }
        // Unreachable: maxRetries >= 1 guarantees the loop either returns or throws.
        throw new IllegalStateException("Should not reach here");
    }
}
// Usage
// NOTE(review): chatModel must be defined in the surrounding context.
Assistant assistant = AiServices.create(Assistant.class, chatModel);
RetryableAiService retryable = new RetryableAiService(assistant, 3, Duration.ofSeconds(1));
String response = retryable.chatWithRetry("Hello");
import java.time.Duration;
import java.time.Instant;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
/**
 * Minimal circuit breaker around an AI service call.
 * States: CLOSED (normal operation), OPEN (calls rejected until {@code timeout}
 * has elapsed since the last failure), HALF_OPEN (a probe call is allowed
 * through; success closes the breaker, failure reopens it).
 */
class CircuitBreaker {

    private enum State { CLOSED, OPEN, HALF_OPEN }

    private final Assistant assistant;
    private final int failureThreshold;
    private final Duration timeout;
    private final AtomicInteger failureCount = new AtomicInteger(0);
    private final AtomicReference<State> state = new AtomicReference<>(State.CLOSED);
    private final AtomicReference<Instant> lastFailureTime = new AtomicReference<>();

    public CircuitBreaker(Assistant assistant, int failureThreshold, Duration timeout) {
        this.assistant = assistant;
        this.failureThreshold = failureThreshold;
        this.timeout = timeout;
    }

    /**
     * Delegates to the assistant unless the breaker is OPEN.
     *
     * @throws RuntimeException "Circuit breaker is OPEN" when calls are being
     *         rejected, or the delegate's own failure, which is rethrown
     */
    public String chat(String message) {
        if (state.get() == State.OPEN) {
            Instant lastFailure = lastFailureTime.get();
            if (lastFailure != null && Instant.now().isAfter(lastFailure.plus(timeout))) {
                // compareAndSet so that, under concurrency, only ONE thread performs
                // the OPEN -> HALF_OPEN transition and sends the probe message.
                // The original unconditional set() let every waiting thread probe
                // simultaneously (check-then-act race).
                if (state.compareAndSet(State.OPEN, State.HALF_OPEN)) {
                    System.out.println("Circuit breaker: transitioning to HALF_OPEN");
                }
            } else {
                throw new RuntimeException("Circuit breaker is OPEN");
            }
        }
        try {
            String response = assistant.chat(message);
            onSuccess();
            return response;
        } catch (RuntimeException e) {
            onFailure();
            throw e;
        }
    }

    // Any success closes the breaker and clears the failure streak.
    private void onSuccess() {
        failureCount.set(0);
        state.set(State.CLOSED);
    }

    // Record the failure; trip the breaker once the threshold is reached.
    // In HALF_OPEN the count is still >= threshold, so one probe failure reopens.
    private void onFailure() {
        int failures = failureCount.incrementAndGet();
        lastFailureTime.set(Instant.now());
        if (failures >= failureThreshold) {
            state.set(State.OPEN);
            System.err.println("Circuit breaker: transitioning to OPEN after " + failures + " failures");
        }
    }
}
// Usage
Assistant assistant = AiServices.create(Assistant.class, chatModel);
CircuitBreaker circuitBreaker = new CircuitBreaker(assistant, 3, Duration.ofMinutes(1));
try {
    String response = circuitBreaker.chat("Hello");
} catch (RuntimeException e) {
    // Both "breaker open" rejections and delegate failures surface here.
    System.err.println("Circuit breaker prevented call or call failed: " + e.getMessage());
}
/** One-argument assistant contract used by the fallback example below. */
interface Assistant {
    String chat(String message);
}
/**
 * Tries a primary AI service first, then a secondary one; if both fail, a
 * fixed apology message is returned instead of propagating the error.
 */
class FallbackAiService {

    private final Assistant primary;
    private final Assistant fallback;

    public FallbackAiService(Assistant primary, Assistant fallback) {
        this.primary = primary;
        this.fallback = fallback;
    }

    public String chat(String message) {
        try {
            return primary.chat(message);
        } catch (RuntimeException primaryFailure) {
            System.err.println("Primary service failed: " + primaryFailure.getMessage());
            System.err.println("Falling back to secondary service");
            return chatViaFallback(message);
        }
    }

    // Second attempt against the fallback service; its failure is swallowed
    // and the response degrades to a canned apology.
    private String chatViaFallback(String message) {
        try {
            return fallback.chat(message);
        } catch (RuntimeException fallbackException) {
            System.err.println("Fallback service also failed: " + fallbackException.getMessage());
            return "I apologize, but I'm currently unable to process your request. Please try again later.";
        }
    }
}
// Usage
// Two distinct proxies over different models; FallbackAiService chains them.
Assistant primary = AiServices.create(Assistant.class, primaryChatModel);
Assistant fallback = AiServices.create(Assistant.class, fallbackChatModel);
FallbackAiService service = new FallbackAiService(primary, fallback);
String response = service.chat("Hello");
import java.util.concurrent.*;
/**
 * Bounds each AI service call with a hard timeout by running it on a
 * background thread pool.
 * Now implements AutoCloseable so it can be used in a try-with-resources
 * block; close() delegates to the pre-existing shutdown(), so current callers
 * are unaffected. (Without this, forgetting shutdown() leaks executor threads.)
 */
class TimeoutAiService implements AutoCloseable {

    private final Assistant assistant;
    private final Duration timeout;
    private final ExecutorService executor;

    public TimeoutAiService(Assistant assistant, Duration timeout) {
        this.assistant = assistant;
        this.timeout = timeout;
        this.executor = Executors.newCachedThreadPool();
    }

    /**
     * Runs the chat call with the configured timeout.
     *
     * @throws RuntimeException on timeout (the task is also cancelled with
     *         interruption), on interruption (interrupt flag restored), or
     *         wrapping the underlying failure's cause
     */
    public String chatWithTimeout(String message) {
        Future<String> future = executor.submit(() -> assistant.chat(message));
        try {
            return future.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            future.cancel(true); // interrupt the in-flight call
            throw new RuntimeException("Request timed out after " + timeout.toMillis() + "ms", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("Request was interrupted", e);
        } catch (ExecutionException e) {
            // Unwrap so callers see the real cause, not the Future plumbing.
            throw new RuntimeException("Request failed", e.getCause());
        }
    }

    /** Stops accepting new tasks; in-flight calls are allowed to finish. */
    public void shutdown() {
        executor.shutdown();
    }

    @Override
    public void close() {
        shutdown();
    }
}
// Usage
Assistant assistant = AiServices.create(Assistant.class, chatModel);
TimeoutAiService timeoutService = new TimeoutAiService(assistant, Duration.ofSeconds(30));
try {
    String response = timeoutService.chatWithTimeout("Hello");
} catch (RuntimeException e) {
    System.err.println("Request failed or timed out: " + e.getMessage());
} finally {
    // Always release the executor's threads when done.
    timeoutService.shutdown();
}
import java.util.concurrent.ConcurrentHashMap;
import java.util.Map;
/**
 * Caches successful AI responses keyed by the exact message text. When the
 * delegate fails and stale-on-error is enabled, a previously cached answer is
 * served (suffixed with " [cached]") instead of propagating the failure.
 */
class CachedAiService {

    private final Assistant assistant;
    private final Map<String, String> cache = new ConcurrentHashMap<>();
    private final boolean useCacheOnError;

    public CachedAiService(Assistant assistant, boolean useCacheOnError) {
        this.assistant = assistant;
        this.useCacheOnError = useCacheOnError;
    }

    public String chat(String message) {
        // Snapshot any stale value up front so it can serve as a fallback.
        String stale = cache.get(message);
        try {
            String fresh = assistant.chat(message);
            cache.put(message, fresh);
            return fresh;
        } catch (RuntimeException e) {
            if (!useCacheOnError || stale == null) {
                throw e;
            }
            System.err.println("Service failed, returning cached response: " + e.getMessage());
            return stale + " [cached]";
        }
    }

    /** Drops every cached response. */
    public void clearCache() {
        cache.clear();
    }
}
// Usage
Assistant assistant = AiServices.create(Assistant.class, chatModel);
CachedAiService cachedService = new CachedAiService(assistant, true);
String response = cachedService.chat("What is 2+2?");
// On subsequent failures, cached response is returned
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.beans.factory.annotation.Value;
@Configuration
public class AiServiceConfig {

    /**
     * Chat model bean; API key and model name come from application
     * properties (model name defaults to gpt-4).
     */
    @Bean
    public ChatLanguageModel chatLanguageModel(
            @Value("${openai.api.key}") String apiKey,
            @Value("${openai.model.name:gpt-4}") String modelName) {
        return OpenAiChatModel.builder()
                .apiKey(apiKey)
                .modelName(modelName)
                .temperature(0.7)
                .timeout(Duration.ofSeconds(60))
                .build();
    }

    /** One bounded memory (last 10 messages) per memoryId. */
    @Bean
    public ChatMemoryProvider chatMemoryProvider() {
        return memoryId -> MessageWindowChatMemory.withMaxMessages(10);
    }

    /**
     * AI service proxy wired from the model, the memory provider, and the
     * injected tools list.
     * NOTE(review): injecting List&lt;Object&gt; collects ALL Object-typed beans
     * as tools — consider a marker interface or qualifier to scope this.
     */
    @Bean
    public Assistant assistant(
            ChatLanguageModel chatModel,
            ChatMemoryProvider chatMemoryProvider,
            List<Object> tools) {
        return AiServices.builder(Assistant.class)
                .chatModel(chatModel)
                .chatMemoryProvider(chatMemoryProvider)
                .tools(tools)
                .build();
    }

    /** Weather tool registered as a plain Object bean (picked up above). */
    @Bean
    public Object weatherTool() {
        return new WeatherService();
    }
}
// Controller
@RestController
@RequestMapping("/api/chat")
public class ChatController {

    private final Assistant assistant;

    public ChatController(Assistant assistant) {
        this.assistant = assistant;
    }

    /**
     * Synchronous chat endpoint; userId selects the per-user memory.
     */
    @PostMapping
    public ResponseEntity<ChatResponse> chat(
            @RequestParam String userId,
            @RequestBody ChatRequest request) {
        try {
            String response = assistant.chat(userId, request.getMessage());
            return ResponseEntity.ok(new ChatResponse(response));
        } catch (Exception e) {
            // NOTE(review): echoing e.getMessage() to the client can leak
            // internal details (prompts, provider errors); prefer a generic
            // message plus server-side logging.
            return ResponseEntity
                    .status(HttpStatus.INTERNAL_SERVER_ERROR)
                    .body(new ChatResponse("Error: " + e.getMessage()));
        }
    }
}
import dev.langchain4j.service.Result;
import org.springframework.web.bind.annotation.*;
/**
 * Assistant variant returning Result&lt;String&gt;, which carries the answer
 * plus token usage, finish reason and tool executions.
 */
interface AssistantWithMetadata {
    Result<String> chat(@MemoryId String userId, String message);
}
@Configuration
public class AiServiceWithResultConfig {

    /** Builds the metadata-returning assistant from shared model/memory beans. */
    @Bean
    public AssistantWithMetadata assistantWithMetadata(
            ChatLanguageModel chatModel,
            ChatMemoryProvider chatMemoryProvider) {
        return AiServices.builder(AssistantWithMetadata.class)
                .chatModel(chatModel)
                .chatMemoryProvider(chatMemoryProvider)
                .build();
    }
}
@RestController
@RequestMapping("/api/chat")
public class ChatControllerWithMetadata {

    private final AssistantWithMetadata assistant;

    public ChatControllerWithMetadata(AssistantWithMetadata assistant) {
        this.assistant = assistant;
    }

    /**
     * Returns the model's answer together with token usage, finish reason and
     * tool executions, all read from the Result wrapper.
     */
    @PostMapping("/detailed")
    public ResponseEntity<DetailedChatResponse> chatWithMetadata(
            @RequestParam String userId,
            @RequestBody ChatRequest request) {
        try {
            Result<String> result = assistant.chat(userId, request.getMessage());
            return ResponseEntity.ok(DetailedChatResponse.builder()
                    .content(result.content())
                    .tokenUsage(result.tokenUsage())
                    .finishReason(result.finishReason())
                    .toolExecutions(result.toolExecutions())
                    .build());
        } catch (Exception e) {
            // NOTE(review): exposing e.getMessage() to clients may leak internals.
            return ResponseEntity
                    .status(HttpStatus.INTERNAL_SERVER_ERROR)
                    .body(DetailedChatResponse.error(e.getMessage()));
        }
    }
}
import javax.enterprise.context.ApplicationScoped;
import javax.enterprise.inject.Produces;
import org.eclipse.microprofile.config.inject.ConfigProperty;
@ApplicationScoped
public class AiServiceProducer {

    // MicroProfile Config injection; modelName falls back to gpt-4.
    @ConfigProperty(name = "openai.api.key")
    String apiKey;

    @ConfigProperty(name = "openai.model.name", defaultValue = "gpt-4")
    String modelName;

    /** Application-scoped chat model shared across the application. */
    @Produces
    @ApplicationScoped
    public ChatLanguageModel chatLanguageModel() {
        return OpenAiChatModel.builder()
                .apiKey(apiKey)
                .modelName(modelName)
                .build();
    }

    /** AI service proxy with a bounded (10-message) memory per memoryId. */
    @Produces
    @ApplicationScoped
    public Assistant assistant(ChatLanguageModel chatModel) {
        return AiServices.builder(Assistant.class)
                .chatModel(chatModel)
                .chatMemoryProvider(id -> MessageWindowChatMemory.withMaxMessages(10))
                .build();
    }
}
// Resource
@Path("/chat")
@ApplicationScoped
public class ChatResource {

    @Inject
    Assistant assistant;

    /**
     * JAX-RS chat endpoint.
     * NOTE(review): Response here is the JAX-RS javax.ws.rs.core.Response (not
     * langchain4j's), and Map needs java.util.Map imported — neither import is
     * shown in this snippet. Returning e.getMessage() to clients may also leak
     * internal details.
     */
    @POST
    @Produces(MediaType.APPLICATION_JSON)
    @Consumes(MediaType.APPLICATION_JSON)
    public Response chat(@QueryParam("userId") String userId, ChatRequest request) {
        try {
            String response = assistant.chat(userId, request.getMessage());
            return Response.ok(new ChatResponse(response)).build();
        } catch (Exception e) {
            return Response
                    .serverError()
                    .entity(Map.of("error", e.getMessage()))
                    .build();
        }
    }
}
import reactor.core.publisher.Flux;
import reactor.core.publisher.Sinks;
/**
 * Streaming assistant contract: TokenStream lets callers subscribe to partial
 * responses, completion and errors via callbacks.
 */
interface StreamingAssistant {
    TokenStream chat(String message);
}
@Service
public class ReactiveAiService {

    private final StreamingAssistant assistant;

    public ReactiveAiService(StreamingChatModel streamingChatModel) {
        this.assistant = AiServices.create(StreamingAssistant.class, streamingChatModel);
    }

    /**
     * Bridges the callback-based TokenStream onto a Reactor Flux.
     * NOTE(review): the tryEmitNext/tryEmitComplete/tryEmitError results are
     * ignored — emission failures (e.g. a cancelled subscriber) are silently
     * dropped; inspect the returned Sinks.EmitResult if delivery matters.
     */
    public Flux<String> chatReactive(String message) {
        Sinks.Many<String> sink = Sinks.many().multicast().onBackpressureBuffer();
        assistant.chat(message)
                .onPartialResponse(sink::tryEmitNext)
                .onCompleteResponse(response -> sink.tryEmitComplete())
                .onError(throwable -> sink.tryEmitError(throwable))
                .start();
        return sink.asFlux();
    }
}
// Controller
@RestController
@RequestMapping("/api/stream")
public class StreamingChatController {

    private final ReactiveAiService aiService;

    public StreamingChatController(ReactiveAiService aiService) {
        this.aiService = aiService;
    }

    /**
     * Server-sent events endpoint; upstream errors are converted into a final
     * "Error: ..." event instead of terminating the stream abruptly.
     */
    @GetMapping(value = "/chat", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
    public Flux<String> streamChat(@RequestParam String message) {
        return aiService.chatReactive(message)
                .onErrorResume(throwable -> {
                    return Flux.just("Error: " + throwable.getMessage());
                });
    }
}
apiVersion: v1
# NOTE(review): the extracted manifest lost all indentation, which is
# structurally significant in YAML; the hierarchy below is reconstructed
# from the Kubernetes resource schemas.
kind: ConfigMap
metadata:
  name: ai-service-config
data:
  application.properties: |
    openai.api.key=${OPENAI_API_KEY}
    openai.model.name=gpt-4
    openai.timeout=60s
    chat.memory.max.messages=10
---
apiVersion: v1
kind: Secret
metadata:
  name: ai-service-secrets
type: Opaque
stringData:
  openai-api-key: "your-api-key-here"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ai-service
spec:
  replicas: 3
  selector:
    matchLabels:
      app: ai-service
  template:
    metadata:
      labels:
        app: ai-service
    spec:
      containers:
        - name: ai-service
          image: your-registry/ai-service:latest
          ports:
            - containerPort: 8080
          env:
            # API key injected from the Secret, never baked into the image.
            - name: OPENAI_API_KEY
              valueFrom:
                secretKeyRef:
                  name: ai-service-secrets
                  key: openai-api-key
          volumeMounts:
            - name: config
              mountPath: /config
          resources:
            requests:
              memory: "512Mi"
              cpu: "500m"
            limits:
              memory: "1Gi"
              cpu: "1000m"
          livenessProbe:
            httpGet:
              path: /actuator/health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /actuator/health/readiness
              port: 8080
            initialDelaySeconds: 20
            periodSeconds: 5
      volumes:
        - name: config
          configMap:
            name: ai-service-config
---
apiVersion: v1
kind: Service
metadata:
  name: ai-service
spec:
  selector:
    app: ai-service
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8080
  type: LoadBalancer
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;
import org.springframework.stereotype.Service;
/**
 * Wraps an Assistant with Micrometer timing and counting.
 * NOTE: the per-call "user" tag from the original was removed — tagging with
 * an unbounded value like userId creates one time series per user (tag
 * cardinality explosion in the metrics backend); track per-user detail in
 * logs or traces instead.
 */
@Service
public class ObservableAiService {

    private final Assistant assistant;
    private final MeterRegistry meterRegistry;

    public ObservableAiService(Assistant assistant, MeterRegistry meterRegistry) {
        this.assistant = assistant;
        this.meterRegistry = meterRegistry;
    }

    /**
     * Delegates to the assistant, recording a timer sample and a counter for
     * both the success and the error path; failures are rethrown untouched.
     */
    public String chat(String userId, String message) {
        Timer.Sample sample = Timer.start(meterRegistry);
        try {
            String response = assistant.chat(userId, message);
            sample.stop(Timer.builder("ai.service.request")
                    .tag("status", "success")
                    .register(meterRegistry));
            meterRegistry.counter("ai.service.requests.total",
                    "status", "success").increment();
            return response;
        } catch (Exception e) {
            // The exception class name is a small, bounded tag set — safe to keep.
            sample.stop(Timer.builder("ai.service.request")
                    .tag("status", "error")
                    .tag("error", e.getClass().getSimpleName())
                    .register(meterRegistry));
            meterRegistry.counter("ai.service.requests.total",
                    "status", "error",
                    "error", e.getClass().getSimpleName()).increment();
            throw e;
        }
    }
}
Install with Tessl CLI
npx tessl i tessl/maven-dev-langchain4j--langchain4j@1.11.0