tessl install tessl/maven-io-quarkiverse-langchain4j--quarkus-langchain4j-core@1.5.0

Quarkus LangChain4j Core provides runtime integration for LangChain4j with the Quarkus framework, enabling declarative AI service creation through CDI annotations.
This document covers advanced usage patterns, edge cases, and solutions to uncommon but important scenarios.
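For reference, the core pattern everything below builds on is a declarative AI service: an annotated interface that the extension implements at build time and registers as a CDI bean. A minimal sketch, assuming a chat model provider extension (e.g. quarkus-langchain4j-openai) is configured:

import io.quarkiverse.langchain4j.RegisterAiService;
import dev.langchain4j.service.SystemMessage;

// The extension generates the implementation and registers it as a CDI bean
@RegisterAiService
public interface Assistant {

    @SystemMessage("You are a concise, helpful assistant.")
    String chat(String userMessage);
}

// Usage from any bean:
// @Inject Assistant assistant;
// String reply = assistant.chat("Hello!");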
Problem: Tools that perform blocking I/O throw BlockingToolNotAllowedException when called on the Vert.x event loop.
Solution: Use the @Blocking annotation or offload the work to a worker thread.
import io.smallrye.common.annotation.Blocking;
import dev.langchain4j.agent.tool.Tool;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

@ApplicationScoped
public class DatabaseTool {

    @Inject
    DataSource dataSource; // Agroal-managed JDBC pool

    @Tool("Query database")
    @Blocking // Forces execution on a worker thread
    public String query(String sql) throws Exception {
        // Blocking JDBC call is safe here
        try (Connection conn = dataSource.getConnection();
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(sql)) {
            return rs.next() ? rs.getString(1) : null;
        }
    }
}
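On a recent JDK, @RunOnVirtualThread may be a third option for blocking tools; verify that your extension version honors it on tool methods the way it honors @Blocking. A sketch (slowHttpCall is a stand-in for any blocking I/O):

import io.smallrye.common.annotation.RunOnVirtualThread;
import dev.langchain4j.agent.tool.Tool;
import jakarta.enterprise.context.ApplicationScoped;

@ApplicationScoped
public class VirtualThreadTool {

    @Tool("Fetch report")
    @RunOnVirtualThread // Blocking call runs on a virtual thread, not the event loop
    public String fetchReport(String id) {
        return slowHttpCall(id); // placeholder for any blocking I/O
    }

    private String slowHttpCall(String id) {
        return "report-" + id;
    }
}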
// Alternative: Use reactive/non-blocking APIs
import dev.langchain4j.agent.tool.Tool;
import io.smallrye.mutiny.Uni;
import io.vertx.mutiny.pgclient.PgPool;
import io.vertx.mutiny.sqlclient.Row;
import io.vertx.mutiny.sqlclient.RowSet;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

@ApplicationScoped
public class ReactiveDatabaseTool {

    @Inject
    PgPool pgPool; // Mutiny-flavored reactive PostgreSQL client

    @Tool("Query database reactively")
    public Uni<String> queryReactive(String sql) {
        return pgPool.query(sql)
                .execute()
                .onItem().transform(this::formatResults);
    }

    private String formatResults(RowSet<Row> rows) {
        StringBuilder sb = new StringBuilder();
        rows.forEach(row -> sb.append(row.toJson()).append('\n'));
        return sb.toString();
    }
}

Problem: In-memory chat histories grow unbounded, causing memory leaks.
Solution: Implement custom eviction based on time, size, or access patterns.
import dev.langchain4j.memory.ChatMemory;
import dev.langchain4j.memory.ChatMemoryProvider;
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
import io.quarkus.scheduler.Scheduled;
import jakarta.enterprise.context.ApplicationScoped;
import java.util.Comparator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

@ApplicationScoped
public class EvictingMemoryProvider implements ChatMemoryProvider {

    private static final long MAX_IDLE_MS = 3_600_000; // 1 hour
    private static final int MAX_ENTRIES = 10_000;

    private final Map<Object, TimestampedMemory> memories = new ConcurrentHashMap<>();

    @Override
    public ChatMemory get(Object memoryId) {
        // LRU-style: update the timestamp on every access
        TimestampedMemory tm = memories.compute(memoryId, (id, existing) -> {
            long now = System.currentTimeMillis();
            if (existing == null) {
                return new TimestampedMemory(MessageWindowChatMemory.withMaxMessages(100), now);
            }
            return new TimestampedMemory(existing.memory(), now);
        });
        // Evict if over capacity
        if (memories.size() > MAX_ENTRIES) {
            evictOldest();
        }
        return tm.memory();
    }

    @Scheduled(every = "15m")
    void evictStale() {
        long now = System.currentTimeMillis();
        memories.entrySet().removeIf(entry ->
                (now - entry.getValue().lastAccess()) > MAX_IDLE_MS);
    }

    private void evictOldest() {
        memories.entrySet().stream()
                .min(Map.Entry.comparingByValue(Comparator.comparingLong(TimestampedMemory::lastAccess)))
                .ifPresent(entry -> memories.remove(entry.getKey()));
    }

    private record TimestampedMemory(ChatMemory memory, long lastAccess) {}
}
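To put the provider to work, expose an AI service keyed by a @MemoryId parameter; quarkus-langchain4j's default bean-based supplier should resolve a ChatMemoryProvider CDI bean like the one above, so each userId maps to one evicting memory via EvictingMemoryProvider.get(userId). A minimal sketch:

import io.quarkiverse.langchain4j.RegisterAiService;
import dev.langchain4j.service.MemoryId;
import dev.langchain4j.service.UserMessage;

@RegisterAiService
public interface SessionAssistant {
    String chat(@MemoryId String userId, @UserMessage String message);
}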
Problem: Need to add/remove tools at runtime based on user permissions or configuration.
Solution: Use the ToolProvider SPI for dynamic tool discovery.
import dev.langchain4j.service.tool.ToolProvider;
import dev.langchain4j.service.tool.ToolProviderRequest;
import dev.langchain4j.service.tool.ToolProviderResult;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import java.util.Set;

@ApplicationScoped
public class DynamicToolProvider implements ToolProvider {

    @Inject
    PermissionService permissions; // application-specific

    @Inject
    ToolRegistry registry; // application-specific: maps a tool name to its ToolSpecification and ToolExecutor

    @Override
    public ToolProviderResult provideTools(ToolProviderRequest request) {
        Object memoryId = request.chatMemoryId();
        Set<String> userPermissions = permissions.getPermissions(memoryId);
        ToolProviderResult.Builder tools = ToolProviderResult.builder();
        // Add tools based on permissions
        if (userPermissions.contains("read:data")) {
            tools.add(registry.specification("queryData"), registry.executor("queryData"));
        }
        if (userPermissions.contains("write:data")) {
            tools.add(registry.specification("updateData"), registry.executor("updateData"));
        }
        if (userPermissions.contains("admin")) {
            registry.adminToolNames().forEach(name ->
                    tools.add(registry.specification(name), registry.executor(name)));
        }
        return tools.build();
    }
}
@RegisterAiService(toolProviderSupplier = RegisterAiService.BeanIfExistsToolProviderSupplier.class)
public interface DynamicAssistant {
    String chat(@MemoryId String userId, String message);
}

Problem: Transient failures should be retried, but with backoff to avoid overwhelming services.
Solution: Implement custom retry logic in tool error handlers.
import io.quarkiverse.langchain4j.HandleToolExecutionError;
import dev.langchain4j.agent.tool.Tool;
import dev.langchain4j.agent.tool.ToolErrorContext;
import io.smallrye.common.annotation.Blocking;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

@ApplicationScoped
public class ExternalApiTool {

    private static final int MAX_RETRIES = 3;
    private static final long BASE_DELAY_MS = 1000;

    @Inject
    ApiClient httpClient; // application-specific HTTP client wrapper

    @Tool("Call external API")
    @Blocking // the retry loop sleeps, so keep it off the event loop
    public String callApi(String endpoint) {
        return executeWithRetry(endpoint, 0);
    }

    private String executeWithRetry(String endpoint, int attempt) {
        try {
            return httpClient.get(endpoint);
        } catch (IOException | TimeoutException e) {
            if (attempt < MAX_RETRIES) {
                long delay = BASE_DELAY_MS * (1L << attempt); // Exponential backoff: 1s, 2s, 4s
                sleep(delay);
                return executeWithRetry(endpoint, attempt + 1);
            }
            throw new RuntimeException("Failed after " + MAX_RETRIES + " retries", e);
        }
    }

    @HandleToolExecutionError
    public static String handleError(Throwable error, ToolErrorContext context) {
        String message = String.valueOf(error.getMessage()); // guard against null messages
        if (message.contains("retries")) {
            return "The external service is currently unavailable. Please try again later.";
        }
        return "Error accessing external service: " + message;
    }

    private void sleep(long ms) {
        try {
            TimeUnit.MILLISECONDS.sleep(ms);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }
}
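If the quarkus-smallrye-fault-tolerance extension is present, the same policy can be declared rather than hand-rolled, assuming tool beans are invoked through their CDI proxy so interceptor bindings apply. A sketch (@ExponentialBackoff is SmallRye-specific, not MicroProfile; httpGet is a placeholder):

import org.eclipse.microprofile.faulttolerance.Retry;
import io.smallrye.faulttolerance.api.ExponentialBackoff;
import dev.langchain4j.agent.tool.Tool;
import io.smallrye.common.annotation.Blocking;
import jakarta.enterprise.context.ApplicationScoped;

@ApplicationScoped
public class DeclarativeRetryTool {

    @Tool("Call external API")
    @Blocking
    @Retry(maxRetries = 3, delay = 1000) // delay in milliseconds
    @ExponentialBackoff(factor = 2)      // 1s, 2s, 4s between attempts
    public String callApi(String endpoint) {
        return httpGet(endpoint); // placeholder for the actual call
    }

    private String httpGet(String endpoint) {
        throw new UnsupportedOperationException("wire in your HTTP client");
    }
}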
Problem: Long conversations exceed model context windows or cause performance issues.
Solution: Implement smart history truncation or summarization.
import dev.langchain4j.memory.ChatMemory;
import dev.langchain4j.memory.ChatMemoryProvider;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.SystemMessage;
import dev.langchain4j.model.Tokenizer;
import dev.langchain4j.model.chat.ChatModel;
import io.quarkiverse.langchain4j.ModelName;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

@ApplicationScoped
public class SmartMemoryProvider implements ChatMemoryProvider {

    private static final int MAX_TOKENS = 3000;
    private static final int CONTEXT_MESSAGES = 10; // Keep recent messages verbatim

    @Inject
    Tokenizer tokenizer;

    @Inject
    @ModelName("summarizer") // a cheap model configured under this name
    ChatModel summarizer;

    @Override
    public ChatMemory get(Object memoryId) {
        return new SmartChatMemory(memoryId, tokenizer, summarizer);
    }

    private static class SmartChatMemory implements ChatMemory {

        private final List<ChatMessage> messages = new ArrayList<>();
        private final Object memoryId;
        private final Tokenizer tokenizer;
        private final ChatModel summarizer;
        private String conversationSummary;

        SmartChatMemory(Object memoryId, Tokenizer tokenizer, ChatModel summarizer) {
            this.memoryId = memoryId;
            this.tokenizer = tokenizer;
            this.summarizer = summarizer;
        }

        @Override
        public Object id() {
            return memoryId;
        }

        @Override
        public void add(ChatMessage message) {
            messages.add(message);
            // Summarize once the history exceeds the token budget
            int totalTokens = messages.stream()
                    .mapToInt(tokenizer::estimateTokenCountInMessage)
                    .sum();
            if (totalTokens > MAX_TOKENS) {
                summarizeOldMessages();
            }
        }

        @Override
        public List<ChatMessage> messages() {
            List<ChatMessage> result = new ArrayList<>();
            // Prepend the running summary, if any
            if (conversationSummary != null) {
                result.add(SystemMessage.from("Previous conversation summary: " + conversationSummary));
            }
            // Add the most recent messages verbatim
            int start = Math.max(0, messages.size() - CONTEXT_MESSAGES);
            result.addAll(messages.subList(start, messages.size()));
            return result;
        }

        private void summarizeOldMessages() {
            int splitPoint = messages.size() - CONTEXT_MESSAGES;
            if (splitPoint <= 0) {
                return;
            }
            // ChatMessage#toString is good enough for a summarization prompt
            String conversation = messages.subList(0, splitPoint).stream()
                    .map(m -> m.type() + ": " + m)
                    .collect(Collectors.joining("\n"));
            String prompt = "Summarize this conversation concisely:\n\n" + conversation;
            conversationSummary = summarizer.chat(prompt);
            // Drop the old messages, keep the recent window
            messages.subList(0, splitPoint).clear();
        }

        @Override
        public void clear() {
            messages.clear();
            conversationSummary = null;
        }
    }
}

Problem: Need to support multiple languages with the same AI service.
Solution: Use dynamic template selection based on user locale.
import dev.langchain4j.service.UserMessage;
import dev.langchain4j.service.V;
import java.util.Map;

@RegisterAiService
public interface MultilingualAssistant {
    // The {locale} template variable steers the response language at runtime
    @UserMessage("Respond in the language of locale {locale}. Greet the user named {name}.")
    String greet(@V("locale") String locale, @V("name") String name);
}

// Custom implementation that handles template selection
@ApplicationScoped
public class TemplateSelector {

    private final Map<String, String> greetingTemplates = Map.of(
            "en", "Hello {name}! How can I help you today?",
            "es", "¡Hola {name}! ¿Cómo puedo ayudarte hoy?",
            "fr", "Bonjour {name}! Comment puis-je vous aider aujourd'hui?",
            "de", "Hallo {name}! Wie kann ich Ihnen heute helfen?");

    public String getTemplate(String locale, String templateKey) {
        return greetingTemplates.getOrDefault(locale, greetingTemplates.get("en"));
    }
}
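One way to wire the selector in is to render the template yourself and hand the finished prompt to a free-form service method. A sketch (PromptAssistant and GreetingService are illustrative names, not part of the library):

@RegisterAiService
public interface PromptAssistant {
    String chat(String prompt);
}

@ApplicationScoped
public class GreetingService {

    @Inject
    TemplateSelector templates;

    @Inject
    PromptAssistant assistant;

    public String greet(String locale, String name) {
        // Resolve the locale-specific template, fill it in, then call the model
        String prompt = templates.getTemplate(locale, "greeting").replace("{name}", name);
        return assistant.chat(prompt);
    }
}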
// Alternative: Load the template text from a classpath resource.
// Note: fromResource reads a single resource; locale-specific selection
// (e.g. greeting_en vs. greeting_es) still needs custom handling such as
// the TemplateSelector above.
@RegisterAiService
public interface LocalizedAssistant {
    @UserMessage(fromResource = "templates/greeting", delimiter = "\n")
    String greet(String name);
}

// src/main/resources/templates/greeting:
// Hello {name}! How can I help you today?

Problem: Guardrails should only execute under certain conditions.
Solution: Implement conditional logic within guardrails.
import io.quarkiverse.langchain4j.guardrails.*;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;

@ApplicationScoped
public class ConditionalAuthGuardrail implements ToolInputGuardrail {

    @Inject
    ConfigService config; // application-specific

    @Inject
    AuthService authService; // application-specific

    @Override
    public ToolInputGuardrailResult validate(ToolInputGuardrailRequest request) {
        // Skip authorization for internal calls
        if (isInternalCall(request)) {
            return ToolInputGuardrailResult.success();
        }
        // Skip authorization in development mode
        if (config.isDevelopmentMode()) {
            return ToolInputGuardrailResult.success();
        }
        // Perform the full authorization check in production
        return performAuthCheck(request);
    }

    private boolean isInternalCall(ToolInputGuardrailRequest request) {
        return request.invocationContext().hasParameter("internal")
                && Boolean.TRUE.equals(request.invocationContext().parameter("internal"));
    }

    private ToolInputGuardrailResult performAuthCheck(ToolInputGuardrailRequest request) {
        Object memoryId = request.memoryId();
        String resource = request.argumentsAsJson().getString("resourceId");
        if (!authService.canAccess(memoryId.toString(), resource)) {
            return ToolInputGuardrailResult.failure("Access denied");
        }
        return ToolInputGuardrailResult.success();
    }
}

Problem: Primary model fails or is unavailable; need automatic failover.
Solution: Implement a fallback chain with different models.
import io.quarkiverse.langchain4j.ModelName;
import dev.langchain4j.model.chat.ChatModel;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.inject.Instance;
import jakarta.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

@ApplicationScoped
public class ResilientAssistant {

    private static final Logger logger = LoggerFactory.getLogger(ResilientAssistant.class);

    @Inject
    Instance<ChatModel> models;

    // Ordered by preference; each name must match a configured named model
    private final List<String> modelChain = List.of(
            "gpt-4",
            "gpt-3.5-turbo",
            "claude-3-opus",
            "local-llama");

    public String chat(String message) {
        List<Exception> failures = new ArrayList<>();
        for (String modelName : modelChain) {
            try {
                ChatModel model = models.select(ModelName.Literal.of(modelName)).get();
                String reply = model.chat(message);
                logger.info("Successfully used model: {}", modelName);
                return reply;
            } catch (Exception e) {
                logger.warn("Model {} failed: {}", modelName, e.getMessage());
                failures.add(e);
            }
        }
        // All models failed
        throw new RuntimeException("All models failed. Errors: "
                + failures.stream().map(Exception::getMessage).collect(Collectors.joining("; ")));
    }
}
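Each name in the chain must resolve to a named model configuration. A sketch of what that might look like in application.properties, assuming the OpenAI and Ollama provider extensions are installed (exact keys depend on your providers, and names containing dots would need quoting):

# application.properties (illustrative)
quarkus.langchain4j.gpt-4.chat-model.provider=openai
quarkus.langchain4j.openai.gpt-4.chat-model.model-name=gpt-4
quarkus.langchain4j.local-llama.chat-model.provider=ollama
quarkus.langchain4j.ollama.local-llama.chat-model.model-id=llama3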
Problem: OAuth2 tokens expire and need automatic refresh.
Solution: Implement a ModelAuthProvider with token refresh logic.
import io.quarkiverse.langchain4j.auth.ModelAuthProvider;
import io.quarkiverse.langchain4j.ModelName;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Instant;
import java.util.concurrent.locks.ReentrantLock;

@ApplicationScoped
@ModelName("enterprise-model")
public class RefreshableAuthProvider implements ModelAuthProvider {

    private static final Logger logger = LoggerFactory.getLogger(RefreshableAuthProvider.class);

    @Inject
    OAuth2TokenService tokenService; // application-specific

    private volatile TokenCache cache;
    private final ReentrantLock refreshLock = new ReentrantLock();

    @Override
    public String getAuthorization(Input input) {
        TokenCache current = cache;
        // Refresh if the token is missing or expires within a 5-minute buffer
        if (current == null || Instant.now().plusSeconds(300).isAfter(current.expiry())) {
            refreshToken();
            current = cache;
        }
        return "Bearer " + current.token();
    }

    private void refreshToken() {
        refreshLock.lock();
        try {
            // Double-check after acquiring the lock
            if (cache == null || Instant.now().plusSeconds(300).isAfter(cache.expiry())) {
                TokenResponse response = tokenService.refreshToken();
                cache = new TokenCache(
                        response.accessToken(),
                        Instant.now().plusSeconds(response.expiresIn()));
                logger.info("Token refreshed, expires at {}", cache.expiry());
            }
        } catch (Exception e) {
            logger.error("Failed to refresh token", e);
            throw new RuntimeException("Authentication failed", e);
        } finally {
            refreshLock.unlock();
        }
    }

    private record TokenCache(String token, Instant expiry) {}
}

Problem: Streaming responses can fail mid-stream; need graceful recovery.
Solution: Implement error handling in the reactive stream.
import io.smallrye.mutiny.Multi;
import jakarta.inject.Inject;
import jakarta.ws.rs.GET;
import jakarta.ws.rs.Path;
import jakarta.ws.rs.Produces;
import jakarta.ws.rs.QueryParam;
import jakarta.ws.rs.core.MediaType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.util.concurrent.TimeoutException;

@RegisterAiService
public interface ResilientStreamingAssistant {
    Multi<String> chatStreaming(String message);
}

@Path("/chat")
public class ChatResource {

    private static final Logger logger = LoggerFactory.getLogger(ChatResource.class);

    @Inject
    ResilientStreamingAssistant assistant;

    @GET
    @Path("/stream")
    @Produces(MediaType.SERVER_SENT_EVENTS)
    public Multi<String> stream(@QueryParam("message") String message) {
        return assistant.chatStreaming(message)
                // Fail if the stream stalls with no chunk for 30 seconds
                .ifNoItem().after(Duration.ofSeconds(30))
                .failWith(new TimeoutException("Chunk timeout"))
                // Retry on failure with exponential backoff;
                // note: re-subscribing restarts the generation from scratch
                .onFailure().retry().withBackOff(Duration.ofSeconds(1)).atMost(3)
                // Handle final failure gracefully
                .onFailure().recoverWithItem(error ->
                        "\n\n[Error: Stream interrupted - " + error.getMessage() + "]")
                // Log progress
                .invoke(chunk -> logger.debug("Chunk: {}", chunk));
    }
}

Problem: In clustered deployments, memory needs to be shared across instances.
Solution: Use a distributed cache (Redis, Hazelcast) for chat memory.
import dev.langchain4j.memory.ChatMemory;
import dev.langchain4j.memory.ChatMemoryProvider;
import dev.langchain4j.data.message.ChatMessage;
import dev.langchain4j.data.message.ChatMessageDeserializer;
import dev.langchain4j.data.message.ChatMessageSerializer;
import io.quarkus.redis.datasource.RedisDataSource;
import io.quarkus.redis.datasource.value.ValueCommands;
import jakarta.enterprise.context.ApplicationScoped;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

@ApplicationScoped
public class DistributedMemoryProvider implements ChatMemoryProvider {

    // ChatMessage is polymorphic, so store it as JSON produced by
    // LangChain4j's own serializer rather than relying on generic mapping
    private final ValueCommands<String, String> redis;
    private final Map<Object, ChatMemory> localCache = new ConcurrentHashMap<>();

    public DistributedMemoryProvider(RedisDataSource redis) {
        this.redis = redis.value(String.class);
    }

    @Override
    public ChatMemory get(Object memoryId) {
        return localCache.computeIfAbsent(memoryId,
                id -> new RedisChatMemory(id, redis));
    }

    private static class RedisChatMemory implements ChatMemory {

        private final Object memoryId;
        private final String key;
        private final ValueCommands<String, String> redis;

        RedisChatMemory(Object memoryId, ValueCommands<String, String> redis) {
            this.memoryId = memoryId;
            this.key = "chat:memory:" + memoryId;
            this.redis = redis;
        }

        @Override
        public Object id() {
            return memoryId;
        }

        @Override
        public void add(ChatMessage message) {
            // Read-modify-write; wrap in a Redis transaction if concurrent
            // writers for the same memoryId are possible
            List<ChatMessage> messages = messages();
            messages.add(message);
            redis.set(key, ChatMessageSerializer.messagesToJson(messages));
        }

        @Override
        public List<ChatMessage> messages() {
            String json = redis.get(key);
            return json != null
                    ? new ArrayList<>(ChatMessageDeserializer.messagesFromJson(json))
                    : new ArrayList<>();
        }

        @Override
        public void clear() {
            redis.getdel(key);
        }
    }
}
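Without an expiry, conversations accumulate in Redis indefinitely. A sliding TTL can be refreshed on each write — a sketch, assuming the set(key, value, SetArgs) overload of the Quarkus Redis value commands:

import io.quarkus.redis.datasource.value.SetArgs;

// Inside RedisChatMemory.add(...): refresh a 24-hour expiry on every write
redis.set(key, ChatMessageSerializer.messagesToJson(messages),
        new SetArgs().ex(86_400)); // seconds = 24h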
Problem: The LLM provides parameters in the wrong format; they need transformation before tool execution.
Solution: Use input guardrails to transform parameters.
import io.quarkiverse.langchain4j.guardrails.*;
import dev.langchain4j.agent.tool.ToolExecutionRequest;
import io.vertx.core.json.JsonObject;
import jakarta.enterprise.context.ApplicationScoped;
import java.time.format.DateTimeFormatter;

@ApplicationScoped
public class ParameterTransformGuardrail implements ToolInputGuardrail {

    @Override
    public ToolInputGuardrailResult validate(ToolInputGuardrailRequest request) {
        JsonObject args = request.argumentsAsJson();
        // Start from a copy so untouched arguments pass through unchanged
        JsonObject transformed = args.copy();
        // Normalize the date format
        if (args.containsKey("date")) {
            transformed.put("date", normalizeDate(args.getString("date")));
        }
        // Normalize the amount (strip currency symbols and separators)
        if (args.containsKey("amount")) {
            transformed.put("amount", normalizeAmount(args.getString("amount")));
        }
        // Convert yes/no to boolean
        if (args.containsKey("confirm")) {
            String confirm = args.getString("confirm").toLowerCase();
            transformed.put("confirm", confirm.equals("yes") || confirm.equals("true"));
        }
        // Return the modified request
        ToolExecutionRequest modified = ToolExecutionRequest.builder()
                .id(request.executionRequest().id())
                .name(request.executionRequest().name())
                .arguments(transformed.encode())
                .build();
        return ToolInputGuardrailResult.successWith(modified);
    }

    private String normalizeDate(String date) {
        // Convert various formats to ISO-8601:
        // "12/25/2023", "Dec 25 2023", "25-12-2023" -> "2023-12-25"
        // (DateParser is an application-specific helper)
        return DateParser.parse(date).format(DateTimeFormatter.ISO_LOCAL_DATE);
    }

    private String normalizeAmount(String amount) {
        // "$1,234.56", "1234.56 USD" -> "1234.56"
        // (European formats like "1.234,56" need locale-aware parsing instead)
        return amount.replaceAll("[^0-9.]", "");
    }
}
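The copy-and-overwrite pattern is easy to sanity-check outside the guardrail machinery. A self-contained sketch using only Vert.x's JsonObject:

import io.vertx.core.json.JsonObject;

public class TransformDemo {
    public static void main(String[] args) {
        JsonObject raw = new JsonObject()
                .put("amount", "$1,234.56")
                .put("confirm", "yes")
                .put("note", "untouched"); // keys without a rule pass through

        JsonObject transformed = raw.copy()
                .put("amount", raw.getString("amount").replaceAll("[^0-9.]", ""))
                .put("confirm", raw.getString("confirm").equalsIgnoreCase("yes"));

        // {"amount":"1234.56","confirm":true,"note":"untouched"}
        System.out.println(transformed.encode());
    }
}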