Spring AI's ChatClient provides a fluent API for building AI-powered applications with LLMs, supporting advisors, streaming, structured outputs, and conversation memory.
This section covers advanced usage patterns, edge cases, and troubleshooting.
// Defensive handling of a possibly null or blank model response.
String content = chatClient.prompt("Query").call().content();
// content() may return null — guard before using the value.
if (content == null || content.isBlank()) {
// Handle empty response
log.warn("Received empty response");
content = "No response available";
}
// Or use Optional
// Equivalent Optional-based form: filter out blanks and supply a default.
String safe = Optional.ofNullable(content)
.filter(s -> !s.isBlank())
.orElse("No response");Flux<String> stream = chatClient
.prompt("Long story")
.stream()
.content();
// Keep the Disposable so the in-flight stream can be cancelled explicitly.
Disposable subscription = stream
.doOnCancel(() -> log.info("Stream cancelled"))
.subscribe(System.out::print);
// Cancel after 5 seconds
// NOTE(review): Thread.sleep throws the checked InterruptedException, so this
// snippet only compiles inside a method that declares or handles it.
Thread.sleep(5000);
// dispose() cancels the subscription, triggering doOnCancel above.
subscription.dispose();Flux<String> stream = chatClient
.prompt("Generate report")
.stream()
.content();
stream
// Fail the stream if no element arrives within 30 seconds.
.timeout(Duration.ofSeconds(30))
.onErrorResume(TimeoutException.class, e -> {
log.error("Stream timed out");
// Swap the failed stream for a single placeholder element.
return Flux.just("[Timeout - partial response]");
})
.subscribe(System.out::print);// ChatClient is thread-safe
// A single ChatClient instance can be shared across threads.
private final ChatClient chatClient = ChatClient.create(chatModel);
// Safe to call from multiple threads
ExecutorService executor = Executors.newFixedThreadPool(10);
// Fan the queries out concurrently; each future resolves to one response.
// An explicit executor is passed so blocking calls stay off the common pool.
List<CompletableFuture<String>> futures = queries.stream()
.map(query -> CompletableFuture.supplyAsync(
() -> chatClient.prompt(query).call().content(),
executor
))
.collect(Collectors.toList());
// Wait for all
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
/**
 * ChatMemory whose conversations expire a fixed duration after their last write.
 * Thread-safe: state lives in a ConcurrentHashMap and updates use atomic
 * per-key compute().
 */
public class ExpiringChatMemory implements ChatMemory {
    private final Map<String, TimestampedMessages> conversations =
        new ConcurrentHashMap<>();
    // How long a conversation survives after its most recent add().
    private final Duration expiration;

    public ExpiringChatMemory(Duration expiration) {
        this.expiration = expiration;
    }

    @Override
    public void add(String conversationId, List<Message> messages) {
        // Append to any existing history (the original put() silently
        // overwrote it) and refresh the expiration timestamp.
        conversations.compute(conversationId, (id, existing) -> {
            List<Message> merged = new ArrayList<>();
            if (existing != null) {
                merged.addAll(existing.messages());
            }
            merged.addAll(messages);
            return new TimestampedMessages(merged, Instant.now());
        });
    }

    @Override
    public List<Message> get(String conversationId, int lastN) {
        TimestampedMessages entry = conversations.get(conversationId);
        if (entry == null) return Collections.emptyList();
        // Lazy eviction on read once the entry is older than the window.
        if (Duration.between(entry.timestamp(), Instant.now())
                .compareTo(expiration) > 0) {
            conversations.remove(conversationId);
            return Collections.emptyList();
        }
        List<Message> messages = entry.messages();
        // Honor lastN (the original ignored it): keep only the most recent N.
        if (lastN > 0 && lastN < messages.size()) {
            messages = messages.subList(messages.size() - lastN, messages.size());
        }
        // Return a copy so callers cannot mutate the stored history.
        return List.copyOf(messages);
    }

    @Override
    public void clear(String conversationId) {
        conversations.remove(conversationId);
    }

    // Immutable pairing of a message list with its last-write time.
    private record TimestampedMessages(List<Message> messages, Instant timestamp) {}
}
// Usage
ChatMemory memory = new ExpiringChatMemory(Duration.ofHours(1));
public class RateLimitedChatService {
private final ChatClient chatClient;
private final RateLimiter rateLimiter;
public RateLimitedChatService(ChatModel chatModel) {
this.rateLimiter = RateLimiter.create(10.0); // 10 requests/second
// The same limiter instance is shared with the advisor installed below.
this.chatClient = ChatClient.builder(chatModel)
.defaultAdvisors(new RateLimitAdvisor(rateLimiter))
.build();
}
public String chat(String message) {
// Wait up to 1s for a permit; reject the request rather than queue forever.
// NOTE(review): the limiter is consumed here AND again inside
// RateLimitAdvisor.before(), so each chat() call costs two permits —
// confirm the double acquisition is intended.
if (!rateLimiter.tryAcquire(Duration.ofSeconds(1))) {
throw new RateLimitException("Rate limit exceeded");
}
return chatClient.prompt(message).call().content();
}
}
/**
 * Advisor that rejects a request before it reaches the model when the shared
 * rate limiter has no free permit.
 */
class RateLimitAdvisor implements BaseAdvisor {
    private final RateLimiter rateLimiter;

    // FIX: the original declared this final field but never assigned it and had
    // no constructor, which does not compile. The call site
    // `new RateLimitAdvisor(rateLimiter)` in RateLimitedChatService shows the
    // intended injection of the service's limiter.
    RateLimitAdvisor(RateLimiter rateLimiter) {
        this.rateLimiter = rateLimiter;
    }

    @Override
    public ChatClientRequest before(ChatClientRequest request, AdvisorChain chain) {
        // Non-blocking probe: fail fast instead of queueing the request.
        if (!rateLimiter.tryAcquire()) {
            throw new RateLimitException("Too many requests");
        }
        return request;
    }
    // ... other methods
}
public class CircuitBreakerChatService {
private final ChatClient chatClient;
private final CircuitBreaker circuitBreaker;
public CircuitBreakerChatService(ChatModel chatModel) {
this.chatClient = ChatClient.create(chatModel);
// Breaker opens once the configured failure rate (50%) is exceeded, then
// stays open for 30s before allowing probe calls again.
this.circuitBreaker = CircuitBreaker.of("chatService",
CircuitBreakerConfig.custom()
.failureRateThreshold(50)
.waitDurationInOpenState(Duration.ofSeconds(30))
.build()
);
}
public String chat(String message) {
// Route the model call through the breaker so failures are tracked and
// calls are short-circuited while it is open.
return circuitBreaker.executeSupplier(() ->
chatClient.prompt(message).call().content()
);
}
}public String chatWithFallback(String query) {
// Three-tier degradation: primary model -> secondary model -> cached response.
// Try primary model
try {
return primaryClient.prompt(query).call().content();
} catch (Exception e) {
log.warn("Primary model failed, trying secondary", e);
// Try secondary model
try {
return secondaryClient.prompt(query).call().content();
} catch (Exception e2) {
log.error("Secondary model failed, using cached response", e2);
// Try cache
// Final fallback is a static message, so this method never throws.
return cacheService.get(query)
.orElse("Service temporarily unavailable");
}
}
}public String handleLargeContext(String query, String largeContext) {
// Map-reduce over an oversized context: summarize chunks, then answer from
// the summaries. Small contexts are sent directly.
// Check token count
int estimatedTokens = estimateTokens(largeContext);
if (estimatedTokens > 100000) {
// Split context into chunks
List<String> chunks = splitIntoChunks(largeContext, 50000);
// Process each chunk
// NOTE(review): chunks are summarized sequentially, one model call each —
// consider parallelizing if chunk counts grow.
List<String> chunkSummaries = chunks.stream()
.map(chunk -> chatClient
.prompt("Summarize: " + chunk)
.call()
.content()
)
.collect(Collectors.toList());
// Final query with summaries
return chatClient
.prompt()
.user(spec -> spec
.text("Answer {query} based on: {summaries}")
.param("query", query)
.param("summaries", String.join("\n", chunkSummaries))
)
.call()
.content();
}
// Context fits, use directly
return chatClient
.prompt()
.user(spec -> spec
.text("Answer {query} based on: {context}")
.param("query", query)
.param("context", largeContext)
)
.call()
.content();
}public String analyzeImages(List<byte[]> images, String question) {
// Wrap each raw image payload as a Media attachment for a multimodal prompt.
// NOTE(review): every image is labeled IMAGE_PNG — confirm inputs are PNG or
// detect the MIME type per image.
List<Media> mediaList = images.stream()
.map(bytes -> new Media(
MimeTypeUtils.IMAGE_PNG,
new ByteArrayResource(bytes)
))
.collect(Collectors.toList());
return chatClient
.prompt()
.user(spec -> {
// One text question plus all image attachments in a single user message.
spec.text(question);
mediaList.forEach(spec::media);
return spec;
})
.call()
.content();
}public String streamAndAggregate(String query) {
// Stream the response, then reassemble the chunks into one String.
Flux<String> stream = chatClient
.prompt(query)
.stream()
.content();
// Collect all chunks
Mono<String> complete = stream
.collect(StringBuilder::new, StringBuilder::append)
.map(StringBuilder::toString);
// Block and return complete response
// NOTE(review): block() must not run on a reactive/event-loop thread; the
// 2-minute cap bounds the wait.
return complete.block(Duration.ofMinutes(2));
}public String chatWithConditionalFeatures(
String query,
boolean enableMemory,
boolean enableLogging
) {
// Assemble the advisor list from the feature flags.
List<Advisor> advisors = new ArrayList<>();
if (enableMemory) {
advisors.add(MessageChatMemoryAdvisor.builder(chatMemory).build());
}
if (enableLogging) {
advisors.add(SimpleLoggerAdvisor.builder().build());
}
var request = chatClient.prompt().user(query);
if (!advisors.isEmpty()) {
// NOTE(review): relies on the request spec being mutated in place — the
// advisors(...) return value is discarded; verify against the ChatClient API.
request.advisors(advisors.toArray(new Advisor[0]));
}
return request.call().content();
}
}
/**
 * Advisor that retries a failed model call up to maxRetries times with
 * exponential backoff between attempts.
 */
class ErrorRecoveryAdvisor implements CallAdvisor {
    private final int maxRetries = 3;

    @Override
    public ChatClientResponse adviseCall(
        ChatClientRequest request,
        CallAdvisorChain chain
    ) {
        for (int attempt = 0; attempt < maxRetries; attempt++) {
            try {
                return chain.nextCall(request);
            // FIX: catch RuntimeException, not Exception — the original's
            // `throw e` rethrew a checked Exception from a method that
            // declares none, which does not compile.
            } catch (RuntimeException e) {
                if (attempt == maxRetries - 1) throw e;
                log.warn("Attempt {} failed, retrying", attempt + 1);
                backoff(attempt);
            }
        }
        throw new IllegalStateException("Should not reach here");
    }

    // Sleeps 1s, 2s, 4s, ... — true exponential backoff (the original slept
    // linearly while claiming exponential, and left the checked
    // InterruptedException from Thread.sleep unhandled: a compile error).
    private void backoff(int attempt) {
        try {
            Thread.sleep(1000L << attempt);
        } catch (InterruptedException ie) {
            // Restore the interrupt flag and abort the retry loop.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Retry backoff interrupted", ie);
        }
    }
    // ... other methods
}
// Tool that calls another tool
// Tool composition: a vector-store lookup feeds a summarization call.
public SearchResult searchAndSummarize(SearchRequest request) {
// First tool call
List<String> docs = vectorStore.search(request.query());
// Second tool call via ChatClient
String summary = chatClient
.prompt("Summarize these documents: " + String.join("\n", docs))
.call()
.content();
// Return both the raw documents and the generated summary.
return new SearchResult(docs, summary);
}public String handleTokenLimits(String query, String context) {
// Optimistic send; on token-limit rejection, retry once with truncated context.
try {
return chatClient
.prompt()
.user(spec -> spec
.text("Query: {query}\nContext: {context}")
.param("query", query)
.param("context", context)
)
.call()
.content();
} catch (TokenLimitExceededException e) {
// Truncate context and retry
String truncated = truncateToTokenLimit(context, 50000);
// Second attempt is not guarded: a repeat failure propagates to the caller.
return chatClient
.prompt()
.user(spec -> spec
.text("Query: {query}\nContext (truncated): {context}")
.param("query", query)
.param("context", truncated)
)
.call()
.content();
}
}class StatefulAdvisor implements BaseAdvisor {
// Example: an advisor instance is shared across concurrent requests, so any
// mutable state it holds must be thread-safe.
// GOOD: Thread-safe
private final AtomicLong requestCount = new AtomicLong();
private final ConcurrentHashMap<String, Object> cache = new ConcurrentHashMap<>();
// BAD: Not thread-safe
// private long requestCount = 0;
// private Map<String, Object> cache = new HashMap<>();
@Override
public ChatClientRequest before(ChatClientRequest request, AdvisorChain chain) {
// Atomic increment is safe under concurrent calls.
long count = requestCount.incrementAndGet();
// Expose the sequence number to downstream advisors via the request context.
request.context().put("requestNumber", count);
return request;
}
// ... other methods
}