CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-client-chat

Spring AI Chat Client provides a fluent API for building AI-powered applications with LLMs, supporting advisors, streaming, structured outputs, and conversation memory

Overview
Eval results
Files

docs/examples/edge-cases.md

Edge Cases and Advanced Scenarios

Advanced usage patterns, edge cases, and troubleshooting.

Handling Empty Responses

String content = chatClient.prompt("Query").call().content();

// Imperative guard: replace a null/blank reply with a fallback value.
if (content == null || content.isBlank()) {
    log.warn("Received empty response");
    content = "No response available";
}

// The same guard expressed declaratively with Optional.
String safe = Optional.ofNullable(content)
    .filter(s -> !s.isBlank())
    .orElse("No response");

Streaming Cancellation

// Subscribe to the token stream and keep the Disposable handle so the
// in-flight request can be cancelled later.
Disposable subscription = chatClient
    .prompt("Long story")
    .stream()
    .content()
    .doOnCancel(() -> log.info("Stream cancelled"))
    .subscribe(System.out::print);

// Cancel after 5 seconds
Thread.sleep(5000);
subscription.dispose();

Streaming Timeout

// Abort the stream if no signal arrives within 30 seconds, substituting a
// placeholder element instead of propagating the TimeoutException.
chatClient
    .prompt("Generate report")
    .stream()
    .content()
    .timeout(Duration.ofSeconds(30))
    .onErrorResume(TimeoutException.class, e -> {
        log.error("Stream timed out");
        return Flux.just("[Timeout - partial response]");
    })
    .subscribe(System.out::print);

Concurrent Requests

// ChatClient is thread-safe: a single instance can be shared by all threads.
private final ChatClient chatClient = ChatClient.create(chatModel);

// Fan the queries out over a fixed-size worker pool.
ExecutorService executor = Executors.newFixedThreadPool(10);
try {
    List<CompletableFuture<String>> futures = queries.stream()
        .map(query -> CompletableFuture.supplyAsync(
            () -> chatClient.prompt(query).call().content(),
            executor
        ))
        .collect(Collectors.toList());

    // Wait for all requests; join() propagates the first failure, if any.
    CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();
} finally {
    // FIX: shut the pool down — the original example leaked its threads.
    executor.shutdown();
}

Memory with Expiration

/**
 * ChatMemory implementation whose conversations expire after a fixed
 * duration. Expired entries are evicted lazily on read.
 *
 * <p>Thread-safe: all state lives in a ConcurrentHashMap and updates use
 * its atomic merge operation.
 */
public class ExpiringChatMemory implements ChatMemory {
    private final Map<String, TimestampedMessages> conversations =
        new ConcurrentHashMap<>();
    private final Duration expiration;

    public ExpiringChatMemory(Duration expiration) {
        this.expiration = Objects.requireNonNull(expiration, "expiration");
    }

    @Override
    public void add(String conversationId, List<Message> messages) {
        // FIX: append to the existing history instead of overwriting it
        // (the original put() discarded all previously stored messages).
        // Each write also refreshes the expiration timestamp.
        conversations.merge(conversationId,
            new TimestampedMessages(List.copyOf(messages), Instant.now()),
            (existing, incoming) -> {
                List<Message> combined = new ArrayList<>(existing.messages());
                combined.addAll(incoming.messages());
                return new TimestampedMessages(combined, incoming.timestamp());
            });
    }

    @Override
    public List<Message> get(String conversationId, int lastN) {
        TimestampedMessages entry = conversations.get(conversationId);
        if (entry == null) return Collections.emptyList();

        // Lazy eviction: drop the conversation once it has aged out.
        // FIX: record components are accessed via accessor methods
        // (entry.timestamp()), not field syntax — the original did not compile.
        if (Duration.between(entry.timestamp(), Instant.now())
                .compareTo(expiration) > 0) {
            conversations.remove(conversationId);
            return Collections.emptyList();
        }

        // FIX: honor lastN (the original ignored it and returned everything);
        // a non-positive lastN preserves the old "return all" behavior.
        List<Message> all = entry.messages();
        if (lastN <= 0 || lastN >= all.size()) {
            return all;
        }
        return List.copyOf(all.subList(all.size() - lastN, all.size()));
    }

    @Override
    public void clear(String conversationId) {
        conversations.remove(conversationId);
    }

    // Immutable pairing of a message list with its last-write instant.
    private record TimestampedMessages(List<Message> messages, Instant timestamp) {}
}

// Usage: conversations idle for more than one hour are evicted on read.
ChatMemory memory = new ExpiringChatMemory(Duration.ofHours(1));

Rate Limiting

/**
 * Wraps a ChatClient with client-side rate limiting via a shared RateLimiter.
 *
 * NOTE(review): permits are acquired twice per chat() call — once in chat()
 * and once in RateLimitAdvisor.before() — so the effective throughput is half
 * the configured 10 req/s. Confirm whether both layers are intended.
 */
public class RateLimitedChatService {
    private final ChatClient chatClient;
    private final RateLimiter rateLimiter;

    public RateLimitedChatService(ChatModel chatModel) {
        this.rateLimiter = RateLimiter.create(10.0); // 10 requests/second
        
        // The advisor enforces the same limit inside the client pipeline.
        this.chatClient = ChatClient.builder(chatModel)
            .defaultAdvisors(new RateLimitAdvisor(rateLimiter))
            .build();
    }

    /**
     * Sends a message, failing fast with RateLimitException if a permit
     * cannot be obtained within one second.
     */
    public String chat(String message) {
        if (!rateLimiter.tryAcquire(Duration.ofSeconds(1))) {
            throw new RateLimitException("Rate limit exceeded");
        }
        
        return chatClient.prompt(message).call().content();
    }
}

/**
 * Advisor that gates every request on a shared RateLimiter before it reaches
 * the model. Fragment: the constructor initializing {@code rateLimiter} and
 * the remaining BaseAdvisor methods are elided.
 */
class RateLimitAdvisor implements BaseAdvisor {
    // Shared with RateLimitedChatService; assigned in the (elided) constructor.
    private final RateLimiter rateLimiter;
    
    @Override
    public ChatClientRequest before(ChatClientRequest request, AdvisorChain chain) {
        // Non-blocking acquire: reject immediately when no permit is available.
        if (!rateLimiter.tryAcquire()) {
            throw new RateLimitException("Too many requests");
        }
        return request;
    }
    
    // ... other methods
}

Circuit Breaker Pattern

/**
 * Guards chat calls with a circuit breaker: the circuit opens once 50% of
 * calls fail and remains open for 30 seconds before allowing probes.
 */
public class CircuitBreakerChatService {
    private final ChatClient chatClient;
    private final CircuitBreaker circuitBreaker;

    public CircuitBreakerChatService(ChatModel chatModel) {
        CircuitBreakerConfig config = CircuitBreakerConfig.custom()
            .failureRateThreshold(50)
            .waitDurationInOpenState(Duration.ofSeconds(30))
            .build();
        this.circuitBreaker = CircuitBreaker.of("chatService", config);
        this.chatClient = ChatClient.create(chatModel);
    }

    /** Runs the call through the breaker; fails fast while the circuit is open. */
    public String chat(String message) {
        return circuitBreaker.executeSupplier(
            () -> chatClient.prompt(message).call().content());
    }
}

Fallback Chains

/**
 * Answers a query with a three-level fallback chain:
 * primary model → secondary model → cached response → static message.
 */
public String chatWithFallback(String query) {
    try {
        return primaryClient.prompt(query).call().content();
    } catch (Exception primaryFailure) {
        log.warn("Primary model failed, trying secondary", primaryFailure);
    }

    try {
        return secondaryClient.prompt(query).call().content();
    } catch (Exception secondaryFailure) {
        log.error("Secondary model failed, using cached response", secondaryFailure);
    }

    // Last resort: serve from cache, or a static apology if nothing is cached.
    return cacheService.get(query)
        .orElse("Service temporarily unavailable");
}

Large Context Handling

/**
 * Answers a query against a potentially very large context. Contexts within
 * the token budget are sent directly; oversized ones are map-reduced — each
 * chunk is summarized independently, then the final answer is produced from
 * the concatenated summaries.
 */
public String handleLargeContext(String query, String largeContext) {
    // Fast path: the whole context fits in a single prompt.
    if (estimateTokens(largeContext) <= 100000) {
        return chatClient
            .prompt()
            .user(spec -> spec
                .text("Answer {query} based on: {context}")
                .param("query", query)
                .param("context", largeContext)
            )
            .call()
            .content();
    }

    // Map step: summarize each ~50k-token chunk independently.
    List<String> chunkSummaries = splitIntoChunks(largeContext, 50000).stream()
        .map(chunk -> chatClient
            .prompt("Summarize: " + chunk)
            .call()
            .content())
        .collect(Collectors.toList());

    // Reduce step: answer the query from the joined summaries.
    return chatClient
        .prompt()
        .user(spec -> spec
            .text("Answer {query} based on: {summaries}")
            .param("query", query)
            .param("summaries", String.join("\n", chunkSummaries))
        )
        .call()
        .content();
}

Multimodal with Multiple Images

/**
 * Asks a single question about several images in one multimodal prompt.
 * Each byte array is attached as an inline PNG media part, in list order.
 */
public String analyzeImages(List<byte[]> images, String question) {
    return chatClient
        .prompt()
        .user(spec -> {
            spec.text(question);
            for (byte[] bytes : images) {
                spec.media(new Media(
                    MimeTypeUtils.IMAGE_PNG,
                    new ByteArrayResource(bytes)));
            }
            return spec;
        })
        .call()
        .content();
}

Streaming with Aggregation

/**
 * Streams a response and blocks until it completes, returning the full
 * concatenated text. Gives up after a 2-minute wait.
 */
public String streamAndAggregate(String query) {
    return chatClient
        .prompt(query)
        .stream()
        .content()
        // Fold every emitted chunk into one buffer, in arrival order.
        .collect(StringBuilder::new, StringBuilder::append)
        .map(StringBuilder::toString)
        .block(Duration.ofMinutes(2));
}

Conditional Advisor Execution

/**
 * Builds the advisor list per call so conversation memory and request
 * logging can be toggled independently for each invocation.
 */
public String chatWithConditionalFeatures(
    String query,
    boolean enableMemory,
    boolean enableLogging
) {
    List<Advisor> selected = new ArrayList<>();
    if (enableMemory) {
        selected.add(MessageChatMemoryAdvisor.builder(chatMemory).build());
    }
    if (enableLogging) {
        selected.add(SimpleLoggerAdvisor.builder().build());
    }

    var spec = chatClient.prompt().user(query);

    // Only attach advisors when at least one feature was requested.
    if (!selected.isEmpty()) {
        spec.advisors(selected.toArray(new Advisor[0]));
    }

    return spec.call().content();
}

Custom Error Recovery

/**
 * CallAdvisor that retries a failed call up to maxRetries times with a
 * linearly increasing delay (1s, 2s, 3s) between attempts. The last
 * failure is rethrown once the retry budget is exhausted.
 */
class ErrorRecoveryAdvisor implements CallAdvisor {
    private final int maxRetries = 3;
    
    @Override
    public ChatClientResponse adviseCall(
        ChatClientRequest request,
        CallAdvisorChain chain
    ) {
        for (int attempt = 0; attempt < maxRetries; attempt++) {
            try {
                return chain.nextCall(request);
            } catch (Exception e) {
                // Retry budget exhausted: surface the last failure.
                if (attempt == maxRetries - 1) throw e;
                
                log.warn("Attempt {} failed, retrying", attempt + 1);
                try {
                    // Linear backoff (the original comment claimed exponential).
                    Thread.sleep(1000L * (attempt + 1));
                } catch (InterruptedException ie) {
                    // FIX: Thread.sleep throws the checked InterruptedException,
                    // which the original neither caught nor declared (compile
                    // error). Restore the interrupt flag and abort the retries.
                    Thread.currentThread().interrupt();
                    throw new IllegalStateException("Retry interrupted", ie);
                }
            }
        }
        throw new IllegalStateException("Should not reach here");
    }
    
    // ... other methods
}

Nested Tool Calls

// Tool that calls another tool
public SearchResult searchAndSummarize(SearchRequest request) {
    // First tool call
    List<String> docs = vectorStore.search(request.query());
    
    // Second tool call via ChatClient
    String summary = chatClient
        .prompt("Summarize these documents: " + String.join("\n", docs))
        .call()
        .content();
    
    return new SearchResult(docs, summary);
}

Handling Model Limits

/**
 * Sends query + context to the model, retrying once with a truncated
 * context if the prompt exceeds the model's token limit.
 */
public String handleTokenLimits(String query, String context) {
    try {
        return askWithTemplate("Query: {query}\nContext: {context}",
            query, context);
    } catch (TokenLimitExceededException e) {
        // Shrink the context to fit and retry once with a marked template.
        String truncated = truncateToTokenLimit(context, 50000);
        return askWithTemplate("Query: {query}\nContext (truncated): {context}",
            query, truncated);
    }
}

// Shared send path for both the first attempt and the truncated retry.
private String askWithTemplate(String template, String query, String context) {
    return chatClient
        .prompt()
        .user(spec -> spec
            .text(template)
            .param("query", query)
            .param("context", context)
        )
        .call()
        .content();
}

Thread-Safe State in Advisors

/**
 * Example advisor holding mutable cross-request state. A single advisor
 * instance serves every concurrent request, so all shared state must be
 * thread-safe (atomics / concurrent collections), as contrasted below.
 */
class StatefulAdvisor implements BaseAdvisor {
    // GOOD: Thread-safe
    private final AtomicLong requestCount = new AtomicLong();
    private final ConcurrentHashMap<String, Object> cache = new ConcurrentHashMap<>();
    
    // BAD: Not thread-safe
    // private long requestCount = 0;
    // private Map<String, Object> cache = new HashMap<>();
    
    @Override
    public ChatClientRequest before(ChatClientRequest request, AdvisorChain chain) {
        // Atomically tag each request with a monotonically increasing number,
        // exposed to downstream advisors via the request context.
        long count = requestCount.incrementAndGet();
        request.context().put("requestNumber", count);
        return request;
    }
    
    // ... other methods
}

Next Steps

  • Reference Documentation - Complete API details
  • Real-World Scenarios - More production patterns

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-client-chat

docs

examples

edge-cases.md

real-world-scenarios.md

index.md

tile.json