CtrlK
Community · Documentation · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-autoconfigure-retry

Spring Boot auto-configuration for AI retry capabilities with exponential backoff and intelligent HTTP error handling

Overview
Eval results
Files

docs/examples/real-world-scenarios.md

Real-World Scenarios

Practical examples of using Spring AI Retry Auto Configuration in real-world applications.

Scenario 1: OpenAI API Integration

Requirements

  • Retry on rate limits (429) and transient server errors (e.g., 503)
  • No retry on authentication errors (401)
  • Respect rate limit windows (60s)
  • Graceful degradation on failure

Configuration

spring.ai.retry.max-attempts=10
spring.ai.retry.on-http-codes=429,503
spring.ai.retry.exclude-on-http-codes=401
spring.ai.retry.backoff.initial-interval=5s
spring.ai.retry.backoff.multiplier=2
spring.ai.retry.backoff.max-interval=60s

Implementation

import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;
import org.springframework.ai.retry.NonTransientAiException;

@Service
public class OpenAiService {
    
    private final RetryTemplate retryTemplate;
    private final RestTemplate restTemplate;
    private final String apiKey;
    
    public OpenAiService(RetryTemplate retryTemplate, 
                         RestTemplate openAiRestTemplate,
                         @Value("${openai.api.key}") String apiKey) {
        this.retryTemplate = retryTemplate;
        this.restTemplate = openAiRestTemplate;
        this.apiKey = apiKey;
    }
    
    /**
     * Requests a chat completion with retry; when retries are exhausted the
     * recovery callback returns a canned fallback response instead of failing.
     *
     * @param prompt user prompt to send to the model
     * @return the model completion, or the fallback message
     * @throws IllegalStateException when the API rejects the configured key (401)
     */
    public String complete(String prompt) {
        try {
            return retryTemplate.execute(
                context -> callOpenAi(prompt),
                context -> getFallbackResponse(prompt)
            );
        } catch (NonTransientAiException e) {
            // getMessage() can be null — the original called contains() on it
            // unconditionally, which would NPE and mask the real failure.
            if (e.getMessage() != null && e.getMessage().contains("401")) {
                throw new IllegalStateException("Invalid OpenAI API key", e);
            }
            throw e;
        }
    }
    
    /** Performs the raw HTTP call to the chat completions endpoint. */
    private String callOpenAi(String prompt) {
        HttpHeaders headers = new HttpHeaders();
        headers.setBearerAuth(apiKey);
        headers.setContentType(MediaType.APPLICATION_JSON);
        
        Map<String, Object> request = Map.of(
            "model", "gpt-4",
            "messages", List.of(Map.of("role", "user", "content", prompt)),
            "max_tokens", 150
        );
        
        HttpEntity<Map<String, Object>> entity = new HttpEntity<>(request, headers);
        
        ResponseEntity<Map> response = restTemplate.postForEntity(
            "https://api.openai.com/v1/chat/completions",
            entity,
            Map.class
        );
        
        return extractCompletion(response.getBody());
    }
    
    /** Degraded-mode response used once all retry attempts fail. */
    private String getFallbackResponse(String prompt) {
        log.warn("Using fallback response for prompt: {}", prompt);
        return "Service temporarily unavailable. Please try again later.";
    }
    
    /**
     * Extracts the completion text from a chat completions response body.
     * OpenAI nests the text under choices[0].message.content — the original
     * read a top-level "content" key, which does not exist in this API's
     * response and always returned null.
     */
    @SuppressWarnings("unchecked")
    private String extractCompletion(Map<String, Object> response) {
        if (response == null) {
            throw new NonTransientAiException("Empty response body from OpenAI");
        }
        List<Map<String, Object>> choices = (List<Map<String, Object>>) response.get("choices");
        if (choices == null || choices.isEmpty()) {
            throw new NonTransientAiException("No choices in OpenAI response");
        }
        Map<String, Object> message = (Map<String, Object>) choices.get(0).get("message");
        return message == null ? null : (String) message.get("content");
    }
}

Scenario 2: Microservices Communication

Requirements

  • Quick retries for internal service calls
  • Short backoff for fast recovery
  • Circuit breaker pattern
  • Request correlation tracking

Configuration

spring.ai.retry.max-attempts=5
spring.ai.retry.backoff.initial-interval=500ms
spring.ai.retry.backoff.multiplier=3
spring.ai.retry.backoff.max-interval=10s

Implementation

import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.stereotype.Service;
import org.slf4j.MDC;

@Service
public class AiModelService {
    
    private final RetryTemplate retryTemplate;
    private final RestTemplate restTemplate;
    private final CircuitBreaker circuitBreaker;
    
    public AiModelService(RetryTemplate retryTemplate,
                          RestTemplate restTemplate,
                          CircuitBreakerFactory circuitBreakerFactory) {
        this.retryTemplate = retryTemplate;
        this.restTemplate = restTemplate;
        this.circuitBreaker = circuitBreakerFactory.create("ai-model-service");
    }
    
    public String classify(String text) {
        String correlationId = MDC.get("correlationId");
        
        return circuitBreaker.run(
            () -> retryTemplate.execute(context -> {
                // Preserve correlation ID across retries
                MDC.put("correlationId", correlationId);
                
                log.info("Calling AI model service, attempt {}", 
                         context.getRetryCount() + 1);
                
                return callModelService(text, correlationId);
            }),
            throwable -> {
                log.error("Circuit breaker fallback triggered", throwable);
                return "UNKNOWN";
            }
        );
    }
    
    private String callModelService(String text, String correlationId) {
        HttpHeaders headers = new HttpHeaders();
        headers.set("X-Correlation-ID", correlationId);
        
        HttpEntity<Map<String, String>> entity = new HttpEntity<>(
            Map.of("text", text),
            headers
        );
        
        ResponseEntity<Map> response = restTemplate.postForEntity(
            "http://ai-model-service/classify",
            entity,
            Map.class
        );
        
        return (String) response.getBody().get("classification");
    }
}

Scenario 3: Batch Processing with Retries

Requirements

  • Process large batches of items
  • Retry individual failures
  • Track success/failure rates
  • Continue processing on partial failures

Configuration

spring.ai.retry.max-attempts=3
spring.ai.retry.backoff.initial-interval=1s
spring.ai.retry.backoff.multiplier=2
spring.ai.retry.backoff.max-interval=5s

Implementation

import org.springframework.retry.support.RetryTemplate;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

@Service
public class BatchAiProcessor {
    
    private final RetryTemplate retryTemplate;
    private final AiClient aiClient;
    
    public BatchAiProcessor(RetryTemplate retryTemplate, AiClient aiClient) {
        this.retryTemplate = retryTemplate;
        this.aiClient = aiClient;
    }
    
    /**
     * Processes every item sequentially. Individual failures are captured as
     * failed {@code ItemResult}s and never abort the rest of the batch.
     */
    public BatchResult processBatch(List<String> items) {
        List<ItemResult> outcomes = items.stream()
            .map(this::processWithRetry)
            .collect(Collectors.toList());
        
        long ok = outcomes.stream().filter(ItemResult::isSuccess).count();
        long failed = outcomes.size() - ok;
        
        log.info("Batch complete: {} success, {} failures", ok, failed);
        
        return new BatchResult(outcomes, ok, failed);
    }
    
    /**
     * Processes items concurrently (common ForkJoinPool), then joins all
     * futures before summarizing.
     */
    public BatchResult processBatchParallel(List<String> items) {
        List<CompletableFuture<ItemResult>> pending = items.stream()
            .map(item -> CompletableFuture.supplyAsync(() -> processWithRetry(item)))
            .collect(Collectors.toList());
        
        List<ItemResult> outcomes = pending.stream()
            .map(CompletableFuture::join)
            .collect(Collectors.toList());
        
        long ok = outcomes.stream().filter(ItemResult::isSuccess).count();
        long failed = outcomes.size() - ok;
        
        return new BatchResult(outcomes, ok, failed);
    }
    
    /** Processes one item under the retry template, converting failures to ItemResult. */
    private ItemResult processWithRetry(String item) {
        try {
            String output = retryTemplate.execute(attempt -> {
                log.debug("Processing item: {}, attempt {}", 
                          item, attempt.getRetryCount() + 1);
                return aiClient.process(item);
            });
            
            return ItemResult.success(item, output);
        } catch (TransientAiException e) {
            // Retries exhausted on a retryable error.
            log.error("Failed to process item after retries: {}", item, e);
            return ItemResult.failure(item, "Transient error: " + e.getMessage());
        } catch (Exception e) {
            log.error("Failed to process item: {}", item, e);
            return ItemResult.failure(item, "Error: " + e.getMessage());
        }
    }
}

/** Aggregate outcome of a batch run: per-item results plus success/failure tallies. */
record BatchResult(List<ItemResult> results, long successCount, long failureCount) {}
/**
 * Outcome of processing a single item. Exactly one of {@code result} (on
 * success) or {@code error} (on failure) is non-null.
 */
record ItemResult(String item, String result, boolean success, String error) {
    /** Named factory for a successful outcome. */
    static ItemResult success(String item, String result) {
        return new ItemResult(item, result, true, null);
    }
    /** Named factory for a failed outcome. */
    static ItemResult failure(String item, String error) {
        return new ItemResult(item, null, false, error);
    }
    /**
     * JavaBean-style alias for the generated {@code success()} accessor.
     * Callers use {@code ItemResult::isSuccess} as a method reference, but a
     * record only generates {@code success()} — without this alias the code
     * does not compile.
     */
    boolean isSuccess() {
        return success;
    }
}

Scenario 4: Rate-Limited Public API

Requirements

  • Respect aggressive rate limits
  • Implement token bucket pattern
  • Exponential backoff on 429
  • Monitor rate limit headers

Configuration

spring.ai.retry.max-attempts=10
spring.ai.retry.on-http-codes=429,503
spring.ai.retry.backoff.initial-interval=10s
spring.ai.retry.backoff.multiplier=2
spring.ai.retry.backoff.max-interval=120s

Implementation

import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.stereotype.Service;
import io.github.bucket4j.Bucket;
import io.github.bucket4j.Bandwidth;
import io.github.bucket4j.Refill;
import java.time.Duration;

@Service
public class RateLimitedAiService {
    
    private final RetryTemplate retryTemplate;
    private final RestTemplate restTemplate;
    private final Bucket tokenBucket;
    
    public RateLimitedAiService(RetryTemplate retryTemplate,
                                RestTemplate restTemplate) {
        this.retryTemplate = retryTemplate;
        this.restTemplate = restTemplate;
        
        // Token bucket: 10 requests per minute
        Bandwidth limit = Bandwidth.classic(10, Refill.intervally(10, Duration.ofMinutes(1)));
        this.tokenBucket = Bucket.builder()
            .addLimit(limit)
            .build();
    }
    
    /**
     * Analyzes content against a rate-limited API: blocks for a local token
     * first, then calls with retry; a 429 from the server is converted into a
     * retryable TransientAiException.
     */
    public String analyze(String content) {
        // BlockingBucket.consume(long) throws InterruptedException; the
        // original neither caught nor declared it (compile error). Restore
        // the interrupt flag rather than swallowing it.
        try {
            tokenBucket.asBlocking().consume(1);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while waiting for a rate-limit token", e);
        }
        
        return retryTemplate.execute(context -> {
            try {
                return callApi(content);
            } catch (RateLimitException e) {
                // Extract retry-after from headers
                int retryAfter = e.getRetryAfterSeconds();
                log.warn("Rate limited. Retry after {} seconds", retryAfter);
                
                // Update token bucket based on rate limit response
                updateTokenBucket(retryAfter);
                
                // Re-throw as transient so the retry template backs off and retries.
                throw new TransientAiException(
                    "Rate limited. Retry after " + retryAfter + "s",
                    e
                );
            }
        });
    }
    
    /** Raw API call; also logs the server's rate-limit headers for monitoring. */
    private String callApi(String content) {
        HttpEntity<String> entity = new HttpEntity<>(content);
        
        ResponseEntity<String> response = restTemplate.postForEntity(
            "https://api.example.com/analyze",
            entity,
            String.class
        );
        
        // Check rate limit headers
        HttpHeaders headers = response.getHeaders();
        String remaining = headers.getFirst("X-RateLimit-Remaining");
        String reset = headers.getFirst("X-RateLimit-Reset");
        
        log.debug("Rate limit: {} remaining, resets at {}", remaining, reset);
        
        if ("0".equals(remaining)) {
            log.warn("Rate limit exhausted. Next reset: {}", reset);
        }
        
        return response.getBody();
    }
    
    /** Adjusts the local token bucket using the server's retry-after feedback. */
    private void updateTokenBucket(int retryAfterSeconds) {
        // Implement token bucket adjustment based on API feedback
        // This ensures we respect the API's rate limit signals
    }
}

Scenario 5: Multi-Provider Fallback

Requirements

  • Try primary AI provider first
  • Fallback to secondary on failure
  • Different retry strategies per provider
  • Track provider reliability

Implementation

import org.springframework.retry.support.RetryTemplate;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;

@Service
public class MultiProviderAiService {
    
    private final RetryTemplate primaryRetryTemplate;
    private final RetryTemplate fallbackRetryTemplate;
    private final AiClient primaryClient;
    private final AiClient fallbackClient;
    private final MeterRegistry meterRegistry;
    
    // The RetryTemplate beans are declared with @Qualifier("primary") /
    // @Qualifier("fallback"); injecting them without matching qualifiers
    // relies on parameter-name <-> bean-name matching, which breaks when
    // parameter names are not compiled in (-parameters). Qualify explicitly.
    public MultiProviderAiService(
            @Qualifier("primary") RetryTemplate primaryRetryTemplate,
            @Qualifier("fallback") RetryTemplate fallbackRetryTemplate,
            @Qualifier("openai") AiClient primaryClient,
            @Qualifier("anthropic") AiClient fallbackClient,
            MeterRegistry meterRegistry) {
        this.primaryRetryTemplate = primaryRetryTemplate;
        this.fallbackRetryTemplate = fallbackRetryTemplate;
        this.primaryClient = primaryClient;
        this.fallbackClient = fallbackClient;
        this.meterRegistry = meterRegistry;
    }
    
    /**
     * Completes a prompt, trying the primary provider first (with its retry
     * policy) and falling back to the secondary provider only after the
     * primary's retries are exhausted. Per-provider success/failure counters
     * feed reliability dashboards.
     *
     * @throws ServiceUnavailableException when both providers fail
     */
    public String complete(String prompt) {
        // Try primary provider
        try {
            String result = primaryRetryTemplate.execute(context -> {
                log.debug("Calling primary provider, attempt {}", 
                          context.getRetryCount() + 1);
                return primaryClient.complete(prompt);
            });
            
            meterRegistry.counter("ai.provider.success", "provider", "primary").increment();
            return result;
        } catch (TransientAiException e) {
            log.warn("Primary provider failed after retries: {}", e.getMessage());
            meterRegistry.counter("ai.provider.failure", "provider", "primary").increment();
        }
        
        // Fallback to secondary provider
        try {
            log.info("Attempting fallback provider");
            String result = fallbackRetryTemplate.execute(context -> {
                log.debug("Calling fallback provider, attempt {}", 
                          context.getRetryCount() + 1);
                return fallbackClient.complete(prompt);
            });
            
            meterRegistry.counter("ai.provider.success", "provider", "fallback").increment();
            return result;
        } catch (TransientAiException e) {
            log.error("Fallback provider also failed: {}", e.getMessage());
            meterRegistry.counter("ai.provider.failure", "provider", "fallback").increment();
            throw new ServiceUnavailableException("All AI providers unavailable", e);
        }
    }
}

@Configuration
class MultiProviderRetryConfig {
    
    /** Primary provider: fail fast — 3 attempts, 1s→10s exponential backoff (x2). */
    @Bean
    @Qualifier("primary")
    public RetryTemplate primaryRetryTemplate() {
        return transientRetry(3, 1000, 2, 10000);
    }
    
    /** Fallback provider: more patient — 5 attempts, 2s→30s exponential backoff (x3). */
    @Bean
    @Qualifier("fallback")
    public RetryTemplate fallbackRetryTemplate() {
        return transientRetry(5, 2000, 3, 30000);
    }
    
    /** Shared builder: retries only TransientAiException with exponential backoff. */
    private static RetryTemplate transientRetry(int maxAttempts, long initialMs,
                                                double multiplier, long maxMs) {
        return RetryTemplate.builder()
            .maxAttempts(maxAttempts)
            .exponentialBackoff(initialMs, multiplier, maxMs)
            .retryOn(TransientAiException.class)
            .build();
    }
}

Scenario 6: Streaming with Retry

Requirements

  • Retry on connection failures
  • Handle partial stream consumption
  • Resume from checkpoint
  • Timeout handling

Implementation

import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.stereotype.Service;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

@Service
public class StreamingAiService {
    
    private final RetryTemplate retryTemplate;
    private final RestTemplate restTemplate;
    
    public StreamingAiService(RetryTemplate retryTemplate, 
                             RestTemplate restTemplate) {
        this.retryTemplate = retryTemplate;
        this.restTemplate = restTemplate;
    }
    
    /**
     * Streams a completion, retrying the whole stream on IO errors or timeout.
     * NOTE(review): a retry restarts the stream from the beginning, so the
     * consumer may see chunks it already processed — consumers must be
     * idempotent or keep their own checkpoint.
     */
    public void streamCompletion(String prompt, StreamConsumer consumer) {
        retryTemplate.execute(context -> {
            log.info("Starting stream, attempt {}", context.getRetryCount() + 1);
            
            try {
                streamWithTimeout(prompt, consumer, 30, TimeUnit.SECONDS);
                return null;
            } catch (IOException e) {
                log.warn("Stream interrupted: {}", e.getMessage());
                throw new TransientAiException("Stream error", e);
            } catch (TimeoutException e) {
                log.warn("Stream timeout");
                throw new TransientAiException("Stream timeout", e);
            }
        });
    }
    
    /**
     * Reads the response line by line under an overall deadline.
     * The deadline is only checked between lines; a readLine() that blocks
     * indefinitely is bounded by the RestTemplate's socket read timeout, not
     * by this method.
     */
    private void streamWithTimeout(String prompt, 
                                   StreamConsumer consumer,
                                   long timeout,
                                   TimeUnit unit) throws IOException, TimeoutException {
        
        HttpEntity<String> entity = new HttpEntity<>(prompt);
        
        ResponseEntity<Resource> response = restTemplate.postForEntity(
            "https://api.example.com/stream",
            entity,
            Resource.class
        );
        
        // getBody() may be null (e.g. an empty response); the original would
        // NPE here. Raise IOException so the retry template treats it as a
        // transient stream error.
        Resource body = response.getBody();
        if (body == null) {
            throw new IOException("Empty response body from streaming endpoint");
        }
        
        // Explicit UTF-8: the no-charset InputStreamReader constructor uses the
        // platform default, which varies across deployments.
        try (InputStream is = body.getInputStream();
             BufferedReader reader = new BufferedReader(
                 new InputStreamReader(is, StandardCharsets.UTF_8))) {
            
            long startTime = System.currentTimeMillis();
            String line;
            
            while ((line = reader.readLine()) != null) {
                // Check timeout
                long elapsed = System.currentTimeMillis() - startTime;
                if (elapsed > unit.toMillis(timeout)) {
                    throw new TimeoutException("Stream timeout after " + elapsed + "ms");
                }
                
                // Process line
                consumer.accept(line);
            }
        }
    }
}

/** Callback receiving each line/chunk read from a streaming AI response. */
@FunctionalInterface
interface StreamConsumer {
    // Invoked once per line read; implementations should be idempotent
    // because a retried stream replays from the start.
    void accept(String chunk);
}

Scenario 7: Caching with Retry

Requirements

  • Cache successful responses
  • Serve from cache on transient failures
  • TTL-based cache invalidation
  • Cache warming on startup

Implementation

import org.springframework.retry.support.RetryTemplate;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.cache.annotation.CacheEvict;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;
import org.springframework.ai.retry.NonTransientAiException;

@Service
public class CachedAiService {
    
    private final RetryTemplate retryTemplate;
    private final AiClient aiClient;
    private final Cache cache;
    
    public CachedAiService(RetryTemplate retryTemplate,
                          AiClient aiClient,
                          CacheManager cacheManager) {
        this.retryTemplate = retryTemplate;
        this.aiClient = aiClient;
        this.cache = cacheManager.getCache("ai-responses");
        // Fail fast at startup: getCache() returns null when the cache is not
        // configured, which would otherwise surface as an NPE on the first
        // request, far from the root cause.
        if (this.cache == null) {
            throw new IllegalStateException("Cache 'ai-responses' is not configured");
        }
    }
    
    /**
     * Always attempts a fresh call (with retries); on transient failure falls
     * back to a previously cached response for the same prompt, if one exists.
     */
    public String complete(String prompt) {
        // Snapshot the cached value up front so it is available as a fallback.
        Cache.ValueWrapper cached = cache.get(prompt);
        
        try {
            // Attempt fresh call with retry
            String result = retryTemplate.execute(context -> 
                aiClient.complete(prompt)
            );
            
            // Update cache
            cache.put(prompt, result);
            return result;
            
        } catch (TransientAiException e) {
            // Serve from cache on transient failure
            if (cached != null) {
                log.warn("Using cached response due to transient error: {}", 
                         e.getMessage());
                return (String) cached.get();
            }
            throw e;
            
        } catch (NonTransientAiException e) {
            // Non-transient (e.g. bad request / auth) — a stale cached answer
            // would mask a real configuration problem, so propagate.
            log.error("Non-transient error, not using cache: {}", e.getMessage());
            throw e;
        }
    }
    
    /**
     * Cache-first variant via Spring's annotation-driven caching.
     * NOTE(review): @Cacheable is applied through the Spring proxy, so calls
     * made from within this same class bypass the cache entirely.
     */
    @Cacheable(value = "ai-responses", key = "#prompt")
    public String completeWithSpringCache(String prompt) {
        return retryTemplate.execute(context -> aiClient.complete(prompt));
    }
    
    /** Evicts the cached response for a single prompt. */
    @CacheEvict(value = "ai-responses", key = "#prompt")
    public void invalidateCache(String prompt) {
        log.info("Cache invalidated for prompt: {}", prompt);
    }
    
    /** Evicts every cached response. */
    @CacheEvict(value = "ai-responses", allEntries = true)
    public void clearCache() {
        log.info("All cache entries cleared");
    }
}

Common Patterns

Pattern: Idempotency Token

/**
 * Sends a request under the retry template with a stable idempotency key.
 * The key is generated once, before the retry loop, so every retry attempt
 * carries the same value and the server can de-duplicate repeated deliveries.
 */
public String withIdempotencyToken(String prompt) {
    final String requestKey = UUID.randomUUID().toString();
    
    return retryTemplate.execute(attempt -> {
        HttpHeaders headers = new HttpHeaders();
        headers.set("Idempotency-Key", requestKey);
        return restTemplate.postForObject(url, new HttpEntity<>(prompt, headers), String.class);
    });
}

Pattern: Metrics and Monitoring

/**
 * Wraps an AI call with Micrometer instrumentation: a per-attempt counter
 * inside the retry loop, plus overall duration and total counters tagged by
 * final outcome ("success" / "failure").
 */
public String withMetrics(String prompt) {
    Timer.Sample sample = Timer.start(meterRegistry);
    String status = "failure";
    
    try {
        String completion = retryTemplate.execute(attempt -> {
            // Counts every attempt, including retries.
            meterRegistry.counter("ai.retry.attempt").increment();
            return aiClient.complete(prompt);
        });
        status = "success";
        return completion;
    } finally {
        // Single exit point for metrics: records the outcome whether we
        // returned normally or an exception is propagating.
        sample.stop(meterRegistry.timer("ai.call.duration", "status", status));
        meterRegistry.counter("ai.call.total", "status", status).increment();
    }
}

Pattern: Request Deduplication

// In-flight registry: concurrent identical prompts share one future.
private final Map<String, CompletableFuture<String>> inFlightRequests = new ConcurrentHashMap<>();

/**
 * Coalesces concurrent identical prompts onto a single in-flight call; the
 * entry is removed once the call completes (normally or exceptionally).
 *
 * Fix: the original removed the key inside the async body, while
 * computeIfAbsent could still hold the map's bin lock for that key — the
 * worker thread could block on (and race with) the very registration it
 * belongs to. Removing via whenComplete keeps all map mutation outside the
 * computeIfAbsent mapping window.
 */
public CompletableFuture<String> deduplicated(String prompt) {
    return inFlightRequests.computeIfAbsent(prompt, key ->
        CompletableFuture
            .supplyAsync(() -> retryTemplate.execute(context -> aiClient.complete(key)))
            .whenComplete((result, error) -> inFlightRequests.remove(key))
    );
}

Next Steps

  • Explore edge cases and advanced scenarios
  • Review configuration guide
  • Check API reference
tessl i tessl/maven-org-springframework-ai--spring-ai-autoconfigure-retry@1.1.1

docs

examples

edge-cases.md

real-world-scenarios.md

index.md

tile.json