Spring Boot auto-configuration for AI retry capabilities with exponential backoff and intelligent HTTP error handling
Practical examples of using Spring AI Retry Auto Configuration in real-world applications.
spring.ai.retry.max-attempts=10
spring.ai.retry.on-http-codes=429,503
spring.ai.retry.exclude-on-http-codes=401
spring.ai.retry.backoff.initial-interval=5s
spring.ai.retry.backoff.multiplier=2
spring.ai.retry.backoff.max-interval=60s
import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;
import org.springframework.ai.retry.NonTransientAiException;
@Service
public class OpenAiService {
private final RetryTemplate retryTemplate;
private final RestTemplate restTemplate;
private final String apiKey;
/**
 * Creates the service from its collaborators.
 *
 * @param retryTemplate      template used to run the OpenAI call with retries
 * @param openAiRestTemplate HTTP client used for the OpenAI request
 * @param apiKey             value of the {@code openai.api.key} property
 */
public OpenAiService(RetryTemplate retryTemplate,
        RestTemplate openAiRestTemplate,
        @Value("${openai.api.key}") String apiKey) {
    this.apiKey = apiKey;
    this.restTemplate = openAiRestTemplate;
    this.retryTemplate = retryTemplate;
}
/**
 * Requests a completion for {@code prompt} through the retry template.
 *
 * <p>If the call ultimately fails with anything other than a
 * {@link NonTransientAiException}, the canned fallback text is returned. A
 * {@link NonTransientAiException} is propagated instead of being replaced by the fallback.
 *
 * @param prompt user prompt sent to the chat completion endpoint
 * @return the completion text, or the fallback message when the call could not be completed
 * @throws IllegalStateException   if the non-transient failure message mentions {@code 401}
 * @throws NonTransientAiException for any other non-transient failure
 */
public String complete(String prompt) {
    try {
        return retryTemplate.execute(
            context -> callOpenAi(prompt),
            context -> {
                // RetryTemplate hands every terminal failure to the recovery callback,
                // including failures it refused to retry. Rethrow those here, otherwise
                // the catch block below could never be reached.
                Throwable lastFailure = context.getLastThrowable();
                if (lastFailure instanceof NonTransientAiException) {
                    throw (NonTransientAiException) lastFailure;
                }
                return getFallbackResponse(prompt);
            }
        );
    } catch (NonTransientAiException e) {
        // getMessage() may legitimately be null; guard before inspecting it.
        String message = e.getMessage();
        if (message != null && message.contains("401")) {
            throw new IllegalStateException("Invalid OpenAI API key", e);
        }
        throw e;
    }
}
/**
 * Posts a single-message chat completion request and returns the extracted completion.
 *
 * @param prompt text sent as the content of the one {@code user} message
 * @return whatever {@code extractCompletion} derives from the response body
 */
private String callOpenAi(String prompt) {
    HttpHeaders httpHeaders = new HttpHeaders();
    httpHeaders.setBearerAuth(apiKey);
    httpHeaders.setContentType(MediaType.APPLICATION_JSON);

    Map<String, String> userMessage = Map.of("role", "user", "content", prompt);
    Map<String, Object> payload = Map.of(
        "model", "gpt-4",
        "messages", List.of(userMessage),
        "max_tokens", 150
    );

    HttpEntity<Map<String, Object>> httpRequest = new HttpEntity<>(payload, httpHeaders);
    ResponseEntity<Map> reply = restTemplate.postForEntity(
        "https://api.openai.com/v1/chat/completions",
        httpRequest,
        Map.class
    );
    return extractCompletion(reply.getBody());
}
/**
 * Logs that the fallback is being used and returns the fixed fallback text.
 *
 * @param prompt prompt that could not be completed; only written to the log
 * @return the fixed "temporarily unavailable" message
 */
private String getFallbackResponse(String prompt) {
    final String fallbackMessage = "Service temporarily unavailable. Please try again later.";
    log.warn("Using fallback response for prompt: {}", prompt);
    return fallbackMessage;
}
/**
 * Pulls the assistant text out of a chat completion response body.
 *
 * <p>The text is read from {@code choices[0].message.content}, which is where the Chat
 * Completions API places it; the body has no top-level {@code content} entry.
 *
 * @param response deserialized JSON response body
 * @return the text content of the first choice
 * @throws IllegalStateException if the body is missing or does not contain a first choice
 *                               with a string {@code message.content}
 */
private String extractCompletion(Map<String, Object> response) {
    if (response == null) {
        throw new IllegalStateException("OpenAI response had no body");
    }
    if (!(response.get("choices") instanceof java.util.List<?> choices) || choices.isEmpty()) {
        throw new IllegalStateException("OpenAI response contained no choices");
    }
    // Walk choices[0] -> message -> content, failing loudly on any unexpected shape.
    if (!(choices.get(0) instanceof Map<?, ?> firstChoice)
            || !(firstChoice.get("message") instanceof Map<?, ?> message)
            || !(message.get("content") instanceof String content)) {
        throw new IllegalStateException("OpenAI response choice had no text content");
    }
    return content;
}
}
spring.ai.retry.max-attempts=5
spring.ai.retry.backoff.initial-interval=500ms
spring.ai.retry.backoff.multiplier=3
spring.ai.retry.backoff.max-interval=10s
import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.stereotype.Service;
import org.slf4j.MDC;
@Service
public class AiModelService {
private final RetryTemplate retryTemplate;
private final RestTemplate restTemplate;
private final CircuitBreaker circuitBreaker;
/**
 * Creates the service and obtains its circuit breaker from the factory.
 *
 * @param retryTemplate         template used to retry calls to the model service
 * @param restTemplate          HTTP client used for the model service request
 * @param circuitBreakerFactory factory asked for the {@code ai-model-service} breaker
 */
public AiModelService(RetryTemplate retryTemplate,
        RestTemplate restTemplate,
        CircuitBreakerFactory circuitBreakerFactory) {
    this.circuitBreaker = circuitBreakerFactory.create("ai-model-service");
    this.restTemplate = restTemplate;
    this.retryTemplate = retryTemplate;
}
/**
 * Classifies {@code text} via the model service, with retries inside a circuit breaker.
 *
 * <p>The caller's {@code correlationId} MDC entry is made visible while the retried call
 * runs, and the executing thread's previous MDC entry is restored afterwards.
 *
 * @param text text to classify
 * @return the classification from the service, or {@code "UNKNOWN"} when the circuit
 *         breaker fallback is triggered
 */
public String classify(String text) {
    String correlationId = MDC.get("correlationId");
    return circuitBreaker.run(
        () -> {
            // NOTE(review): depending on the CircuitBreaker implementation this supplier may
            // run on a different thread than the caller, so remember what that thread had.
            String previous = MDC.get("correlationId");
            try {
                // Null MDC values are only supported by some SLF4J backends; remove instead.
                if (correlationId != null) {
                    MDC.put("correlationId", correlationId);
                } else {
                    MDC.remove("correlationId");
                }
                return retryTemplate.execute(context -> {
                    log.info("Calling AI model service, attempt {}",
                        context.getRetryCount() + 1);
                    return callModelService(text, correlationId);
                });
            } finally {
                // Restore the executing thread's own value so nothing leaks past this call.
                if (previous != null) {
                    MDC.put("correlationId", previous);
                } else {
                    MDC.remove("correlationId");
                }
            }
        },
        throwable -> {
            log.error("Circuit breaker fallback triggered", throwable);
            return "UNKNOWN";
        }
    );
}
/**
 * Posts {@code text} to the classification endpoint and returns the
 * {@code classification} entry of the response body.
 *
 * @param text          text to classify, sent under the {@code text} key
 * @param correlationId value sent in the {@code X-Correlation-ID} header
 * @return the {@code classification} value of the response body, cast to {@code String}
 */
private String callModelService(String text, String correlationId) {
    HttpHeaders requestHeaders = new HttpHeaders();
    requestHeaders.set("X-Correlation-ID", correlationId);

    Map<String, String> payload = Map.of("text", text);
    HttpEntity<Map<String, String>> httpRequest = new HttpEntity<>(payload, requestHeaders);

    ResponseEntity<Map> reply = restTemplate.postForEntity(
        "http://ai-model-service/classify",
        httpRequest,
        Map.class
    );
    Object classification = reply.getBody().get("classification");
    return (String) classification;
}
}
spring.ai.retry.max-attempts=3
spring.ai.retry.backoff.initial-interval=1s
spring.ai.retry.backoff.multiplier=2
spring.ai.retry.backoff.max-interval=5s
import org.springframework.retry.support.RetryTemplate;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
/**
 * Processes lists of items through an {@code AiClient}, retrying each item independently
 * and reporting per-item success or failure instead of failing the whole batch.
 */
@Service
public class BatchAiProcessor {
    private final RetryTemplate retryTemplate;
    private final AiClient aiClient;

    /**
     * @param retryTemplate template used to retry each individual item
     * @param aiClient      client that processes a single item
     */
    public BatchAiProcessor(RetryTemplate retryTemplate, AiClient aiClient) {
        this.retryTemplate = retryTemplate;
        this.aiClient = aiClient;
    }

    /**
     * Processes the items one after another on the calling thread.
     *
     * @param items items to process
     * @return per-item results in input order, plus success and failure counts
     */
    public BatchResult processBatch(List<String> items) {
        List<ItemResult> results = items.stream()
            .map(this::processWithRetry)
            .collect(Collectors.toList());
        return summarize(results);
    }

    /**
     * Processes the items concurrently using {@link CompletableFuture#supplyAsync} with its
     * default executor, then waits for all of them.
     *
     * @param items items to process
     * @return per-item results in input order, plus success and failure counts
     */
    public BatchResult processBatchParallel(List<String> items) {
        // Start every task before joining any of them, so the work actually overlaps.
        List<CompletableFuture<ItemResult>> futures = items.stream()
            .map(item -> CompletableFuture.supplyAsync(() -> processWithRetry(item)))
            .collect(Collectors.toList());
        List<ItemResult> results = futures.stream()
            .map(CompletableFuture::join)
            .collect(Collectors.toList());
        return summarize(results);
    }

    /** Counts successes and failures, logs the totals and wraps them in a {@code BatchResult}. */
    private BatchResult summarize(List<ItemResult> results) {
        // The record accessor for the boolean component is success(), not isSuccess().
        long successCount = results.stream().filter(result -> result.success()).count();
        long failureCount = results.size() - successCount;
        log.info("Batch complete: {} success, {} failures",
            successCount, failureCount);
        return new BatchResult(results, successCount, failureCount);
    }

    /**
     * Runs one item through the retry template and converts any failure into a failed
     * {@code ItemResult} rather than letting it propagate.
     */
    private ItemResult processWithRetry(String item) {
        try {
            String result = retryTemplate.execute(context -> {
                log.debug("Processing item: {}, attempt {}",
                    item, context.getRetryCount() + 1);
                return aiClient.process(item);
            });
            return ItemResult.success(item, result);
        } catch (TransientAiException e) {
            log.error("Failed to process item after retries: {}", item, e);
            return ItemResult.failure(item, "Transient error: " + e.getMessage());
        } catch (Exception e) {
            log.error("Failed to process item: {}", item, e);
            return ItemResult.failure(item, "Error: " + e.getMessage());
        }
    }
}
/**
 * Outcome of one batch run.
 *
 * @param results      per-item results
 * @param successCount number of items reported as successful
 * @param failureCount number of items reported as failed
 */
record BatchResult(
        List<ItemResult> results,
        long successCount,
        long failureCount) {
}
record ItemResult(String item, String result, boolean success, String error) {
/**
 * Builds a successful result: {@code success} is {@code true} and {@code error} is {@code null}.
 *
 * @param item   the processed item
 * @param result the value produced for the item
 */
static ItemResult success(String item, String result) {
    final String noError = null;
    return new ItemResult(item, result, true, noError);
}
/**
 * Builds a failed result: {@code success} is {@code false} and {@code result} is {@code null}.
 *
 * @param item  the item that could not be processed
 * @param error description of the failure
 */
static ItemResult failure(String item, String error) {
    final String noResult = null;
    return new ItemResult(item, noResult, false, error);
}
}
spring.ai.retry.max-attempts=10
spring.ai.retry.on-http-codes=429,503
spring.ai.retry.backoff.initial-interval=10s
spring.ai.retry.backoff.multiplier=2
spring.ai.retry.backoff.max-interval=120s
import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.stereotype.Service;
import io.github.bucket4j.Bucket;
import io.github.bucket4j.Bandwidth;
import io.github.bucket4j.Refill;
import java.time.Duration;
@Service
public class RateLimitedAiService {
private final RetryTemplate retryTemplate;
private final RestTemplate restTemplate;
private final Bucket tokenBucket;
/**
 * Creates the service with a local token bucket of capacity 10 that is refilled with
 * 10 tokens per one-minute interval.
 *
 * @param retryTemplate template used to retry API calls
 * @param restTemplate  HTTP client used for the API request
 */
public RateLimitedAiService(RetryTemplate retryTemplate,
        RestTemplate restTemplate) {
    this.retryTemplate = retryTemplate;
    this.restTemplate = restTemplate;

    Refill perMinuteRefill = Refill.intervally(10, Duration.ofMinutes(1));
    Bandwidth tenPerMinute = Bandwidth.classic(10, perMinuteRefill);
    this.tokenBucket = Bucket.builder().addLimit(tenPerMinute).build();
}
/**
 * Sends {@code content} for analysis, taking one local rate-limit token per attempt.
 *
 * <p>A {@code RateLimitException} from the call is logged, passed to
 * {@code updateTokenBucket} and rethrown as a {@code TransientAiException}.
 *
 * @param content request body to analyze
 * @return the response body returned by {@code callApi}
 * @throws IllegalStateException if the thread is interrupted while waiting for a token
 */
public String analyze(String content) {
    return retryTemplate.execute(context -> {
        // Every attempt is a real API call, so every attempt must pay for a token.
        acquireToken();
        try {
            return callApi(content);
        } catch (RateLimitException e) {
            int retryAfter = e.getRetryAfterSeconds();
            log.warn("Rate limited. Retry after {} seconds", retryAfter);
            updateTokenBucket(retryAfter);
            throw new TransientAiException(
                "Rate limited. Retry after " + retryAfter + "s",
                e
            );
        }
    });
}

/** Blocks until one token is available; restores the interrupt flag if interrupted. */
private void acquireToken() {
    try {
        tokenBucket.asBlocking().consume(1);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IllegalStateException("Interrupted while waiting for a rate-limit token", e);
    }
}
/**
 * Posts {@code content} to the analyze endpoint, logs the rate-limit headers of the
 * response and returns the response body.
 *
 * @param content request body
 * @return the response body
 */
private String callApi(String content) {
    ResponseEntity<String> reply = restTemplate.postForEntity(
        "https://api.example.com/analyze",
        new HttpEntity<>(content),
        String.class
    );

    HttpHeaders replyHeaders = reply.getHeaders();
    String remaining = replyHeaders.getFirst("X-RateLimit-Remaining");
    String reset = replyHeaders.getFirst("X-RateLimit-Reset");
    log.debug("Rate limit: {} remaining, resets at {}", remaining, reset);

    boolean exhausted = "0".equals(remaining);
    if (exhausted) {
        log.warn("Rate limit exhausted. Next reset: {}", reset);
    }
    return reply.getBody();
}
/**
 * Hook for adapting the local token bucket to the rate-limit feedback of the API.
 *
 * <p>Placeholder: the body contains no statements, so {@code retryAfterSeconds} is
 * currently ignored.
 *
 * @param retryAfterSeconds retry delay taken from the rate-limit failure by {@code analyze}
 */
private void updateTokenBucket(int retryAfterSeconds) {
// TODO: implement token bucket adjustment based on API feedback,
// so that the API's rate limit signals are respected.
}
}
import org.springframework.retry.support.RetryTemplate;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;
/**
 * Completes prompts against a primary provider and falls back to a secondary provider
 * when the primary fails with a {@code TransientAiException}. Outcomes are counted per
 * provider in the {@code ai.provider.success} / {@code ai.provider.failure} meters.
 */
@Service
public class MultiProviderAiService {
    private final RetryTemplate primaryRetryTemplate;
    private final RetryTemplate fallbackRetryTemplate;
    private final AiClient primaryClient;
    private final AiClient fallbackClient;
    private final MeterRegistry meterRegistry;

    /**
     * @param primaryRetryTemplate  retry template qualified as {@code primary}
     * @param fallbackRetryTemplate retry template qualified as {@code fallback}
     * @param primaryClient         client qualified as {@code openai}
     * @param fallbackClient        client qualified as {@code anthropic}
     * @param meterRegistry         registry receiving the per-provider counters
     */
    public MultiProviderAiService(
            @Qualifier("primary") RetryTemplate primaryRetryTemplate,
            @Qualifier("fallback") RetryTemplate fallbackRetryTemplate,
            @Qualifier("openai") AiClient primaryClient,
            @Qualifier("anthropic") AiClient fallbackClient,
            MeterRegistry meterRegistry) {
        this.primaryRetryTemplate = primaryRetryTemplate;
        this.fallbackRetryTemplate = fallbackRetryTemplate;
        this.primaryClient = primaryClient;
        this.fallbackClient = fallbackClient;
        this.meterRegistry = meterRegistry;
    }

    /**
     * Completes {@code prompt} with the primary provider, then with the fallback provider
     * if the primary fails with a {@code TransientAiException}.
     *
     * @param prompt prompt to complete
     * @return the completion from whichever provider succeeded first
     * @throws ServiceUnavailableException if both providers fail with a
     *         {@code TransientAiException}; the fallback failure is the cause and the
     *         primary failure is attached as a suppressed exception
     */
    public String complete(String prompt) {
        TransientAiException primaryFailure;
        try {
            return completeWith("primary", primaryRetryTemplate, primaryClient, prompt);
        } catch (TransientAiException e) {
            log.warn("Primary provider failed after retries: {}", e.getMessage());
            primaryFailure = e;
        }

        log.info("Attempting fallback provider");
        try {
            return completeWith("fallback", fallbackRetryTemplate, fallbackClient, prompt);
        } catch (TransientAiException e) {
            log.error("Fallback provider also failed: {}", e.getMessage());
            ServiceUnavailableException unavailable =
                new ServiceUnavailableException("All AI providers unavailable", e);
            // Keep the primary failure visible for diagnosis; the cause slot holds the fallback's.
            unavailable.addSuppressed(primaryFailure);
            throw unavailable;
        }
    }

    /** Runs one provider through its retry template and records the outcome counter. */
    private String completeWith(String provider,
            RetryTemplate retryTemplate,
            AiClient client,
            String prompt) {
        try {
            String result = retryTemplate.execute(context -> {
                log.debug("Calling {} provider, attempt {}",
                    provider, context.getRetryCount() + 1);
                return client.complete(prompt);
            });
            meterRegistry.counter("ai.provider.success", "provider", provider).increment();
            return result;
        } catch (TransientAiException e) {
            meterRegistry.counter("ai.provider.failure", "provider", provider).increment();
            throw e;
        }
    }
}
@Configuration
class MultiProviderRetryConfig {
/**
 * Retry template for the primary provider: 3 attempts, exponential backoff starting at
 * 1000 ms with multiplier 2 and capped at 10000 ms, retrying on {@code TransientAiException}.
 */
@Bean
@Qualifier("primary")
public RetryTemplate primaryRetryTemplate() {
    final int attempts = 3;
    final long initialIntervalMs = 1000;
    final double multiplier = 2;
    final long maxIntervalMs = 10000;
    return RetryTemplate.builder()
        .maxAttempts(attempts)
        .exponentialBackoff(initialIntervalMs, multiplier, maxIntervalMs)
        .retryOn(TransientAiException.class)
        .build();
}
/**
 * Retry template for the fallback provider: 5 attempts, exponential backoff starting at
 * 2000 ms with multiplier 3 and capped at 30000 ms, retrying on {@code TransientAiException}.
 */
@Bean
@Qualifier("fallback")
public RetryTemplate fallbackRetryTemplate() {
    final int attempts = 5;
    final long initialIntervalMs = 2000;
    final double multiplier = 3;
    final long maxIntervalMs = 30000;
    return RetryTemplate.builder()
        .maxAttempts(attempts)
        .exponentialBackoff(initialIntervalMs, multiplier, maxIntervalMs)
        .retryOn(TransientAiException.class)
        .build();
}
}
import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.RestTemplate;
import org.springframework.stereotype.Service;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.concurrent.TimeUnit;
/**
 * Reads a line-oriented response from the stream endpoint and feeds each line to a
 * {@code StreamConsumer}, restarting the whole request through the retry template when
 * reading fails or exceeds the time budget.
 */
@Service
public class StreamingAiService {
    private final RetryTemplate retryTemplate;
    private final RestTemplate restTemplate;

    /**
     * @param retryTemplate template used to retry the stream request
     * @param restTemplate  HTTP client used for the stream request
     */
    public StreamingAiService(RetryTemplate retryTemplate,
            RestTemplate restTemplate) {
        this.retryTemplate = retryTemplate;
        this.restTemplate = restTemplate;
    }

    /**
     * Streams the completion for {@code prompt} to {@code consumer} with a 30 second budget
     * per attempt.
     *
     * <p>Each retry issues the request again and passes the same consumer, so the consumer
     * can receive lines it already saw during an earlier, failed attempt.
     *
     * @param prompt   prompt sent as the request body
     * @param consumer receiver of each line read from the response
     */
    public void streamCompletion(String prompt, StreamConsumer consumer) {
        retryTemplate.execute(context -> {
            log.info("Starting stream, attempt {}", context.getRetryCount() + 1);
            try {
                streamWithTimeout(prompt, consumer, 30, TimeUnit.SECONDS);
                return null;
            } catch (IOException e) {
                log.warn("Stream interrupted: {}", e.getMessage());
                throw new TransientAiException("Stream error", e);
            } catch (TimeoutException e) {
                log.warn("Stream timeout");
                throw new TransientAiException("Stream timeout", e);
            }
        });
    }

    /**
     * Posts the prompt and forwards the response to the consumer line by line.
     *
     * <p>The elapsed time is checked after each line has been read, so a read that blocks
     * is only detected once it returns.
     *
     * @throws IOException      if the response has no body or reading it fails
     * @throws TimeoutException if the elapsed time exceeds the budget when a line arrives
     */
    private void streamWithTimeout(String prompt,
            StreamConsumer consumer,
            long timeout,
            TimeUnit unit) throws IOException, TimeoutException {
        HttpEntity<String> entity = new HttpEntity<>(prompt);
        ResponseEntity<Resource> response = restTemplate.postForEntity(
            "https://api.example.com/stream",
            entity,
            Resource.class
        );

        // Report a missing body as an I/O problem so the caller maps it to a retryable error.
        Resource body = response.getBody();
        if (body == null) {
            throw new IOException("Stream response had no body");
        }

        long limitMillis = unit.toMillis(timeout);
        // Decode explicitly as UTF-8 rather than relying on the platform default charset.
        try (InputStream is = body.getInputStream();
             BufferedReader reader = new BufferedReader(
                 new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
            long startTime = System.currentTimeMillis();
            String line;
            while ((line = reader.readLine()) != null) {
                long elapsed = System.currentTimeMillis() - startTime;
                if (elapsed > limitMillis) {
                    throw new TimeoutException("Stream timeout after " + elapsed + "ms");
                }
                consumer.accept(line);
            }
        }
    }
}
@FunctionalInterface
interface StreamConsumer {
/**
 * Receives one chunk of streamed output.
 *
 * @param chunk the chunk text
 */
void accept(String chunk);
}
import org.springframework.retry.support.RetryTemplate;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.cache.annotation.CacheEvict;
import org.springframework.stereotype.Service;
import org.springframework.ai.retry.TransientAiException;
import org.springframework.ai.retry.NonTransientAiException;
@Service
public class CachedAiService {
private final RetryTemplate retryTemplate;
private final AiClient aiClient;
private final Cache cache;
/**
 * Creates the service and looks up the {@code ai-responses} cache.
 *
 * @param retryTemplate template used to retry client calls
 * @param aiClient      client that produces completions
 * @param cacheManager  manager asked for the {@code ai-responses} cache
 */
public CachedAiService(RetryTemplate retryTemplate,
        AiClient aiClient,
        CacheManager cacheManager) {
    this.cache = cacheManager.getCache("ai-responses");
    this.aiClient = aiClient;
    this.retryTemplate = retryTemplate;
}
/**
 * Always attempts a fresh completion and stores it in the cache; the previously cached
 * value is used only as a fallback when the fresh call fails with a
 * {@code TransientAiException}.
 *
 * @param prompt prompt to complete; also the cache key
 * @return the fresh completion, or the cached one after a transient failure
 */
public String complete(String prompt) {
    // Looked up before the call; consulted only on the transient-failure path below.
    Cache.ValueWrapper previous = cache.get(prompt);
    try {
        String fresh = retryTemplate.execute(context -> aiClient.complete(prompt));
        cache.put(prompt, fresh);
        return fresh;
    } catch (TransientAiException e) {
        if (previous == null) {
            // Nothing cached for this prompt, so the failure is the caller's to handle.
            throw e;
        }
        log.warn("Using cached response due to transient error: {}",
            e.getMessage());
        return (String) previous.get();
    } catch (NonTransientAiException e) {
        // Non-transient failures are never answered from the cache.
        log.error("Non-transient error, not using cache: {}", e.getMessage());
        throw e;
    }
}
/**
 * Completes {@code prompt} through the retry template; annotated so the result is cached
 * in {@code ai-responses} under the prompt.
 *
 * @param prompt prompt to complete; also the cache key
 * @return the completion
 */
@Cacheable(value = "ai-responses", key = "#prompt")
public String completeWithSpringCache(String prompt) {
    return retryTemplate.execute(
        context -> {
            return aiClient.complete(prompt);
        }
    );
}
/**
 * Annotated to evict the {@code ai-responses} entry keyed by {@code prompt}; the method
 * body itself only logs.
 *
 * @param prompt cache key to evict
 */
@CacheEvict(value = "ai-responses", key = "#prompt")
public void invalidateCache(String prompt) {
    final String message = "Cache invalidated for prompt: {}";
    log.info(message, prompt);
}
/**
 * Annotated to evict every entry of the {@code ai-responses} cache; the method body
 * itself only logs.
 */
@CacheEvict(value = "ai-responses", allEntries = true)
public void clearCache() {
    final String message = "All cache entries cleared";
    log.info(message);
}
}

/**
 * Posts {@code prompt} with an {@code Idempotency-Key} header, retrying through the
 * retry template.
 *
 * <p>{@code restTemplate}, {@code retryTemplate} and {@code url} are not defined in this
 * snippet; they are expected to come from the enclosing class.
 *
 * @param prompt request body
 * @return the response body as a string
 */
public String withIdempotencyToken(String prompt) {
// Generated once, outside the retry callback, so every attempt sends the same key.
String idempotencyKey = UUID.randomUUID().toString();
return retryTemplate.execute(context -> {
HttpHeaders headers = new HttpHeaders();
headers.set("Idempotency-Key", idempotencyKey);
HttpEntity<String> entity = new HttpEntity<>(prompt, headers);
return restTemplate.postForObject(url, entity, String.class);
});
}

/**
 * Completes {@code prompt} while recording metrics: {@code ai.retry.attempt} is
 * incremented on every attempt (the first one included), and the call duration and
 * {@code ai.call.total} counter are recorded with a {@code status} tag of
 * {@code success} or {@code failure}.
 *
 * @param prompt prompt to complete
 * @return the completion
 */
public String withMetrics(String prompt) {
Timer.Sample sample = Timer.start(meterRegistry);
try {
String result = retryTemplate.execute(context -> {
meterRegistry.counter("ai.retry.attempt").increment();
return aiClient.complete(prompt);
});
sample.stop(meterRegistry.timer("ai.call.duration", "status", "success"));
meterRegistry.counter("ai.call.total", "status", "success").increment();
return result;
} catch (Exception e) {
// Record the failure, then rethrow the exception unchanged.
sample.stop(meterRegistry.timer("ai.call.duration", "status", "failure"));
meterRegistry.counter("ai.call.total", "status", "failure").increment();
throw e;
}
}

// Futures of calls that are currently running, keyed by prompt.
private final Map<String, CompletableFuture<String>> inFlightRequests = new ConcurrentHashMap<>();

/**
 * Returns the future of the in-flight call for {@code prompt}, starting a new
 * asynchronous call only when none is registered for that prompt.
 *
 * @param prompt prompt to complete; also the de-duplication key
 * @return future completing with the completion text
 */
public CompletableFuture<String> deduplicated(String prompt) {
return inFlightRequests.computeIfAbsent(prompt, key ->
CompletableFuture.supplyAsync(() -> {
try {
return retryTemplate.execute(context -> aiClient.complete(key));
} finally {
// Unregister on success and on failure so a later call for this prompt starts fresh.
inFlightRequests.remove(key);
}
})
);
}
tessl i tessl/maven-org-springframework-ai--spring-ai-autoconfigure-retry@1.1.1