Spring AI integration for Azure OpenAI services providing chat completion, text embeddings, image generation, and audio transcription with GPT, DALL-E, and Whisper models
Complete guide to exception handling and error recovery.
// Azure SDK exceptions
com.azure.core.exception.HttpResponseException // Base HTTP error
com.azure.core.exception.ResourceNotFoundException // 404 errors
// Spring AI exceptions
org.springframework.ai.retry.NonTransientAiException // Permanent failures
org.springframework.ai.retry.TransientAiException // Temporary failures
// Java exceptions
java.lang.IllegalArgumentException // Invalid parameters
java.lang.NullPointerException // Null required parameters
Causes:
Example:
try {
response = chatModel.call(prompt);
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 400) {
String errorMessage = e.getMessage();
if (errorMessage.contains("content_policy_violation")) {
// Handle content filter
} else if (errorMessage.contains("maximum context length")) {
// Handle token limit
} else if (errorMessage.contains("invalid_image_size")) {
// Handle invalid dimensions
}
}
}Causes:
Example:
try {
response = chatModel.call(prompt);
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 401) {
throw new AuthenticationException(
"Invalid Azure OpenAI credentials. Check API key and endpoint.",
e
);
}
}Causes:
Example:
// Example: classifying a 403 response by looking at the response body.
try {
    response = chatModel.call(prompt);
} catch (HttpResponseException e) {
    if (e.getResponse().getStatusCode() != 403) {
        // Not a 403: propagate instead of silently dropping the failure.
        throw e;
    }
    // block() yields null when the body publisher completes without a value,
    // so the body must be null-checked before it is searched.
    String errorBody = e.getResponse().getBodyAsString().block();
    if (errorBody != null && errorBody.contains("content_filter")) {
        throw new ContentFilterException("Content blocked by filter", e);
    }
    if (errorBody != null && errorBody.contains("quota")) {
        throw new QuotaException("Quota exceeded", e);
    }
    // A 403 with an unrecognised (or empty) body is still a failure.
    throw e;
}Causes:
Example:
try {
response = chatModel.call(prompt);
} catch (ResourceNotFoundException e) {
throw new ConfigurationException(
"Deployment '" + options.getDeploymentName() + "' not found. " +
"Check Azure portal for valid deployment names.",
e
);
}Causes:
Example:
// Example: honouring the Retry-After header on a 429 response, then retrying once.
try {
    response = chatModel.call(prompt);
} catch (HttpResponseException e) {
    if (e.getResponse().getStatusCode() != 429) {
        // Not rate limiting: propagate instead of silently dropping the failure.
        throw e;
    }
    // Extract retry-after header
    String retryAfter = e.getResponse()
            .getHeaders()
            .getValue("Retry-After");
    long waitSeconds = 60; // default when the header is absent or unusable
    if (retryAfter != null) {
        try {
            // Clamp to zero: Thread.sleep rejects negative durations.
            waitSeconds = Math.max(0L, Long.parseLong(retryAfter.trim()));
        } catch (NumberFormatException notDeltaSeconds) {
            // Retry-After may also be an HTTP-date rather than a number of
            // seconds; fall back to the default wait in that case.
        }
    }
    // Multiply as long so large header values cannot overflow int arithmetic.
    Thread.sleep(waitSeconds * 1000L);
    response = chatModel.call(prompt); // Retry
}Causes:
Example:
try {
response = chatModel.call(prompt);
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 500) {
// Transient error - retry with backoff
Thread.sleep(2000);
response = chatModel.call(prompt);
}
}Causes:
Example:
try {
response = chatModel.call(prompt);
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 503) {
// Service down - retry after delay
Thread.sleep(5000);
response = chatModel.call(prompt);
}
}
/**
 * Calls the chat model, retrying with exponential backoff on 429, 500 and 503.
 *
 * <p>Up to five attempts are made. The wait before retry {@code k} (zero-based)
 * is {@code 1000 * 2^k} milliseconds. Any other status, or a retryable status
 * on the final attempt, is rethrown unchanged.
 *
 * <p>If the thread is interrupted while waiting, the interrupt flag is restored
 * and the most recent {@code HttpResponseException} is rethrown, so no checked
 * exception escapes this method.
 *
 * @param prompt the prompt to send
 * @return the response of the first successful call
 * @throws HttpResponseException if the call fails with a non-retryable status,
 *         retries are exhausted, or the wait is interrupted
 */
public ChatResponse callWithExponentialBackoff(Prompt prompt) {
    int maxRetries = 5;
    int baseDelayMs = 1000;
    for (int attempt = 0; attempt < maxRetries; attempt++) {
        try {
            return chatModel.call(prompt);
        } catch (HttpResponseException e) {
            int statusCode = e.getResponse().getStatusCode();
            // Only retry on transient errors
            boolean transientError =
                    statusCode == 429 || statusCode == 500 || statusCode == 503;
            boolean lastAttempt = attempt == maxRetries - 1;
            if (!transientError || lastAttempt) {
                throw e;
            }
            long delayMs = baseDelayMs * (1L << attempt);
            try {
                Thread.sleep(delayMs);
            } catch (InterruptedException interrupted) {
                // Preserve the interrupt for callers and stop retrying.
                Thread.currentThread().interrupt();
                throw e;
            }
        }
    }
    // Not reached in practice: the final attempt either returns or rethrows.
    throw new RuntimeException("Max retries exceeded");
}
/**
 * Calls the chat model, retrying retryable failures with exponential backoff
 * plus random jitter.
 *
 * <p>Up to five attempts are made. The wait before each retry is the
 * exponential delay ({@code 1000 * 2^attempt} ms) plus a random extra of up to
 * half that delay, capped at 60 seconds. Which failures are retried is decided
 * by {@link #isRetryable(HttpResponseException)}.
 *
 * <p>If the thread is interrupted while waiting, the interrupt flag is restored
 * and the most recent {@code HttpResponseException} is rethrown.
 *
 * @param prompt the prompt to send
 * @return the response of the first successful call
 * @throws HttpResponseException if the failure is not retryable, retries are
 *         exhausted, or the wait is interrupted
 */
public ChatResponse callWithJitter(Prompt prompt) {
    int maxRetries = 5;
    int baseDelayMs = 1000;
    for (int attempt = 0; attempt < maxRetries; attempt++) {
        try {
            return chatModel.call(prompt);
        } catch (HttpResponseException e) {
            if (!isRetryable(e) || attempt >= maxRetries - 1) {
                throw e;
            }
            int exponentialDelay = baseDelayMs * (1 << attempt);
            int jitter = ThreadLocalRandom.current()
                    .nextInt(0, exponentialDelay / 2);
            int totalDelay = Math.min(
                    exponentialDelay + jitter,
                    60000 // Cap at 60 seconds
            );
            try {
                Thread.sleep(totalDelay);
            } catch (InterruptedException interrupted) {
                // Preserve the interrupt for callers and stop retrying.
                Thread.currentThread().interrupt();
                throw e;
            }
        }
    }
    // Not reached in practice: the final attempt either returns or rethrows.
    throw new RuntimeException("Max retries exceeded");
}

/**
 * Reports whether a failed call should be retried.
 *
 * @param e the exception raised by the failed call
 * @return {@code true} for status 429, 500 or 503; {@code false} otherwise
 */
private boolean isRetryable(HttpResponseException e) {
    int statusCode = e.getResponse().getStatusCode();
    return statusCode == 429 || statusCode == 500 || statusCode == 503;
}
/**
 * Minimal circuit breaker around {@code chatModel.call}.
 *
 * <p>The breaker starts CLOSED. After {@code failureThreshold} consecutive
 * {@code HttpResponseException}s it becomes OPEN and rejects calls with a
 * {@code RuntimeException}. Once more than {@code resetTimeout} milliseconds
 * have passed since it opened, the next call is let through as a trial
 * (HALF_OPEN): success closes the breaker, failure opens it again.
 *
 * <p>State is held in plain fields without synchronization. {@code chatModel}
 * is referenced but not declared in the class as shown here.
 */
public class CircuitBreakerService {
    private enum State { CLOSED, OPEN, HALF_OPEN }

    private State state = State.CLOSED;
    private int failureCount = 0;
    private final int failureThreshold = 5;
    private long openedAt = 0;
    private final long resetTimeout = 60000; // 1 minute

    /**
     * Sends the prompt unless the breaker is currently open.
     *
     * @param prompt the prompt to send
     * @return the model's response
     * @throws RuntimeException if the breaker is open and the reset timeout has
     *         not yet elapsed
     * @throws HttpResponseException if the underlying call fails
     */
    public ChatResponse callWithCircuitBreaker(Prompt prompt) {
        if (state == State.OPEN) {
            if (System.currentTimeMillis() - openedAt > resetTimeout) {
                state = State.HALF_OPEN;
            } else {
                throw new RuntimeException("Circuit breaker is OPEN");
            }
        }
        try {
            ChatResponse response = chatModel.call(prompt);
            // Any success ends the current failure streak. Without this reset,
            // isolated failures spread over a long period would add up and
            // eventually open the breaker even though the service is healthy.
            failureCount = 0;
            state = State.CLOSED;
            return response;
        } catch (HttpResponseException e) {
            failureCount++;
            // A failed HALF_OPEN trial re-opens immediately because the count
            // is still at or above the threshold from the earlier streak.
            if (failureCount >= failureThreshold) {
                state = State.OPEN;
                openedAt = System.currentTimeMillis();
            }
            throw e;
        }
    }
}
/**
 * Returns the model's text for a prompt, degrading gracefully when rate limited.
 *
 * <p>On a 429 response the cached answer for the prompt is returned if one
 * exists; otherwise the request is sent again through
 * {@link #getFallbackResponse(String)}. Every other failure is rethrown.
 *
 * @param prompt the user prompt text
 * @return the generated text, a cached answer, or the fallback model's text
 * @throws HttpResponseException for any failure other than a 429
 */
public String getResponseWithFallback(String prompt) {
    try {
        // Primary path: ask the configured model.
        return chatModel.call(new Prompt(prompt))
                .getResult()
                .getOutput()
                .getText();
    } catch (HttpResponseException e) {
        boolean rateLimited = e.getResponse().getStatusCode() == 429;
        if (!rateLimited) {
            throw e;
        }
        // Rate limited: prefer a cached answer, else use the fallback deployment.
        String cached = cache.get(prompt);
        return cached != null ? cached : getFallbackResponse(prompt);
    }
}

/**
 * Sends the prompt using the "gpt-35-turbo" deployment with at most 500 tokens.
 *
 * @param prompt the user prompt text
 * @return the text produced by the fallback deployment
 */
private String getFallbackResponse(String prompt) {
    AzureOpenAiChatOptions fallbackOptions = AzureOpenAiChatOptions.builder()
            .deploymentName("gpt-35-turbo") // Cheaper, faster model
            .maxTokens(500)
            .build();
    Prompt fallbackPrompt = new Prompt(prompt, fallbackOptions);
    return chatModel.call(fallbackPrompt)
            .getResult()
            .getOutput()
            .getText();
}
/**
 * Streams a response and returns the concatenated text, keeping whatever was
 * received if the stream fails part-way.
 *
 * <p>The stream is consumed synchronously: the method blocks until the stream
 * completes or fails. A plain {@code subscribe(...)} returns immediately, so
 * returning straight after it would hand back the text before any chunks
 * arrived and would never observe stream errors.
 *
 * @param prompt the prompt to send
 * @return the full text, or the partial text followed by
 *         {@code "\n[Incomplete response]"} if the stream failed after
 *         producing some output
 */
public String streamWithPartialResults(Prompt prompt) {
    StringBuilder result = new StringBuilder();
    try {
        chatModel.stream(prompt)
                .doOnNext(chunk -> {
                    // Skip chunks that carry no generation or no output.
                    if (chunk.getResult() == null
                            || chunk.getResult().getOutput() == null) {
                        return;
                    }
                    String token = chunk.getResult().getOutput().getText();
                    if (token != null) {
                        result.append(token);
                    }
                })
                .blockLast(); // wait for completion; stream errors are rethrown here
        return result.toString();
    } catch (Exception e) {
        // Stream failed - return partial result if available
        if (result.length() > 0) {
            return result.toString() + "\n[Incomplete response]";
        }
        throw e;
    }
}
/**
 * Checks chat options for values that are rejected up front.
 *
 * <p>Three rules are enforced: a temperature, when set, must lie in
 * [0.0, 2.0]; {@code maxTokens} and {@code maxCompletionTokens} must not both
 * be set; and {@code n}, when set, must be at least 1.
 *
 * @param options the options to validate
 * @throws IllegalArgumentException if any rule is violated
 */
public void validateChatOptions(AzureOpenAiChatOptions options) {
    // Rule 1: temperature range.
    if (options.getTemperature() != null) {
        double temp = options.getTemperature();
        boolean outOfRange = temp < 0.0 || temp > 2.0;
        if (outOfRange) {
            String message = "Temperature must be between 0.0 and 2.0, got: " + temp;
            throw new IllegalArgumentException(message);
        }
    }

    // Rule 2: the two token limits are mutually exclusive.
    boolean hasMaxTokens = options.getMaxTokens() != null;
    if (hasMaxTokens && options.getMaxCompletionTokens() != null) {
        throw new IllegalArgumentException(
                "Cannot use both maxTokens and maxCompletionTokens");
    }

    // Rule 3: at least one completion must be requested.
    if (options.getN() != null && options.getN() < 1) {
        String message = "N must be >= 1, got: " + options.getN();
        throw new IllegalArgumentException(message);
    }
}
/**
 * Rejects prompts that are missing or whose estimated size exceeds 128000 tokens.
 *
 * @param prompt the prompt text to check
 * @throws IllegalArgumentException if the prompt is null, empty after
 *         trimming, or estimated at more than 128000 tokens
 */
public void validatePrompt(String prompt) {
    boolean missing = prompt == null || prompt.trim().isEmpty();
    if (missing) {
        throw new IllegalArgumentException("Prompt cannot be null or empty");
    }

    int estimatedTokens = estimateTokenCount(prompt);
    boolean tooLong = estimatedTokens > 128000;
    if (tooLong) {
        String message = "Prompt too long: " + estimatedTokens + " tokens (max: 128000)";
        throw new IllegalArgumentException(message);
    }
}
/**
 * Calls the chat model and logs the start, success and failure of the call
 * under a freshly generated request id.
 *
 * <p>{@code Map.of} throws {@code NullPointerException} for null values, so
 * values that may legitimately be null (the deployment name and the exception
 * message) are converted with {@code String.valueOf} first. This keeps a
 * logging statement from aborting the call or replacing the real API failure
 * with an NPE.
 *
 * @param prompt the prompt to send
 * @return the model's response
 * @throws HttpResponseException if the call fails; it is logged and rethrown
 */
public ChatResponse callWithLogging(Prompt prompt) {
    String requestId = UUID.randomUUID().toString();
    try {
        logger.info("API call started", Map.of(
                "requestId", requestId,
                "model", String.valueOf(options.getDeploymentName()),
                "promptLength", prompt.getContents().length()
        ));
        ChatResponse response = chatModel.call(prompt);
        // NOTE(review): assumes metadata, usage and the total token count are
        // always present on a successful response - confirm.
        logger.info("API call succeeded", Map.of(
                "requestId", requestId,
                "tokensUsed", response.getMetadata().getUsage().getTotalTokens()
        ));
        return response;
    } catch (HttpResponseException e) {
        logger.error("API call failed", Map.of(
                "requestId", requestId,
                "statusCode", e.getResponse().getStatusCode(),
                "error", String.valueOf(e.getMessage())
        ));
        throw e;
    }
}
/**
 * Calls the chat model and reports timing and token usage to {@code metrics}.
 *
 * <p>On success the elapsed milliseconds and the total token count are
 * recorded. On an {@code HttpResponseException} the elapsed milliseconds and
 * the HTTP status are recorded and the exception is rethrown.
 *
 * @param prompt the prompt to send
 * @return the model's response
 * @throws HttpResponseException if the call fails
 */
public ChatResponse callWithMetrics(Prompt prompt) {
    final long startedAt = System.currentTimeMillis();
    try {
        ChatResponse reply = chatModel.call(prompt);
        long elapsed = System.currentTimeMillis() - startedAt;
        metrics.recordSuccess(elapsed);
        metrics.recordTokens(reply.getMetadata().getUsage().getTotalTokens());
        return reply;
    } catch (HttpResponseException failure) {
        long elapsed = System.currentTimeMillis() - startedAt;
        int status = failure.getResponse().getStatusCode();
        metrics.recordFailure(elapsed, status);
        throw failure;
    }
}
/**
 * Validates the prompt, then calls the chat model with error classification
 * and retry.
 *
 * <p>Status 401, 404 and 400 are translated into dedicated exceptions and never
 * retried. Status 429 and any status of 500 or above are retried up to three
 * attempts in total, waiting {@code 1000 * 2^attempt} ms between attempts.
 * If the thread is interrupted while waiting, the interrupt flag is restored
 * and the most recent {@code HttpResponseException} is rethrown.
 *
 * @param prompt the prompt to send
 * @return the model's response
 * @throws IllegalArgumentException if the prompt text fails validation
 * @throws AuthenticationException on status 401
 * @throws ConfigurationException on status 404
 * @throws ContentFilterException on status 400 whose message contains
 *         {@code "content_policy"}
 * @throws ValidationException on any other status 400
 * @throws HttpResponseException on other statuses, exhausted retries, or an
 *         interrupted wait
 * @throws RuntimeException wrapping any other exception raised by the call
 */
public ChatResponse robustCall(Prompt prompt) {
    // Validate input. validatePrompt takes the prompt text, not the Prompt object.
    validatePrompt(prompt.getContents());
    // Retry with backoff
    int maxRetries = 3;
    int baseDelay = 1000;
    for (int attempt = 0; attempt < maxRetries; attempt++) {
        try {
            return chatModel.call(prompt);
        } catch (HttpResponseException e) {
            int statusCode = e.getResponse().getStatusCode();
            // Handle specific errors
            if (statusCode == 401) {
                throw new AuthenticationException("Invalid credentials", e);
            } else if (statusCode == 404) {
                throw new ConfigurationException("Deployment not found", e);
            } else if (statusCode == 400) {
                if (e.getMessage().contains("content_policy")) {
                    throw new ContentFilterException("Content filtered", e);
                }
                throw new ValidationException("Invalid request", e);
            }
            // Retry transient errors
            boolean retryable = statusCode == 429 || statusCode >= 500;
            if (!retryable || attempt >= maxRetries - 1) {
                throw e;
            }
            long delay = baseDelay * (1L << attempt);
            try {
                Thread.sleep(delay);
            } catch (InterruptedException interrupted) {
                // Preserve the interrupt for callers and stop retrying.
                Thread.currentThread().interrupt();
                throw e;
            }
        } catch (Exception e) {
            // Unexpected error
            logger.error("Unexpected error", e);
            throw new RuntimeException("API call failed", e);
        }
    }
    // Not reached in practice: the final attempt either returns or throws.
    throw new RuntimeException("Max retries exceeded");
}tessl i tessl/maven-org-springframework-ai--spring-ai-azure-openai@1.1.1