Spring AI integration for Azure OpenAI services, providing chat completion, text embeddings, image generation, and audio transcription with GPT, DALL-E, and Whisper models.
Handle complex situations and edge cases in production.
Implement robust retry with exponential backoff.
public class ResilientAIService {
private final AzureOpenAiChatModel chatModel;
private final int maxRetries = 5;
private final int baseDelayMs = 1000;
public ChatResponse callWithRetry(Prompt prompt) {
int attempt = 0;
Exception lastException = null;
while (attempt < maxRetries) {
try {
return chatModel.call(prompt);
} catch (HttpResponseException e) {
lastException = e;
int statusCode = e.getResponse().getStatusCode();
// Only retry on transient errors
if (statusCode == 429 || statusCode == 500 || statusCode == 503) {
attempt++;
if (attempt < maxRetries) {
int delayMs = calculateBackoff(attempt);
try {
Thread.sleep(delayMs);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new RuntimeException("Retry interrupted", ie);
}
continue;
}
}
// Non-retryable error, or retries exhausted
throw e;
}
}
throw new RuntimeException("Max retries exceeded", lastException);
}
private int calculateBackoff(int attempt) {
// Exponential backoff with jitter
int exponentialDelay = baseDelayMs * (1 << (attempt - 1));
int jitter = ThreadLocalRandom.current().nextInt(0, exponentialDelay / 2);
return Math.min(exponentialDelay + jitter, 60000); // Cap at 60s
}
}
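If you prefer not to hand-roll the loop, the same policy can be expressed with Spring Retry's RetryTemplate; the sketch below assumes spring-retry is on the classpath (the Spring AI starters also expose retry tuning through the spring.ai.retry.* properties).
import org.springframework.ai.azure.openai.AzureOpenAiChatModel;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.retry.support.RetryTemplate;
import com.azure.core.exception.HttpResponseException;
public class DeclarativeRetryService {
    private final AzureOpenAiChatModel chatModel;
    // 5 attempts, 1s initial delay, doubling up to a 60s cap
    private final RetryTemplate retryTemplate = RetryTemplate.builder()
            .maxAttempts(5)
            .exponentialBackoff(1_000, 2.0, 60_000)
            .retryOn(HttpResponseException.class)
            .build();
    public DeclarativeRetryService(AzureOpenAiChatModel chatModel) {
        this.chatModel = chatModel;
    }
    public ChatResponse callWithRetry(Prompt prompt) {
        return retryTemplate.execute(ctx -> chatModel.call(prompt));
    }
}
Unlike the loop above, retryOn(HttpResponseException.class) retries every HTTP failure; limiting retries to 429/500/503 requires a custom retry policy.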
Split and process large audio files.
public class LargeAudioProcessor {
private final AzureOpenAiAudioTranscriptionModel transcriptionModel;
public String transcribeLargeFile(File audioFile) throws IOException {
long fileSize = audioFile.length();
long maxChunkSize = 20 * 1024 * 1024; // 20MB chunks
if (fileSize <= maxChunkSize) {
return transcriptionModel.call(new FileSystemResource(audioFile));
}
// Split file into chunks
List<File> chunks = splitAudioFile(audioFile, maxChunkSize);
List<String> transcriptions = new ArrayList<>();
String previousContext = null;
for (File chunk : chunks) {
AzureOpenAiAudioTranscriptionOptions options =
AzureOpenAiAudioTranscriptionOptions.builder()
.language("en")
.prompt(previousContext) // Use previous text for continuity
.build();
AudioTranscriptionPrompt prompt = new AudioTranscriptionPrompt(
new FileSystemResource(chunk),
options
);
String transcription = transcriptionModel.call(prompt)
.getResult()
.getOutput();
transcriptions.add(transcription);
// Use last sentence as context for next chunk
previousContext = getLastSentences(transcription, 2);
}
// Clean up temporary chunks
chunks.forEach(File::delete);
return String.join(" ", transcriptions);
}
private List<File> splitAudioFile(File input, long chunkSize) {
// Implementation using audio processing library
// Ensure splits at silence points for better transcription
return AudioSplitter.split(input, chunkSize);
}
}
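The getLastSentences helper is not shown above; a purely illustrative sketch of such a helper using java.text.BreakIterator:
import java.text.BreakIterator;
import java.util.ArrayDeque;
import java.util.Deque;
final class SentenceTail {
    private SentenceTail() {
    }
    // Keep only the last `count` sentences of `text` to use as transcription context
    static String lastSentences(String text, int count) {
        BreakIterator iterator = BreakIterator.getSentenceInstance();
        iterator.setText(text);
        Deque<String> tail = new ArrayDeque<>();
        int start = iterator.first();
        for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
            tail.addLast(text.substring(start, end));
            if (tail.size() > count) {
                tail.removeFirst();
            }
        }
        return String.join("", tail).trim();
    }
}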
Handle prompts that exceed token limits.
public class TokenAwareService {
private final AzureOpenAiChatModel chatModel;
private final TokenCounter tokenCounter;
public ChatResponse handleLongPrompt(String userInput, List<String> context) {
int maxTokens = 128000; // gpt-4o limit
int reservedForResponse = 4000;
int availableForPrompt = maxTokens - reservedForResponse;
// Count tokens
int userTokens = tokenCounter.count(userInput);
int contextTokens = context.stream()
.mapToInt(tokenCounter::count)
.sum();
// If within limits, proceed normally
if (userTokens + contextTokens < availableForPrompt) {
String fullPrompt = buildPrompt(userInput, context);
return chatModel.call(new Prompt(fullPrompt));
}
// Truncate context to fit
List<String> truncatedContext = truncateContext(
context,
availableForPrompt - userTokens
);
String prompt = buildPrompt(userInput, truncatedContext);
return chatModel.call(new Prompt(prompt));
}
private List<String> truncateContext(List<String> context, int maxTokens) {
List<String> result = new ArrayList<>();
int currentTokens = 0;
// Add most recent context first (usually more relevant)
for (int i = context.size() - 1; i >= 0; i--) {
String item = context.get(i);
int tokens = tokenCounter.count(item);
if (currentTokens + tokens <= maxTokens) {
result.add(0, item);
currentTokens += tokens;
} else {
break;
}
}
return result;
}
}
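The TokenCounter above is a placeholder; recent Spring AI releases ship a JTokkit-backed TokenCountEstimator in spring-ai-core that can implement it. A minimal sketch, assuming that class is available:
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.ai.tokenizer.TokenCountEstimator;
public class JTokkitTokenCounter {
    // Uses the cl100k_base encoding by default; counts are close but not exact for every model
    private final TokenCountEstimator estimator = new JTokkitTokenCountEstimator();
    public int count(String text) {
        return estimator.estimate(text);
    }
}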
Gracefully handle content policy violations.
public class SafeContentService {
private final AzureOpenAiChatModel chatModel;
public String generateSafeContent(String prompt) {
try {
ChatResponse response = chatModel.call(new Prompt(prompt));
return response.getResult().getOutput().getText();
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 400) {
String errorBody = e.getResponse().getBodyAsString().block();
if (errorBody != null && errorBody.contains("content_filter")) {
// Content filtered - try rephrasing
String rephrasedPrompt = rephrasePrompt(prompt);
try {
ChatResponse response = chatModel.call(
new Prompt(rephrasedPrompt)
);
return response.getResult().getOutput().getText();
} catch (HttpResponseException e2) {
// Still filtered - return safe fallback
return "I'm unable to generate content for that request. " +
"Please try rephrasing your question.";
}
}
}
throw e;
}
}
private String rephrasePrompt(String original) {
// Add safety instructions
return "Please provide a safe, appropriate response to: " + original;
}
}
Recover from stream failures.
public class ResilientStreamingService {
private final AzureOpenAiChatModel chatModel;
public String streamWithRecovery(Prompt prompt) {
StringBuilder result = new StringBuilder();
int maxAttempts = 3;
int attempt = 0;
while (attempt < maxAttempts) {
try {
CompletableFuture<String> future = new CompletableFuture<>();
chatModel.stream(prompt)
.doOnNext(chunk -> {
String token = chunk.getResult().getOutput().getText();
if (token != null) {
result.append(token);
}
})
.doOnError(future::completeExceptionally)
.doOnComplete(() -> future.complete(result.toString()))
.subscribe();
return future.get(60, TimeUnit.SECONDS);
} catch (InterruptedException | TimeoutException | ExecutionException e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
attempt++;
if (attempt < maxAttempts) {
// Resume from where we left off
String partialResult = result.toString();
if (!partialResult.isEmpty()) {
prompt = new Prompt(
"Continue from: " + partialResult
);
}
try {
Thread.sleep(1000L * attempt);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new RuntimeException("Stream retry interrupted", ie);
}
} else {
// Return partial result if available
if (result.length() > 0) {
return result.toString() + "\n[Stream interrupted]";
}
throw new RuntimeException("Stream failed after retries", e);
}
}
}
return result.toString();
}
}
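If losing partial output on a retry is acceptable, Reactor's built-in retry operator gives a much shorter variant; a minimal sketch (retryWhen restarts the stream from the beginning, unlike the resume logic above):
import java.time.Duration;
import java.util.stream.Collectors;
import org.springframework.ai.azure.openai.AzureOpenAiChatModel;
import org.springframework.ai.chat.prompt.Prompt;
import reactor.util.retry.Retry;
public class ReactiveStreamingService {
    private final AzureOpenAiChatModel chatModel;
    public ReactiveStreamingService(AzureOpenAiChatModel chatModel) {
        this.chatModel = chatModel;
    }
    public String streamToString(Prompt prompt) {
        return chatModel.stream(prompt)
                // Some chunks carry no text; map them to empty strings
                .map(chunk -> {
                    var generation = chunk.getResult();
                    var text = generation != null ? generation.getOutput().getText() : null;
                    return text != null ? text : "";
                })
                // Up to 3 retries with exponential backoff starting at 1s
                .retryWhen(Retry.backoff(3, Duration.ofSeconds(1)))
                .collect(Collectors.joining())
                .block(Duration.ofSeconds(60));
    }
}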
Validate and handle edge cases in embeddings.
public class RobustEmbeddingService {
private final AzureOpenAiEmbeddingModel embeddingModel;
public float[] embedWithValidation(String text) {
// Validate input
if (text == null || text.trim().isEmpty()) {
throw new IllegalArgumentException("Text cannot be empty");
}
// Check token count
int tokenCount = estimateTokens(text);
if (tokenCount > 8191) {
text = truncateToTokens(text, 8191);
}
try {
EmbeddingResponse response = embeddingModel.call(
new EmbeddingRequest(List.of(text), null)
);
float[] embedding = response.getResults().get(0).getOutput();
// Validate embedding
if (embedding == null || embedding.length == 0) {
throw new RuntimeException("Received empty embedding");
}
// Check for NaN or Inf values
for (float value : embedding) {
if (Float.isNaN(value) || Float.isInfinite(value)) {
throw new RuntimeException("Invalid embedding values");
}
}
return embedding;
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 400) {
// Retry once with cleaned text; if cleaning changes nothing, rethrow
String cleaned = cleanText(text);
if (!cleaned.equals(text)) {
return embedWithValidation(cleaned);
}
}
throw e;
}
}
private String cleanText(String text) {
// Remove special characters, normalize whitespace
return text.replaceAll("[^\\p{L}\\p{N}\\p{P}\\p{Z}]", " ")
.replaceAll("\\s+", " ")
.trim();
}
}
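When many texts need vectors, batching them into a single EmbeddingRequest cuts round trips and rate-limit pressure; a minimal sketch using the same API as above (results come back in input order):
import java.util.List;
import java.util.stream.Collectors;
import org.springframework.ai.azure.openai.AzureOpenAiEmbeddingModel;
import org.springframework.ai.embedding.EmbeddingRequest;
import org.springframework.ai.embedding.EmbeddingResponse;
public class BatchEmbeddingService {
    private final AzureOpenAiEmbeddingModel embeddingModel;
    public BatchEmbeddingService(AzureOpenAiEmbeddingModel embeddingModel) {
        this.embeddingModel = embeddingModel;
    }
    // One request for the whole batch instead of one call per text
    public List<float[]> embedAll(List<String> texts) {
        EmbeddingResponse response = embeddingModel.call(new EmbeddingRequest(texts, null));
        return response.getResults().stream()
                .map(result -> result.getOutput())
                .collect(Collectors.toList());
    }
}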
Retry image generation with fallback options.
public class ResilientImageService {
private final AzureOpenAiImageModel imageModel;
public String generateImageWithFallback(String prompt) {
// Try with DALL-E 3 HD first
try {
AzureOpenAiImageOptions hdOptions = AzureOpenAiImageOptions.builder()
.deploymentName("dall-e-3")
.width(1024)
.height(1024)
.style("vivid")
.build();
hdOptions.setQuality("hd");
ImageResponse response = imageModel.call(
new ImagePrompt(prompt, hdOptions)
);
return response.getResult().getOutput().getUrl();
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 400 &&
e.getMessage().contains("content_policy_violation")) {
// Try with safer prompt
String safePrompt = makeSafePrompt(prompt);
try {
AzureOpenAiImageOptions standardOptions =
AzureOpenAiImageOptions.builder()
.deploymentName("dall-e-3")
.width(1024)
.height(1024)
.style("natural")
.build();
ImageResponse response = imageModel.call(
new ImagePrompt(safePrompt, standardOptions)
);
return response.getResult().getOutput().getUrl();
} catch (HttpResponseException e2) {
// Fall back to DALL-E 2
return generateWithDallE2(safePrompt);
}
}
throw e;
}
}
private String generateWithDallE2(String prompt) {
AzureOpenAiImageOptions options = AzureOpenAiImageOptions.builder()
.model("dall-e-2")
.width(512)
.height(512)
.build();
ImageResponse response = imageModel.call(
new ImagePrompt(prompt, options)
);
return response.getResult().getOutput().getUrl();
}
}
Manage concurrent API calls efficiently.
public class ConcurrentRequestManager {
private final AzureOpenAiChatModel chatModel;
private final Semaphore rateLimiter;
private final ExecutorService executor;
public ConcurrentRequestManager(
AzureOpenAiChatModel chatModel,
int maxConcurrent
) {
this.chatModel = chatModel;
this.rateLimiter = new Semaphore(maxConcurrent);
this.executor = Executors.newFixedThreadPool(maxConcurrent);
}
public List<ChatResponse> processBatch(List<Prompt> prompts) {
List<CompletableFuture<ChatResponse>> futures = prompts.stream()
.map(prompt -> CompletableFuture.supplyAsync(() -> {
try {
rateLimiter.acquire();
try {
return chatModel.call(prompt);
} finally {
rateLimiter.release();
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
}, executor))
.collect(Collectors.toList());
return futures.stream()
.map(CompletableFuture::join)
.collect(Collectors.toList());
}
}
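A short usage sketch; the prompts and the pool size of 4 are placeholders, and long-running applications should also shut the manager's executor down once the batch work is done:
import java.util.List;
import org.springframework.ai.azure.openai.AzureOpenAiChatModel;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
public class BatchCaller {
    public static List<ChatResponse> summarizeReports(AzureOpenAiChatModel chatModel) {
        // At most 4 requests in flight at once; tune this to your deployment's rate limits
        ConcurrentRequestManager manager = new ConcurrentRequestManager(chatModel, 4);
        return manager.processBatch(List.of(
                new Prompt("Summarize the Q1 report"),
                new Prompt("Summarize the Q2 report")));
    }
}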
Download and cache images before expiration.
public class ImageCacheService {
private final AzureOpenAiImageModel imageModel;
private final ImageStorage storage;
public String generateAndStoreImage(String prompt) {
// Generate with base64 to avoid expiration
AzureOpenAiImageOptions options = AzureOpenAiImageOptions.builder()
.responseFormat("b64_json")
.build();
ImageResponse response = imageModel.call(
new ImagePrompt(prompt, options)
);
String base64Image = response.getResult().getOutput().getB64Json();
byte[] imageBytes = Base64.getDecoder().decode(base64Image);
// Store permanently
String permanentUrl = storage.store(imageBytes, "generated.png");
return permanentUrl;
}
public String generateWithUrlCaching(String prompt) {
// Generate with URL
ImageResponse response = imageModel.call(new ImagePrompt(prompt));
String temporaryUrl = response.getResult().getOutput().getUrl();
// Download immediately (URL expires in 1 hour)
byte[] imageBytes = downloadImage(temporaryUrl);
// Store permanently
String permanentUrl = storage.store(imageBytes, "generated.png");
return permanentUrl;
}
}
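The downloadImage helper is not shown above; a minimal sketch using the JDK HttpClient (error handling kept deliberately simple):
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
public class ImageDownloader {
    private final HttpClient httpClient = HttpClient.newHttpClient();
    // Fetch the image bytes before the temporary URL expires
    public byte[] downloadImage(String url) {
        try {
            HttpResponse<byte[]> response = httpClient.send(
                    HttpRequest.newBuilder(URI.create(url)).GET().build(),
                    HttpResponse.BodyHandlers.ofByteArray());
            if (response.statusCode() != 200) {
                throw new IOException("Unexpected HTTP status " + response.statusCode());
            }
            return response.body();
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to download generated image", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Image download interrupted", e);
        }
    }
}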