Spring AI integration for Azure OpenAI services, providing chat completion, text embeddings, image generation, and audio transcription with GPT, DALL-E, and Whisper models.
Handle complex situations and edge cases in production.
Implement robust retry with exponential backoff.
public class ResilientAIService {
private final AzureOpenAiChatModel chatModel;
private final int maxRetries = 5;
private final int baseDelayMs = 1000;
public ChatResponse callWithRetry(Prompt prompt) {
int attempt = 0;
Exception lastException = null;
while (attempt < maxRetries) {
try {
return chatModel.call(prompt);
} catch (HttpResponseException e) {
lastException = e;
int statusCode = e.getResponse().getStatusCode();
// Only retry on transient errors
if (statusCode == 429 || statusCode == 500 || statusCode == 503) {
attempt++;
if (attempt < maxRetries) {
int delayMs = calculateBackoff(attempt);
try {
Thread.sleep(delayMs);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new RuntimeException("Retry interrupted", ie);
}
continue;
}
}
// Non-retryable error, or retries exhausted
throw e;
}
}
throw new RuntimeException("Max retries exceeded", lastException);
}
private int calculateBackoff(int attempt) {
// Exponential backoff with jitter
int exponentialDelay = baseDelayMs * (1 << (attempt - 1));
int jitter = ThreadLocalRandom.current().nextInt(0, exponentialDelay / 2);
return Math.min(exponentialDelay + jitter, 60000); // Cap at 60s
}
}
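If you prefer not to hand-roll the loop, the same policy can be expressed with Spring Retry's RetryTemplate; the sketch below assumes spring-retry is on the classpath (the Spring AI starters also expose retry tuning through the spring.ai.retry.* properties).
import org.springframework.ai.azure.openai.AzureOpenAiChatModel;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.retry.support.RetryTemplate;
import com.azure.core.exception.HttpResponseException;
public class DeclarativeRetryService {
    private final AzureOpenAiChatModel chatModel;
    // 5 attempts, 1s initial delay, doubling up to a 60s cap
    private final RetryTemplate retryTemplate = RetryTemplate.builder()
            .maxAttempts(5)
            .exponentialBackoff(1_000, 2.0, 60_000)
            .retryOn(HttpResponseException.class)
            .build();
    public DeclarativeRetryService(AzureOpenAiChatModel chatModel) {
        this.chatModel = chatModel;
    }
    public ChatResponse callWithRetry(Prompt prompt) {
        return retryTemplate.execute(ctx -> chatModel.call(prompt));
    }
}
Unlike the loop above, retryOn(HttpResponseException.class) retries every HTTP failure; limiting retries to 429/500/503 requires a custom retry policy.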
Split and process large audio files.
public class LargeAudioProcessor {
private final AzureOpenAiAudioTranscriptionModel transcriptionModel;
public String transcribeLargeFile(File audioFile) throws IOException {
long fileSize = audioFile.length();
long maxChunkSize = 20 * 1024 * 1024; // 20MB chunks
if (fileSize <= maxChunkSize) {
return transcriptionModel.call(new FileSystemResource(audioFile));
}
// Split file into chunks
List<File> chunks = splitAudioFile(audioFile, maxChunkSize);
List<String> transcriptions = new ArrayList<>();
String previousContext = null;
for (File chunk : chunks) {
AzureOpenAiAudioTranscriptionOptions options =
AzureOpenAiAudioTranscriptionOptions.builder()
.language("en")
.prompt(previousContext) // Use previous text for continuity
.build();
AudioTranscriptionPrompt prompt = new AudioTranscriptionPrompt(
new FileSystemResource(chunk),
options
);
String transcription = transcriptionModel.call(prompt)
.getResult()
.getOutput();
transcriptions.add(transcription);
// Use last sentence as context for next chunk
previousContext = getLastSentences(transcription, 2);
}
// Clean up temporary chunks
chunks.forEach(File::delete);
return String.join(" ", transcriptions);
}
private List<File> splitAudioFile(File input, long chunkSize) {
// Implementation using audio processing library
// Ensure splits at silence points for better transcription
return AudioSplitter.split(input, chunkSize);
}
}
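The getLastSentences helper is not shown above; a purely illustrative sketch of such a helper using java.text.BreakIterator:
import java.text.BreakIterator;
import java.util.ArrayDeque;
import java.util.Deque;
final class SentenceTail {
    private SentenceTail() {
    }
    // Keep only the last `count` sentences of `text` to use as transcription context
    static String lastSentences(String text, int count) {
        BreakIterator iterator = BreakIterator.getSentenceInstance();
        iterator.setText(text);
        Deque<String> tail = new ArrayDeque<>();
        int start = iterator.first();
        for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) {
            tail.addLast(text.substring(start, end));
            if (tail.size() > count) {
                tail.removeFirst();
            }
        }
        return String.join("", tail).trim();
    }
}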
Handle prompts that exceed token limits.
public class TokenAwareService {
private final AzureOpenAiChatModel chatModel;
private final TokenCounter tokenCounter;
public ChatResponse handleLongPrompt(String userInput, List<String> context) {
int maxTokens = 128000; // gpt-4o limit
int reservedForResponse = 4000;
int availableForPrompt = maxTokens - reservedForResponse;
// Count tokens
int userTokens = tokenCounter.count(userInput);
int contextTokens = context.stream()
.mapToInt(tokenCounter::count)
.sum();
// If within limits, proceed normally
if (userTokens + contextTokens < availableForPrompt) {
String fullPrompt = buildPrompt(userInput, context);
return chatModel.call(new Prompt(fullPrompt));
}
// Truncate context to fit
List<String> truncatedContext = truncateContext(
context,
availableForPrompt - userTokens
);
String prompt = buildPrompt(userInput, truncatedContext);
return chatModel.call(new Prompt(prompt));
}
private List<String> truncateContext(List<String> context, int maxTokens) {
List<String> result = new ArrayList<>();
int currentTokens = 0;
// Add most recent context first (usually more relevant)
for (int i = context.size() - 1; i >= 0; i--) {
String item = context.get(i);
int tokens = tokenCounter.count(item);
if (currentTokens + tokens <= maxTokens) {
result.add(0, item);
currentTokens += tokens;
} else {
break;
}
}
return result;
}
}
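The TokenCounter above is a placeholder; recent Spring AI releases ship a JTokkit-backed TokenCountEstimator in spring-ai-core that can implement it. A minimal sketch, assuming that class is available:
import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
import org.springframework.ai.tokenizer.TokenCountEstimator;
public class JTokkitTokenCounter {
    // Uses the cl100k_base encoding by default; counts are close but not exact for every model
    private final TokenCountEstimator estimator = new JTokkitTokenCountEstimator();
    public int count(String text) {
        return estimator.estimate(text);
    }
}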
Gracefully handle content policy violations.
public class SafeContentService {
private final AzureOpenAiChatModel chatModel;
public String generateSafeContent(String prompt) {
try {
ChatResponse response = chatModel.call(new Prompt(prompt));
return response.getResult().getOutput().getText();
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 400) {
String errorBody = e.getResponse().getBodyAsString().block();
if (errorBody != null && errorBody.contains("content_filter")) {
// Content filtered - try rephrasing
String rephrasedPrompt = rephrasePrompt(prompt);
try {
ChatResponse response = chatModel.call(
new Prompt(rephrasedPrompt)
);
return response.getResult().getOutput().getText();
} catch (HttpResponseException e2) {
// Still filtered - return safe fallback
return "I'm unable to generate content for that request. " +
"Please try rephrasing your question.";
}
}
}
throw e;
}
}
private String rephrasePrompt(String original) {
// Add safety instructions
return "Please provide a safe, appropriate response to: " + original;
}
}
Recover from stream failures.
public class ResilientStreamingService {
private final AzureOpenAiChatModel chatModel;
public String streamWithRecovery(Prompt prompt) {
StringBuilder result = new StringBuilder();
int maxAttempts = 3;
int attempt = 0;
while (attempt < maxAttempts) {
try {
CompletableFuture<String> future = new CompletableFuture<>();
chatModel.stream(prompt)
.doOnNext(chunk -> {
String token = chunk.getResult().getOutput().getText();
if (token != null) {
result.append(token);
}
})
.doOnError(future::completeExceptionally)
.doOnComplete(() -> future.complete(result.toString()))
.subscribe();
return future.get(60, TimeUnit.SECONDS);
} catch (InterruptedException | TimeoutException | ExecutionException e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
attempt++;
if (attempt < maxAttempts) {
// Resume from where we left off
String partialResult = result.toString();
if (!partialResult.isEmpty()) {
prompt = new Prompt(
"Continue from: " + partialResult
);
}
try {
Thread.sleep(1000L * attempt);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
throw new RuntimeException("Stream retry interrupted", ie);
}
} else {
// Return partial result if available
if (result.length() > 0) {
return result.toString() + "\n[Stream interrupted]";
}
throw new RuntimeException("Stream failed after retries", e);
}
}
}
return result.toString();
}
}
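If losing partial output on a retry is acceptable, Reactor's built-in retry operator gives a much shorter variant; a minimal sketch (retryWhen restarts the stream from the beginning, unlike the resume logic above):
import java.time.Duration;
import java.util.stream.Collectors;
import org.springframework.ai.azure.openai.AzureOpenAiChatModel;
import org.springframework.ai.chat.prompt.Prompt;
import reactor.util.retry.Retry;
public class ReactiveStreamingService {
    private final AzureOpenAiChatModel chatModel;
    public ReactiveStreamingService(AzureOpenAiChatModel chatModel) {
        this.chatModel = chatModel;
    }
    public String streamToString(Prompt prompt) {
        return chatModel.stream(prompt)
                // Some chunks carry no text; map them to empty strings
                .map(chunk -> {
                    var generation = chunk.getResult();
                    var text = generation != null ? generation.getOutput().getText() : null;
                    return text != null ? text : "";
                })
                // Up to 3 retries with exponential backoff starting at 1s
                .retryWhen(Retry.backoff(3, Duration.ofSeconds(1)))
                .collect(Collectors.joining())
                .block(Duration.ofSeconds(60));
    }
}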
Validate and handle edge cases in embeddings.
public class RobustEmbeddingService {
private final AzureOpenAiEmbeddingModel embeddingModel;
public float[] embedWithValidation(String text) {
// Validate input
if (text == null || text.trim().isEmpty()) {
throw new IllegalArgumentException("Text cannot be empty");
}
// Check token count
int tokenCount = estimateTokens(text);
if (tokenCount > 8191) {
text = truncateToTokens(text, 8191);
}
try {
EmbeddingResponse response = embeddingModel.call(
new EmbeddingRequest(List.of(text), null)
);
float[] embedding = response.getResults().get(0).getOutput();
// Validate embedding
if (embedding == null || embedding.length == 0) {
throw new RuntimeException("Received empty embedding");
}
// Check for NaN or Inf values
for (float value : embedding) {
if (Float.isNaN(value) || Float.isInfinite(value)) {
throw new RuntimeException("Invalid embedding values");
}
}
return embedding;
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 400) {
// Retry once with cleaned text; if cleaning changes nothing, rethrow
String cleaned = cleanText(text);
if (!cleaned.equals(text)) {
return embedWithValidation(cleaned);
}
}
throw e;
}
}
private String cleanText(String text) {
// Remove special characters, normalize whitespace
return text.replaceAll("[^\\p{L}\\p{N}\\p{P}\\p{Z}]", " ")
.replaceAll("\\s+", " ")
.trim();
}
}
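When many texts need vectors, batching them into a single EmbeddingRequest cuts round trips and rate-limit pressure; a minimal sketch using the same API as above (results come back in input order):
import java.util.List;
import java.util.stream.Collectors;
import org.springframework.ai.azure.openai.AzureOpenAiEmbeddingModel;
import org.springframework.ai.embedding.EmbeddingRequest;
import org.springframework.ai.embedding.EmbeddingResponse;
public class BatchEmbeddingService {
    private final AzureOpenAiEmbeddingModel embeddingModel;
    public BatchEmbeddingService(AzureOpenAiEmbeddingModel embeddingModel) {
        this.embeddingModel = embeddingModel;
    }
    // One request for the whole batch instead of one call per text
    public List<float[]> embedAll(List<String> texts) {
        EmbeddingResponse response = embeddingModel.call(new EmbeddingRequest(texts, null));
        return response.getResults().stream()
                .map(result -> result.getOutput())
                .collect(Collectors.toList());
    }
}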
Retry image generation with fallback options.
public class ResilientImageService {
private final AzureOpenAiImageModel imageModel;
public String generateImageWithFallback(String prompt) {
// Try with DALL-E 3 HD first
try {
AzureOpenAiImageOptions hdOptions = AzureOpenAiImageOptions.builder()
.deploymentName("dall-e-3")
.width(1024)
.height(1024)
.style("vivid")
.build();
hdOptions.setQuality("hd");
ImageResponse response = imageModel.call(
new ImagePrompt(prompt, hdOptions)
);
return response.getResult().getOutput().getUrl();
} catch (HttpResponseException e) {
if (e.getResponse().getStatusCode() == 400 &&
e.getMessage().contains("content_policy_violation")) {
// Try with safer prompt
String safePrompt = makeSafePrompt(prompt);
try {
AzureOpenAiImageOptions standardOptions =
AzureOpenAiImageOptions.builder()
.deploymentName("dall-e-3")
.width(1024)
.height(1024)
.style("natural")
.build();
ImageResponse response = imageModel.call(
new ImagePrompt(safePrompt, standardOptions)
);
return response.getResult().getOutput().getUrl();
} catch (HttpResponseException e2) {
// Fall back to DALL-E 2
return generateWithDallE2(safePrompt);
}
}
throw e;
}
}
private String generateWithDallE2(String prompt) {
AzureOpenAiImageOptions options = AzureOpenAiImageOptions.builder()
.model("dall-e-2")
.width(512)
.height(512)
.build();
ImageResponse response = imageModel.call(
new ImagePrompt(prompt, options)
);
return response.getResult().getOutput().getUrl();
}
}
Manage concurrent API calls efficiently.
public class ConcurrentRequestManager {
private final AzureOpenAiChatModel chatModel;
private final Semaphore rateLimiter;
private final ExecutorService executor;
public ConcurrentRequestManager(
AzureOpenAiChatModel chatModel,
int maxConcurrent
) {
this.chatModel = chatModel;
this.rateLimiter = new Semaphore(maxConcurrent);
this.executor = Executors.newFixedThreadPool(maxConcurrent);
}
public List<ChatResponse> processBatch(List<Prompt> prompts) {
List<CompletableFuture<ChatResponse>> futures = prompts.stream()
.map(prompt -> CompletableFuture.supplyAsync(() -> {
try {
rateLimiter.acquire();
try {
return chatModel.call(prompt);
} finally {
rateLimiter.release();
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e);
}
}, executor))
.collect(Collectors.toList());
return futures.stream()
.map(CompletableFuture::join)
.collect(Collectors.toList());
}
}
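A short usage sketch; the prompts and the pool size of 4 are placeholders, and long-running applications should also shut the manager's executor down once the batch work is done:
import java.util.List;
import org.springframework.ai.azure.openai.AzureOpenAiChatModel;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
public class BatchCaller {
    public static List<ChatResponse> summarizeReports(AzureOpenAiChatModel chatModel) {
        // At most 4 requests in flight at once; tune this to your deployment's rate limits
        ConcurrentRequestManager manager = new ConcurrentRequestManager(chatModel, 4);
        return manager.processBatch(List.of(
                new Prompt("Summarize the Q1 report"),
                new Prompt("Summarize the Q2 report")));
    }
}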
Download and cache images before expiration.
public class ImageCacheService {
private final AzureOpenAiImageModel imageModel;
private final ImageStorage storage;
public String generateAndStoreImage(String prompt) {
// Generate with base64 to avoid expiration
AzureOpenAiImageOptions options = AzureOpenAiImageOptions.builder()
.responseFormat("b64_json")
.build();
ImageResponse response = imageModel.call(
new ImagePrompt(prompt, options)
);
String base64Image = response.getResult().getOutput().getB64Json();
byte[] imageBytes = Base64.getDecoder().decode(base64Image);
// Store permanently
String permanentUrl = storage.store(imageBytes, "generated.png");
return permanentUrl;
}
public String generateWithUrlCaching(String prompt) {
// Generate with URL
ImageResponse response = imageModel.call(new ImagePrompt(prompt));
String temporaryUrl = response.getResult().getOutput().getUrl();
// Download immediately (URL expires in 1 hour)
byte[] imageBytes = downloadImage(temporaryUrl);
// Store permanently
String permanentUrl = storage.store(imageBytes, "generated.png");
return permanentUrl;
}
}
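The downloadImage helper is not shown above; a minimal sketch using the JDK HttpClient (error handling kept deliberately simple):
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
public class ImageDownloader {
    private final HttpClient httpClient = HttpClient.newHttpClient();
    // Fetch the image bytes before the temporary URL expires
    public byte[] downloadImage(String url) {
        try {
            HttpResponse<byte[]> response = httpClient.send(
                    HttpRequest.newBuilder(URI.create(url)).GET().build(),
                    HttpResponse.BodyHandlers.ofByteArray());
            if (response.statusCode() != 200) {
                throw new IOException("Unexpected HTTP status " + response.statusCode());
            }
            return response.body();
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to download generated image", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Image download interrupted", e);
        }
    }
}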