CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-openai

OpenAI models support for Spring AI, providing comprehensive integration for chat completion, embeddings, image generation, audio transcription, text-to-speech, and content moderation capabilities within Spring Boot applications.

Overview
Eval results
Files

edge-cases.md — docs/examples/

Edge Cases and Troubleshooting

Advanced scenarios, edge cases, and solutions for common issues.

Table of Contents

  • Streaming Edge Cases
  • Token Limit Handling
  • Multimodal Inputs
  • Tool Calling Edge Cases
  • Error Recovery
  • Performance Issues
  • Compatibility Issues

Streaming Edge Cases

Handling Incomplete Streams

import reactor.core.publisher.Flux;
import java.time.Duration;

public class StreamingEdgeCases {

    // Abort the stream if no signal arrives within 30 seconds and swap in a
    // synthesized fallback response instead of propagating the timeout.
    // NOTE(review): assumes TimeoutException is java.util.concurrent.TimeoutException
    // (what Reactor's timeout() emits) — confirm the import.
    public void handleStreamTimeout() {
        chatModel.stream(new Prompt("Long response"))
            .timeout(Duration.ofSeconds(30))
            .onErrorResume(TimeoutException.class, e -> {
                log.warn("Stream timeout, returning partial response");
                return Flux.just(createFallbackResponse());
            })
            .subscribe(response -> processChunk(response));
    }

    // Retry the whole stream up to 3 times on any error; if all retries fail,
    // emit a single fallback response instead of an error signal.
    // NOTE(review): retry(3) resubscribes from scratch, so chunks already
    // processed before the failure may be delivered again.
    public void handleStreamInterruption() {
        chatModel.stream(new Prompt("Generate content"))
            .doOnError(error -> log.error("Stream error", error))
            .retry(3)
            .onErrorReturn(createFallbackResponse())
            .subscribe(response -> processChunk(response));
    }

    // Accumulate all streamed chunks into a single string, blocking the
    // calling thread until the stream completes.
    public String collectFullStream(String prompt) {
        StringBuilder fullResponse = new StringBuilder();

        chatModel.stream(new Prompt(prompt))
            .doOnNext(response -> {
                // Some chunks may carry no content (null), hence the guard.
                String content = response.getResult().getOutput().getContent();
                if (content != null) {
                    fullResponse.append(content);
                }
            })
            .blockLast();  // Wait for completion

        return fullResponse.toString();
    }
}

Stream Backpressure

import reactor.core.scheduler.Schedulers;

// Handle backpressure in high-throughput scenarios
// Shield slow consumers from fast producers in high-throughput scenarios:
// hop off the I/O thread, buffer up to 100 pending chunks, and discard the
// oldest chunk on overflow rather than failing the stream.
public Flux<String> handleBackpressure(String prompt) {
    var upstream = chatModel.stream(new Prompt(prompt));
    return upstream
        .publishOn(Schedulers.boundedElastic())
        .onBackpressureBuffer(100, BufferOverflowStrategy.DROP_OLDEST)
        .map(chunk -> chunk.getResult().getOutput().getContent())
        .filter(Objects::nonNull);
}

Token Limit Handling

Automatic Truncation

public class TokenLimitHandler {
    private static final int MAX_TOKENS = 4096;
    private static final String ELLIPSIS = "...";

    /**
     * Sends the prompt to the chat model, truncating it first when its
     * estimated token count exceeds {@code MAX_TOKENS}.
     *
     * Bug fixed: the previous version truncated to exactly MAX_TOKENS * 4
     * characters and then appended "...", pushing the prompt back over the
     * budget. The ellipsis length is now reserved inside the character cap.
     */
    public String generateWithTruncation(String longPrompt) {
        // Rough heuristic: 1 token ≈ 4 characters of English text.
        int estimatedTokens = longPrompt.length() / 4;

        if (estimatedTokens > MAX_TOKENS) {
            // Reserve room for the ellipsis so the final string stays within budget.
            int maxChars = MAX_TOKENS * 4 - ELLIPSIS.length();
            longPrompt = longPrompt.substring(0, maxChars) + ELLIPSIS;
        }

        return chatModel.call(new Prompt(longPrompt))
            .getResult()
            .getOutput()
            .getContent();
    }
}

Chunking Long Documents

public class DocumentChunker {

    /**
     * Map-reduce summarization for documents too long for a single prompt:
     * summarize each ~3000-character chunk, then summarize the summaries.
     */
    public List<String> processLongDocument(String document) {
        List<String> summaries = new ArrayList<>();
        for (String piece : splitIntoChunks(document, 3000)) {
            summaries.add(
                chatModel.call(new Prompt("Summarize this text: " + piece))
                    .getResult()
                    .getOutput()
                    .getContent());
        }

        // Merge the per-chunk summaries and condense them one final time.
        var merged = String.join("\n\n", summaries);
        var condensed = chatModel.call(
            new Prompt("Create a comprehensive summary from these summaries: " + merged)
        );

        return List.of(condensed.getResult().getOutput().getContent());
    }

    // Splits text into pieces of at most chunkSize characters, preferring to
    // cut just after a period when one exists inside the current window.
    private List<String> splitIntoChunks(String text, int chunkSize) {
        List<String> pieces = new ArrayList<>();
        int length = text.length();
        int cursor = 0;

        while (cursor < length) {
            int cut = Math.min(cursor + chunkSize, length);

            // Only hunt for a sentence boundary when we are not at end-of-text.
            if (cut < length) {
                int period = text.lastIndexOf('.', cut);
                if (period > cursor) {
                    cut = period + 1;
                }
            }

            pieces.add(text.substring(cursor, cut).trim());
            cursor = cut;
        }

        return pieces;
    }
}

Context Window Management

public class ContextWindowManager {
    // Rolling transcript of "User: ..." / "Assistant: ..." lines; the oldest
    // entries are evicted first when the token budget is exceeded.
    private final Deque<String> conversationHistory = new ArrayDeque<>();
    private static final int MAX_HISTORY_TOKENS = 3000;

    // Appends the user turn, sends the entire transcript as the prompt,
    // records the assistant's reply, then trims the oldest turns.
    // NOTE(review): not thread-safe (ArrayDeque is unsynchronized) — confine
    // each instance to one conversation/thread.
    public String chatWithHistory(String userMessage) {
        // Add user message
        conversationHistory.addLast("User: " + userMessage);

        // Build context from history
        String context = buildContext();

        // Generate response
        var response = chatModel.call(new Prompt(context));
        String assistantMessage = response.getResult().getOutput().getContent();

        // Add assistant response
        conversationHistory.addLast("Assistant: " + assistantMessage);

        // Trim history if too long
        trimHistory();

        return assistantMessage;
    }

    // Joins every stored turn with newlines into a single prompt string.
    private String buildContext() {
        return String.join("\n", conversationHistory);
    }

    // Drops the oldest turns until the estimated size fits the budget.
    // Terminates even in the worst case: an empty deque estimates to 0 tokens.
    private void trimHistory() {
        while (estimateTokens(buildContext()) > MAX_HISTORY_TOKENS) {
            conversationHistory.removeFirst();  // Remove oldest messages
        }
    }

    // Rough heuristic: 1 token ≈ 4 characters of English text.
    private int estimateTokens(String text) {
        return text.length() / 4;
    }
}

Multimodal Inputs

Image + Text Input

import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.model.Media;
import org.springframework.core.io.ClassPathResource;
import org.springframework.util.MimeTypeUtils;

public class MultimodalHandler {

    // Sends a single classpath image alongside a text question to a
    // vision-capable model (GPT-4o) and returns the model's answer.
    public String analyzeImageWithText(String question, String imagePath) {
        var imageResource = new ClassPathResource(imagePath);

        // NOTE(review): the media type is hard-coded to JPEG — confirm it
        // matches the actual file, or derive it from the file extension.
        var userMessage = new UserMessage(
            question,
            List.of(new Media(MimeTypeUtils.IMAGE_JPEG, imageResource))
        );

        // Image input requires a multimodal model, hence the per-request override.
        var options = OpenAiChatOptions.builder()
            .model(OpenAiApi.ChatModel.GPT_4_O.getValue())
            .build();

        var response = chatModel.call(new Prompt(List.of(userMessage), options));
        return response.getResult().getOutput().getContent();
    }

    // Attaches several images to one user message in a single request.
    // NOTE(review): no model override here — relies on the configured default
    // model being vision-capable; verify against the application config.
    public String analyzeMultipleImages(String question, List<String> imagePaths) {
        List<Media> mediaList = imagePaths.stream()
            .map(path -> new Media(
                MimeTypeUtils.IMAGE_JPEG,
                new ClassPathResource(path)
            ))
            .toList();

        var userMessage = new UserMessage(question, mediaList);
        var response = chatModel.call(new Prompt(List.of(userMessage)));

        return response.getResult().getOutput().getContent();
    }
}

Audio Input Handling

public class AudioInputHandler {

    /**
     * Transcribes an audio file and then asks the chat model a fixed
     * question about the transcript, prefixed with the caller's context.
     */
    public String processAudioWithContext(String audioPath, String context) {
        // Step 1: speech-to-text.
        var audio = new FileSystemResource(audioPath);
        var transcript = transcriptionModel.call(audio);

        // Step 2: reason over the transcript together with the supplied context.
        var prompt = String.format(
            "Context: %s\n\nTranscript: %s\n\nQuestion: What is the main topic?",
            context, transcript
        );

        var reply = chatModel.call(new Prompt(prompt));
        return reply.getResult().getOutput().getContent();
    }
}

Tool Calling Edge Cases

Handling Tool Call Failures

public class RobustToolCalling {

    // Wraps the tool implementation so a backend failure degrades to a
    // well-formed "unavailable" result the model can explain to the user,
    // instead of an exception aborting the completion mid-turn.
    public String callWithFallback(String prompt) {
        FunctionCallback weatherCallback = FunctionCallback.builder()
            .function("get_weather", "Get weather")
            .inputType(WeatherRequest.class)
            .apply(request -> {
                try {
                    return fetchWeather(request.location());
                } catch (Exception e) {
                    // Deliberate swallow: log and return a sentinel response
                    // rather than failing the tool call.
                    log.error("Weather API failed", e);
                    return new WeatherResponse(
                        request.location(),
                        "unavailable",
                        "Weather data temporarily unavailable"
                    );
                }
            })
            .build();

        // NOTE(review): weatherTool is assumed to be a field holding the tool
        // definition whose name matches "get_weather" — confirm it exists.
        var options = OpenAiChatOptions.builder()
            .tools(List.of(weatherTool))
            .toolCallbacks(Map.of("get_weather", weatherCallback))
            .build();

        return chatModel.call(new Prompt(prompt, options))
            .getResult()
            .getOutput()
            .getContent();
    }
}

Parallel Tool Calls

public class ParallelToolHandler {

    /**
     * Registers three independent tools for one request; parallelToolCalls(true)
     * lets the model request all of them in a single turn instead of one
     * tool call per round-trip.
     */
    public String handleParallelTools(String prompt) {
        var callbacks = Map.of(
            "get_weather", weatherCallback,
            "get_news", newsCallback,
            "get_stock", stockCallback
        );

        var options = OpenAiChatOptions.builder()
            .tools(List.of(weatherTool, newsTool, stockTool))
            .toolCallbacks(callbacks)
            .parallelToolCalls(true)  // Enable parallel execution
            .build();

        var response = chatModel.call(new Prompt(prompt, options));
        return response.getResult().getOutput().getContent();
    }
}

Tool Call Timeout Handling

public class TimeoutAwareToolCalling {

    /**
     * Registers a tool whose execution is capped at 5 seconds so a hung
     * backend cannot stall the entire chat completion.
     *
     * Bug fixed: {@code CompletableFuture.get()} never throws
     * {@code TimeoutException} directly for a future completed by
     * {@code orTimeout(...)} — it throws {@code ExecutionException} with the
     * {@code TimeoutException} as the <em>cause</em>. The original
     * {@code catch (TimeoutException e)} branch was therefore unreachable and
     * timeouts were misreported as generic failures. We now inspect the cause,
     * and also restore the interrupt flag on {@code InterruptedException}.
     */
    public String callWithTimeout(String prompt) {
        FunctionCallback slowCallback = FunctionCallback.builder()
            .function("slow_operation", "Slow operation")
            .inputType(SlowRequest.class)
            .apply(request -> {
                try {
                    return CompletableFuture.supplyAsync(() -> performSlowOperation(request))
                        .orTimeout(5, TimeUnit.SECONDS)
                        .get();
                } catch (ExecutionException e) {
                    // orTimeout() surfaces here, wrapped as the cause.
                    if (e.getCause() instanceof TimeoutException) {
                        return new SlowResponse("Operation timed out");
                    }
                    return new SlowResponse("Operation failed");
                } catch (InterruptedException e) {
                    // Preserve the interrupt so callers can observe it.
                    Thread.currentThread().interrupt();
                    return new SlowResponse("Operation failed");
                } catch (Exception e) {
                    return new SlowResponse("Operation failed");
                }
            })
            .build();

        var options = OpenAiChatOptions.builder()
            .tools(List.of(slowTool))
            .toolCallbacks(Map.of("slow_operation", slowCallback))
            .build();

        return chatModel.call(new Prompt(prompt, options))
            .getResult()
            .getOutput()
            .getContent();
    }
}

Error Recovery

Graceful Degradation

@Service
public class GracefulDegradationService {
    private final OpenAiChatModel primaryModel;
    private final OpenAiChatModel fallbackModel;  // Cheaper model

    // Tries the expensive model first; on a 429 (rate limit) only, retries the
    // same prompt on the cheaper model. Any other client error is rethrown
    // unchanged so callers can handle it.
    public String generateWithFallback(String prompt) {
        try {
            // Try primary model (GPT-4)
            return primaryModel.call(new Prompt(prompt))
                .getResult()
                .getOutput()
                .getContent();

        } catch (OpenAiApiClientErrorException e) {
            // NOTE(review): assumes getStatusCode() yields an int comparable
            // with == 429 — confirm it isn't an HttpStatusCode object.
            if (e.getStatusCode() == 429) {  // Rate limit
                log.warn("Primary model rate limited, using fallback");

                // Use cheaper fallback model (GPT-3.5)
                return fallbackModel.call(new Prompt(prompt))
                    .getResult()
                    .getOutput()
                    .getContent();
            }
            throw e;
        }
    }
}

Partial Response Recovery

public class PartialResponseHandler {

    // Detects responses truncated by the max-token limit (finish reason
    // "length"), asks the model to continue, and stitches the halves together.
    public String handlePartialResponse(String prompt) {
        try {
            var response = chatModel.call(new Prompt(prompt));
            var finishReason = response.getMetadata().getFinishReason();

            if ("length".equals(finishReason)) {
                // Response was cut off due to max tokens
                String partial = response.getResult().getOutput().getContent();

                // NOTE(review): resending the whole partial may itself exceed
                // the context window for very long outputs, and the seam
                // between the two halves is not guaranteed to join cleanly.
                String continuation = chatModel.call(
                    new Prompt("Continue from: " + partial)
                ).getResult().getOutput().getContent();

                return partial + continuation;
            }

            return response.getResult().getOutput().getContent();

        } catch (Exception e) {
            // Best-effort: callers receive a readable sentinel, not an exception.
            log.error("Error generating response", e);
            return "Unable to generate complete response";
        }
    }
}

Performance Issues

Slow Response Times

public class PerformanceOptimizer {

    // Use streaming so the user sees output immediately instead of waiting
    // for the full completion (better perceived latency, same total time).
    public void optimizeWithStreaming(String prompt) {
        chatModel.stream(new Prompt(prompt))
            .subscribe(response -> {
                // Display chunks as they arrive
                String content = response.getResult().getOutput().getContent();
                if (content != null) {
                    displayChunk(content);  // Update UI immediately
                }
            });
    }

    // Fan out independent prompts concurrently.
    // NOTE(review): parallelStream() runs blocking HTTP calls on the shared
    // ForkJoin common pool, which can starve other parallel work — consider a
    // dedicated executor for production use.
    public List<String> parallelGeneration(List<String> prompts) {
        return prompts.parallelStream()
            .map(prompt -> chatModel.call(new Prompt(prompt)))
            .map(response -> response.getResult().getOutput().getContent())
            .toList();
    }

    // Memoize responses keyed by the prompt string.
    // NOTE(review): @Cacheable only takes effect through a Spring proxy — the
    // bean must be Spring-managed and the call must come from outside this class.
    @Cacheable("ai-responses")
    public String cachedGeneration(String prompt) {
        return chatModel.call(new Prompt(prompt))
            .getResult()
            .getOutput()
            .getContent();
    }
}

Memory Issues with Large Responses

public class MemoryEfficientHandler {

    // Stream chunks straight to disk so arbitrarily long responses never
    // accumulate in memory; the writer stays open for the life of the stream.
    public void streamToFile(String prompt, Path outputPath) throws IOException {
        try (var writer = Files.newBufferedWriter(outputPath)) {
            chatModel.stream(new Prompt(prompt))
                .doOnNext(response -> {
                    try {
                        String content = response.getResult().getOutput().getContent();
                        if (content != null) {
                            writer.write(content);
                            writer.flush();  // flush per chunk so progress survives a crash
                        }
                    } catch (IOException e) {
                        // Lambdas can't throw checked exceptions; rewrap so the
                        // failure propagates through the reactive pipeline.
                        throw new UncheckedIOException(e);
                    }
                })
                .blockLast();  // block until completion — writer must remain open
        }
    }
}

Compatibility Issues

Azure OpenAI Compatibility

// Configure for Azure OpenAI
// Configure for Azure OpenAI by pointing the standard client at the Azure endpoint.
// NOTE(review): Azure normally also requires a deployment name in the path and an
// api-version query parameter — confirm whether this builder handles them.
var azureApi = OpenAiApi.builder()
    .apiKey(azureApiKey)
    .baseUrl("https://your-resource.openai.azure.com")
    .build();

// Reuse the regular OpenAI chat model on top of the Azure-backed API client.
var chatModel = OpenAiChatModel.builder()
    .openAiApi(azureApi)
    .build();

Custom OpenAI-Compatible Endpoints

// Use with LocalAI, Ollama, or other OpenAI-compatible services
// Point the client at any OpenAI-compatible server (LocalAI, Ollama, vLLM, ...).
// NOTE(review): local servers typically ignore the API key, but the builder
// still expects a value — hence the placeholder.
var customApi = OpenAiApi.builder()
    .apiKey("not-needed-for-local")
    .baseUrl("http://localhost:8080/v1")
    .build();

// The model name must match one the local server actually exposes.
var chatModel = OpenAiChatModel.builder()
    .openAiApi(customApi)
    .defaultOptions(OpenAiChatOptions.builder()
        .model("custom-model-name")
        .build())
    .build();

Version Compatibility

// Handle deprecated features
// Handle deprecated features
public class CompatibilityHandler {

    // Demonstrates the current tool-calling options: the older 'functions'
    // style is deprecated in favor of 'tools' plus 'toolCallbacks'.
    public String handleDeprecatedFeatures() {
        var options = OpenAiChatOptions.builder()
            .model(OpenAiApi.ChatModel.GPT_4_O.getValue())
            // Use 'tools' instead of deprecated 'functions'
            .tools(List.of(tool))
            .toolCallbacks(Map.of("tool_name", callback))
            .build();

        return chatModel.call(new Prompt("prompt", options))
            .getResult()
            .getOutput()
            .getContent();
    }
}

Common Troubleshooting

Issue: "Invalid API Key"

Cause: API key not set or incorrect

Solution:

// Verify the key is present before building any client — fail fast at startup
// rather than with a confusing 401 on the first request.
String apiKey = System.getenv("OPENAI_API_KEY");
if (apiKey == null || apiKey.isBlank()) {
    throw new IllegalStateException("OPENAI_API_KEY not set");
}

Issue: "Rate Limit Exceeded"

Cause: Too many requests in short time

Solution:

// Exponential backoff: up to 5 attempts, starting at a 1s delay, doubling
// after each failure, capped at 30s between attempts.
var retryTemplate = RetryTemplate.builder()
    .maxAttempts(5)
    .exponentialBackoff(1000, 2.0, 30000)
    .build();

Issue: "Context Length Exceeded"

Cause: Prompt + response exceeds model's context window

Solution:

// Truncate the prompt (or chunk the document) so prompt + expected completion
// fit within the model's context window.
String truncated = truncateToFit(prompt, maxTokens);

Issue: "Connection Timeout"

Cause: Network issues or slow API response

Solution:

// Raise the HTTP read timeout so long generations aren't cut off mid-response.
requestFactory.setReadTimeout(Duration.ofSeconds(120));

Issue: "Streaming Stops Prematurely"

Cause: Network interruption or client-side error

Solution:

// Resubscribe up to 3 times on error; if the stream still fails, emit a single
// fallback value instead of an error signal.
// NOTE(review): each retry restarts generation from the beginning, so chunks
// already displayed may be delivered again.
chatModel.stream(prompt)
    .retry(3)
    .onErrorResume(e -> Flux.just(fallbackResponse))
    .subscribe(response -> process(response));

Additional Resources

  • Real-World Scenarios
  • Chat Models Reference
  • API Clients Reference

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-openai@1.1.0

docs

examples

edge-cases.md

real-world-scenarios.md

index.md

tile.json