OpenAI model support for Spring AI, providing comprehensive integration for chat completion, embeddings, image generation, audio transcription, text-to-speech, and content moderation capabilities within Spring Boot applications.
This guide covers advanced scenarios, edge cases, and solutions to common issues.
import reactor.core.publisher.Flux;
import java.time.Duration;
public class StreamingEdgeCases {

    /**
     * Bounds a streaming completion at 30 seconds; on timeout, logs a warning
     * and emits a single fallback response instead of failing the stream.
     */
    public void handleStreamTimeout() {
        chatModel.stream(new Prompt("Long response"))
                .timeout(Duration.ofSeconds(30))
                .onErrorResume(TimeoutException.class, ex -> {
                    log.warn("Stream timeout, returning partial response");
                    return Flux.just(createFallbackResponse());
                })
                .subscribe(this::processChunk);
    }

    /**
     * Retries an interrupted stream up to three times, degrading to a canned
     * fallback response if every attempt fails. Errors are logged first.
     */
    public void handleStreamInterruption() {
        chatModel.stream(new Prompt("Generate content"))
                .doOnError(ex -> log.error("Stream error", ex))
                .retry(3)
                .onErrorReturn(createFallbackResponse())
                .subscribe(this::processChunk);
    }

    /**
     * Blocks until the stream completes and returns every chunk concatenated.
     * Null chunk contents (e.g. terminal chunks) are skipped.
     */
    public String collectFullStream(String prompt) {
        StringBuilder buffer = new StringBuilder();
        chatModel.stream(new Prompt(prompt))
                .doOnNext(chunk -> {
                    String text = chunk.getResult().getOutput().getContent();
                    if (text != null) {
                        buffer.append(text);
                    }
                })
                .blockLast(); // block the caller until the flux completes
        return buffer.toString();
    }
}

import reactor.core.scheduler.Schedulers;

/**
 * Caps in-flight chunks at 100 for high-throughput consumers; when the buffer
 * overflows, the oldest buffered chunk is dropped. Work is shifted to the
 * boundedElastic scheduler so slow consumers do not stall the I/O thread.
 */
public Flux<String> handleBackpressure(String prompt) {
    var source = chatModel.stream(new Prompt(prompt));
    return source
            .publishOn(Schedulers.boundedElastic())
            .onBackpressureBuffer(100, BufferOverflowStrategy.DROP_OLDEST)
            .map(chunk -> chunk.getResult().getOutput().getContent())
            .filter(Objects::nonNull);
}

public class TokenLimitHandler {

    private static final int MAX_TOKENS = 4096;
    // Rough heuristic: one token is about four characters of English text.
    private static final int CHARS_PER_TOKEN = 4;

    /**
     * Calls the chat model, first truncating the prompt (with a trailing
     * ellipsis) when its estimated token count exceeds the model limit.
     */
    public String generateWithTruncation(String longPrompt) {
        int estimatedTokens = longPrompt.length() / CHARS_PER_TOKEN;
        if (estimatedTokens > MAX_TOKENS) {
            longPrompt = longPrompt.substring(0, MAX_TOKENS * CHARS_PER_TOKEN) + "...";
        }
        var reply = chatModel.call(new Prompt(longPrompt));
        return reply.getResult().getOutput().getContent();
    }
}

public class DocumentChunker {

    /**
     * Map-reduce summarization for documents too long for one request:
     * summarize each ~3000-char chunk, then summarize the summaries.
     * Returns a single-element list holding the final summary.
     */
    public List<String> processLongDocument(String document) {
        List<String> summaries = new ArrayList<>();
        for (String chunk : splitIntoChunks(document, 3000)) {
            var response = chatModel.call(new Prompt("Summarize this text: " + chunk));
            summaries.add(response.getResult().getOutput().getContent());
        }
        // Reduce step: condense the concatenated per-chunk summaries.
        String combinedSummary = String.join("\n\n", summaries);
        var finalResponse = chatModel.call(
                new Prompt("Create a comprehensive summary from these summaries: " + combinedSummary));
        return List.of(finalResponse.getResult().getOutput().getContent());
    }

    /**
     * Splits text into chunks of at most chunkSize characters, preferring to
     * cut just after the last '.' before the limit so sentences stay intact.
     */
    private List<String> splitIntoChunks(String text, int chunkSize) {
        List<String> chunks = new ArrayList<>();
        int from = 0;
        while (from < text.length()) {
            int to = Math.min(from + chunkSize, text.length());
            if (to < text.length()) {
                int lastPeriod = text.lastIndexOf('.', to);
                if (lastPeriod > from) {
                    to = lastPeriod + 1;
                }
            }
            chunks.add(text.substring(from, to).trim());
            from = to;
        }
        return chunks;
    }
}

public class ContextWindowManager {

    // Rolling transcript of the conversation, oldest entry first.
    private final Deque<String> conversationHistory = new ArrayDeque<>();
    private static final int MAX_HISTORY_TOKENS = 3000;

    /**
     * Sends the accumulated conversation plus the new user message to the
     * model, records both sides of the exchange, and trims old history so the
     * context stays within the token budget.
     */
    public String chatWithHistory(String userMessage) {
        conversationHistory.addLast("User: " + userMessage);
        String context = buildContext();
        var response = chatModel.call(new Prompt(context));
        String assistantMessage = response.getResult().getOutput().getContent();
        conversationHistory.addLast("Assistant: " + assistantMessage);
        trimHistory();
        return assistantMessage;
    }

    // Joins the transcript into a single newline-separated prompt.
    private String buildContext() {
        return String.join("\n", conversationHistory);
    }

    /**
     * Evicts the oldest messages until the history fits the budget, but never
     * drops below the two most recent entries — the original loop could empty
     * the deque and silently discard the exchange that was just completed.
     */
    private void trimHistory() {
        while (conversationHistory.size() > 2
                && estimateTokens(buildContext()) > MAX_HISTORY_TOKENS) {
            conversationHistory.removeFirst();
        }
    }

    // Rough heuristic: ~4 characters per token for English text.
    private int estimateTokens(String text) {
        return text.length() / 4;
    }
}import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.model.Media;
import org.springframework.core.io.ClassPathResource;
import org.springframework.util.MimeTypeUtils;
public class MultimodalHandler {

    /**
     * Asks GPT-4o a question about a single classpath image, attached as JPEG.
     */
    public String analyzeImageWithText(String question, String imagePath) {
        var attachment = new Media(MimeTypeUtils.IMAGE_JPEG, new ClassPathResource(imagePath));
        var userMessage = new UserMessage(question, List.of(attachment));
        var options = OpenAiChatOptions.builder()
                .model(OpenAiApi.ChatModel.GPT_4_O.getValue())
                .build();
        var response = chatModel.call(new Prompt(List.of(userMessage), options));
        return response.getResult().getOutput().getContent();
    }

    /**
     * Asks a question about several classpath images in one message.
     * NOTE(review): every image is tagged IMAGE_JPEG regardless of extension —
     * confirm callers only pass JPEG files.
     */
    public String analyzeMultipleImages(String question, List<String> imagePaths) {
        List<Media> attachments = new ArrayList<>();
        for (String path : imagePaths) {
            attachments.add(new Media(MimeTypeUtils.IMAGE_JPEG, new ClassPathResource(path)));
        }
        var userMessage = new UserMessage(question, attachments);
        var response = chatModel.call(new Prompt(List.of(userMessage)));
        return response.getResult().getOutput().getContent();
    }
}

public class AudioInputHandler {

    /**
     * Transcribes an audio file, then asks the chat model for the main topic
     * of the transcript, supplying the caller-provided context string.
     */
    public String processAudioWithContext(String audioPath, String context) {
        // Step 1: speech-to-text.
        String transcript = transcriptionModel.call(new FileSystemResource(audioPath));
        // Step 2: reason over the transcript together with the context.
        String prompt = String.format(
                "Context: %s\n\nTranscript: %s\n\nQuestion: What is the main topic?",
                context, transcript);
        return chatModel.call(new Prompt(prompt))
                .getResult()
                .getOutput()
                .getContent();
    }
}

public class RobustToolCalling {

    /**
     * Registers a weather tool whose callback never throws: if the weather
     * API fails, the error is logged and a placeholder WeatherResponse is
     * returned so the model can still compose an answer.
     */
    public String callWithFallback(String prompt) {
        FunctionCallback weatherCallback = FunctionCallback.builder()
                .function("get_weather", "Get weather")
                .inputType(WeatherRequest.class)
                .apply(request -> {
                    try {
                        return fetchWeather(request.location());
                    } catch (Exception ex) {
                        log.error("Weather API failed", ex);
                        return new WeatherResponse(
                                request.location(),
                                "unavailable",
                                "Weather data temporarily unavailable");
                    }
                })
                .build();
        // NOTE(review): 'weatherTool' is defined elsewhere — confirm it matches
        // the "get_weather" callback registered below.
        var options = OpenAiChatOptions.builder()
                .tools(List.of(weatherTool))
                .toolCallbacks(Map.of("get_weather", weatherCallback))
                .build();
        var reply = chatModel.call(new Prompt(prompt, options));
        return reply.getResult().getOutput().getContent();
    }
}

public class ParallelToolHandler {

    /**
     * Registers three independent tools and enables parallel tool calls so
     * the model may request weather, news, and stock data in a single turn.
     */
    public String handleParallelTools(String prompt) {
        var options = OpenAiChatOptions.builder()
                .tools(List.of(weatherTool, newsTool, stockTool))
                .toolCallbacks(Map.of(
                        "get_weather", weatherCallback,
                        "get_news", newsCallback,
                        "get_stock", stockCallback))
                .parallelToolCalls(true) // let the model invoke tools concurrently
                .build();
        var reply = chatModel.call(new Prompt(prompt, options));
        return reply.getResult().getOutput().getContent();
    }
}

public class TimeoutAwareToolCalling {

    /**
     * Registers a tool whose callback is bounded to 5 seconds. Timeouts and
     * failures are converted into plain-text SlowResponse results so the
     * model always receives a tool reply.
     */
    public String callWithTimeout(String prompt) {
        FunctionCallback slowCallback = FunctionCallback.builder()
                .function("slow_operation", "Slow operation")
                .inputType(SlowRequest.class)
                .apply(request -> {
                    try {
                        return CompletableFuture.supplyAsync(() -> performSlowOperation(request))
                                .orTimeout(5, TimeUnit.SECONDS)
                                .get();
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt(); // restore interrupt status
                        return new SlowResponse("Operation failed");
                    } catch (ExecutionException e) {
                        // orTimeout completes the future exceptionally with a
                        // TimeoutException, which get() surfaces wrapped in an
                        // ExecutionException — so a bare catch (TimeoutException)
                        // was unreachable and timeouts reported "Operation failed".
                        if (e.getCause() instanceof TimeoutException) {
                            return new SlowResponse("Operation timed out");
                        }
                        return new SlowResponse("Operation failed");
                    }
                })
                .build();
        var options = OpenAiChatOptions.builder()
                .tools(List.of(slowTool))
                .toolCallbacks(Map.of("slow_operation", slowCallback))
                .build();
        return chatModel.call(new Prompt(prompt, options))
                .getResult()
                .getOutput()
                .getContent();
    }
}

@Service
public class GracefulDegradationService {

    private final OpenAiChatModel primaryModel;
    private final OpenAiChatModel fallbackModel; // Cheaper model

    /**
     * Constructor injection: Spring supplies both models. Without this
     * constructor the final fields could never be initialized and the class
     * would not compile.
     */
    public GracefulDegradationService(OpenAiChatModel primaryModel,
                                      OpenAiChatModel fallbackModel) {
        this.primaryModel = primaryModel;
        this.fallbackModel = fallbackModel;
    }

    /**
     * Tries the primary (more capable) model first; on an HTTP 429 rate-limit
     * error it retries once with the cheaper fallback model. Any other client
     * error is rethrown for the caller to handle.
     */
    public String generateWithFallback(String prompt) {
        try {
            // Try primary model (GPT-4)
            return primaryModel.call(new Prompt(prompt))
                    .getResult()
                    .getOutput()
                    .getContent();
        } catch (OpenAiApiClientErrorException e) {
            if (e.getStatusCode() == 429) { // Rate limit
                log.warn("Primary model rate limited, using fallback");
                // Use cheaper fallback model (GPT-3.5)
                return fallbackModel.call(new Prompt(prompt))
                        .getResult()
                        .getOutput()
                        .getContent();
            }
            throw e;
        }
    }
}

public class PartialResponseHandler {

    /**
     * Detects responses cut off by the max-token limit (finish reason
     * "length") and asks the model to continue, returning the stitched text.
     * Any failure degrades to a fixed apology string.
     */
    public String handlePartialResponse(String prompt) {
        try {
            var response = chatModel.call(new Prompt(prompt));
            // NOTE(review): assumes the finish reason is reported as the
            // lowercase string "length" — confirm against the metadata API.
            var finishReason = response.getMetadata().getFinishReason();
            if (!"length".equals(finishReason)) {
                return response.getResult().getOutput().getContent();
            }
            // Response was truncated: request a continuation and stitch.
            String partial = response.getResult().getOutput().getContent();
            String continuation = chatModel.call(new Prompt("Continue from: " + partial))
                    .getResult().getOutput().getContent();
            return partial + continuation;
        } catch (Exception ex) {
            log.error("Error generating response", ex);
            return "Unable to generate complete response";
        }
    }
}

public class PerformanceOptimizer {

    /**
     * Streams the completion and renders each chunk as soon as it arrives,
     * improving perceived latency for UI consumers.
     */
    public void optimizeWithStreaming(String prompt) {
        chatModel.stream(new Prompt(prompt))
                .subscribe(chunk -> {
                    String text = chunk.getResult().getOutput().getContent();
                    if (text != null) {
                        displayChunk(text); // update the UI immediately
                    }
                });
    }

    /**
     * Issues the prompts concurrently and returns their contents in order.
     * NOTE(review): parallelStream() runs blocking HTTP calls on the shared
     * ForkJoin common pool — consider a dedicated executor under heavy load.
     */
    public List<String> parallelGeneration(List<String> prompts) {
        return prompts.parallelStream()
                .map(p -> chatModel.call(new Prompt(p)))
                .map(r -> r.getResult().getOutput().getContent())
                .toList();
    }

    /**
     * Caches responses via Spring Cache, keyed by the exact prompt string.
     */
    @Cacheable("ai-responses")
    public String cachedGeneration(String prompt) {
        var reply = chatModel.call(new Prompt(prompt));
        return reply.getResult().getOutput().getContent();
    }
}

public class MemoryEfficientHandler {

    /**
     * Streams chunks straight to a file so the full response is never held in
     * memory. IOExceptions raised inside the reactive callback are rethrown
     * as UncheckedIOException to cross the lambda boundary.
     */
    public void streamToFile(String prompt, Path outputPath) throws IOException {
        try (var writer = Files.newBufferedWriter(outputPath)) {
            chatModel.stream(new Prompt(prompt))
                    .doOnNext(chunk -> {
                        String text = chunk.getResult().getOutput().getContent();
                        if (text == null) {
                            return;
                        }
                        try {
                            writer.write(text);
                            writer.flush(); // persist each chunk immediately
                        } catch (IOException ex) {
                            throw new UncheckedIOException(ex);
                        }
                    })
                    .blockLast();
        }
    }
}

// Configure for Azure OpenAI: point the client at your Azure resource endpoint.
var azureApi = OpenAiApi.builder()
        .apiKey(azureApiKey)
        .baseUrl("https://your-resource.openai.azure.com")
        .build();
var chatModel = OpenAiChatModel.builder()
        .openAiApi(azureApi)
        .build();

// Use with LocalAI, Ollama, or other OpenAI-compatible services
var customApi = OpenAiApi.builder()
        .apiKey("not-needed-for-local") // local servers usually ignore the key
        .baseUrl("http://localhost:8080/v1")
        .build();
var chatModel = OpenAiChatModel.builder()
        .openAiApi(customApi)
        .defaultOptions(OpenAiChatOptions.builder()
                .model("custom-model-name") // must match the locally served model id
                .build())
        .build();

// Handle deprecated features
public class CompatibilityHandler {

    /**
     * Migration example: registers tools via the current 'tools' +
     * 'toolCallbacks' options instead of the deprecated 'functions' API.
     */
    public String handleDeprecatedFeatures() {
        var options = OpenAiChatOptions.builder()
                .model(OpenAiApi.ChatModel.GPT_4_O.getValue())
                // 'tools' replaces the deprecated 'functions' option
                .tools(List.of(tool))
                .toolCallbacks(Map.of("tool_name", callback))
                .build();
        var reply = chatModel.call(new Prompt("prompt", options));
        return reply.getResult().getOutput().getContent();
    }
}

Cause: API key not set or incorrect.
Solution:
// Verify API key is set
String apiKey = System.getenv("OPENAI_API_KEY");
if (apiKey == null || apiKey.isBlank()) {
    throw new IllegalStateException("OPENAI_API_KEY not set");
}

Cause: Too many requests in a short time.
Solution:
// Implement exponential backoff
var retryTemplate = RetryTemplate.builder()
    .maxAttempts(5)
    .exponentialBackoff(1000, 2.0, 30000)
    .build();

Cause: Prompt + response exceeds the model's context window.
Solution:
// Truncate the prompt or use chunking
String truncated = truncateToFit(prompt, maxTokens);

Cause: Network issues or a slow API response.
Solution:
// Increase the read timeout
requestFactory.setReadTimeout(Duration.ofSeconds(120));

Cause: Network interruption or a client-side error.
Solution:
// Add retry logic for streams
chatModel.stream(prompt)
    .retry(3)
    .onErrorResume(e -> Flux.just(fallbackResponse))
    .subscribe(response -> process(response));

Install with Tessl CLI:
npx tessl i tessl/maven-org-springframework-ai--spring-ai-openai