Core model interfaces and abstractions for the Spring AI framework, providing a portable API for chat, embeddings, images, audio, and tool calling across multiple AI providers.
This document covers edge cases, error scenarios, and advanced usage patterns for Spring AI Model.
@Component
public class RobustTools {

    /**
     * Tool whose backing data may fail JSON serialization. On failure it
     * returns a well-formed JSON error object instead of throwing, so the
     * model always receives parseable output.
     */
    @Tool(description = "Get data that might be malformed")
    public String getData(@ToolParam(description = "Query") String query) {
        try {
            // Your implementation
            Object result = queryData(query);
            return new ObjectMapper().writeValueAsString(result);
        } catch (JsonProcessingException e) {
            // Return the error as valid JSON. Escape backslashes BEFORE quotes
            // (otherwise the inserted escape characters corrupt the payload),
            // and guard against a null exception message (would NPE on replace).
            String msg = e.getMessage() == null ? "serialization failed" : e.getMessage();
            msg = msg.replace("\\", "\\\\").replace("\"", "\\\"");
            return "{\"error\": \"" + msg + "\"}";
        }
    }
}

/**
 * Streams a chat response with resilience: a 30s inactivity timeout, up to
 * two retries, and a terminal fallback message if all attempts fail.
 *
 * Operator-order fix: timeout and retry must sit BEFORE onErrorResume. In
 * the original order, onErrorResume converted upstream errors into a
 * fallback value first, so retry(2) could never observe (and retry) them,
 * and the timeout applied only after that conversion.
 */
public Flux<String> resilientStream(String message) {
    return chatModel.stream(new Prompt(message))
        .map(response -> response.getResult().getOutput().getText())
        .timeout(Duration.ofSeconds(30))  // fail the stream if it stalls
        .retry(2)                         // resubscribe up to twice on error
        .onErrorResume(e -> {             // final fallback after retries
            log.error("Streaming error", e);
            return Flux.just("[Error: " + e.getMessage() + "]");
        });
}

/**
 * Calls the chat model and normalizes degenerate results (null response,
 * null result, blank text, tool-call-only turns) into descriptive strings
 * instead of propagating nulls to the caller.
 */
public String safeChat(String message) {
    ChatResponse chatResponse = chatModel.call(new Prompt(message));
    boolean missing = chatResponse == null || chatResponse.getResult() == null;
    if (missing) {
        return "No response received";
    }
    AssistantMessage assistant = chatResponse.getResult().getOutput();
    String content = assistant.getText();
    if (content != null && !content.isBlank()) {
        return content;
    }
    // Blank text: distinguish a tool-call turn from a genuinely empty reply.
    return assistant.hasToolCalls() ? "[Model requested tool calls]" : "[Empty response]";
}

/**
 * Sends a long message with a capped output budget and, when the provider
 * reports truncation (finish reason "length" or "max_tokens"), asks the
 * model to continue and concatenates both parts.
 */
public String handleLongInput(String longMessage) {
    ChatOptions options = ChatOptions.builder()
        .maxTokens(2000)
        .build();
    ChatResponse response = chatModel.call(new Prompt(longMessage, options));
    // Check whether the provider stopped early because of the token cap.
    String reason = response.getResult().getMetadata().getFinishReason();
    boolean truncated = "length".equals(reason) || "max_tokens".equals(reason);
    if (!truncated) {
        return response.getResult().getOutput().getText();
    }
    log.warn("Response was truncated due to token limit");
    // Ask the model to pick up where it stopped, replaying the context.
    List<Message> continuationContext = List.of(
        new UserMessage(longMessage),
        response.getResult().getOutput(),
        new UserMessage("Please continue")
    );
    ChatResponse continuation = chatModel.call(new Prompt(continuationContext));
    return response.getResult().getOutput().getText()
        + continuation.getResult().getOutput().getText();
}public String adaptiveChat(String message) {
// Adaptively sizes the output-token budget so estimated input plus output
// stays within the model's context window.
// Estimate input tokens (rough estimate: ~4 chars per token)
int estimatedInputTokens = message.length() / 4;
int modelMaxTokens = 4096; // Example: GPT-4 Turbo context
int desiredOutputTokens = 1000;
// Ensure we don't exceed context limit
if (estimatedInputTokens + desiredOutputTokens > modelMaxTokens) {
int allowedOutputTokens = modelMaxTokens - estimatedInputTokens - 100; // Safety margin
// NOTE(review): when the input alone nearly fills (or exceeds) the context
// window, allowedOutputTokens goes negative and the 100-token floor below
// can still push the request over the limit — consider rejecting or
// truncating such inputs instead. Confirm desired policy.
desiredOutputTokens = Math.max(100, allowedOutputTokens);
}
ChatOptions options = ChatOptions.builder()
.maxTokens(desiredOutputTokens)
.build();
return chatModel.call(new Prompt(message, options))
.getResult().getOutput().getText();
}

/**
 * Executes any tool calls requested by the model, degrading gracefully:
 * unknown tools and tool failures produce JSON error payloads instead of
 * exceptions, and the conversation then continues with the tool results.
 */
public String executeWithFallback(String message) {
    ChatResponse response = chatModel.call(new Prompt(message));
    if (!response.getResult().getOutput().hasToolCalls()) {
        return response.getResult().getOutput().getText();
    }
    List<ToolResponseMessage.ToolResponse> responses = new ArrayList<>();
    for (AssistantMessage.ToolCall toolCall :
            response.getResult().getOutput().getToolCalls()) {
        ToolCallback tool = toolResolver.resolve(toolCall.name());
        String result;
        if (tool == null) {
            // Tool not found - provide error response
            result = "{\"error\": \"Tool not found: " + toolCall.name() + "\"}";
            log.warn("Tool not found: {}", toolCall.name());
        } else {
            try {
                result = tool.call(toolCall.arguments());
            } catch (Exception e) {
                // FIX: escape backslashes and quotes (and guard null message)
                // so the exception text cannot corrupt the JSON payload —
                // consistent with the RobustTools error handling.
                String msg = e.getMessage() == null ? "tool execution failed" : e.getMessage();
                msg = msg.replace("\\", "\\\\").replace("\"", "\\\"");
                result = "{\"error\": \"" + msg + "\"}";
                log.error("Tool execution failed", e);
            }
        }
        responses.add(new ToolResponseMessage.ToolResponse(
            toolCall.id(),
            toolCall.name(),
            result
        ));
    }
    // Continue conversation with tool results. FIX: include the original
    // user message — several providers reject a history that begins with an
    // assistant tool-call turn and no preceding user turn.
    List<Message> messages = List.of(
        new UserMessage(message),
        response.getResult().getOutput(),
        new ToolResponseMessage(responses)
    );
    ChatResponse finalResponse = chatModel.call(new Prompt(messages));
    return finalResponse.getResult().getOutput().getText();
}public class CircularToolCallPrevention {
// Bounds the tool-calling loop and detects when the model repeats the exact
// same batch of tool calls, which would otherwise loop until the cap.
private static final int MAX_TOOL_ITERATIONS = 10;
public String executeWithCircularPrevention(String message) {
List<Message> messages = new ArrayList<>();
messages.add(new UserMessage(message));
// Signatures of tool-call batches already executed in this conversation.
Set<String> toolCallHistory = new HashSet<>();
int iteration = 0;
while (iteration < MAX_TOOL_ITERATIONS) {
Prompt prompt = new Prompt(messages);
ChatResponse response = chatModel.call(prompt);
AssistantMessage assistant = response.getResult().getOutput();
messages.add(assistant);
if (!assistant.hasToolCalls()) {
return assistant.getText();
}
// Check for circular calls: a batch signature is the sorted "name:arguments"
// pairs joined with "|", so the same calls in any order produce one signature.
String callSignature = assistant.getToolCalls().stream()
.map(tc -> tc.name() + ":" + tc.arguments())
.sorted()
.collect(Collectors.joining("|"));
if (toolCallHistory.contains(callSignature)) {
log.warn("Circular tool call detected, breaking loop");
messages.add(new UserMessage(
"Previous tool calls didn't help. Please provide a direct answer."
));
// One more attempt. NOTE(review): these generic ChatOptions only set
// temperature — they do not actually disable tools; the nudge comes from
// the user message above. Confirm whether tools should be removed here.
ChatOptions noToolOptions = ChatOptions.builder()
.temperature(0.7)
.build();
ChatResponse finalResponse = chatModel.call(new Prompt(messages, noToolOptions));
return finalResponse.getResult().getOutput().getText();
}
toolCallHistory.add(callSignature);
// Execute tools...
List<ToolResponseMessage.ToolResponse> toolResponses = executeToolCalls(assistant);
messages.add(new ToolResponseMessage(toolResponses));
iteration++;
}
return "Maximum tool iterations reached";
}
}

/**
 * Embeds a batch of texts, substituting the placeholder "[EMPTY]" for null
 * or blank entries so providers that reject empty input still succeed and
 * the result list stays index-aligned with the input.
 */
public List<float[]> safeEmbed(List<String> texts) {
    List<String> sanitized = new ArrayList<>(texts.size());
    for (String text : texts) {
        boolean blank = text == null || text.isBlank();
        sanitized.add(blank ? "[EMPTY]" : text);
    }
    return embeddingModel.embed(sanitized);
}

/**
 * Resizes an embedding vector to exactly targetDimensions: returned as-is
 * when already the right size, zero-padded when too short, truncated when
 * too long.
 *
 * @param embedding        source vector (never modified)
 * @param targetDimensions desired length (non-negative)
 * @return a vector of length targetDimensions; the same instance when no
 *         resize was needed (preserves the original aliasing behavior)
 */
public float[] normalizeEmbedding(float[] embedding, int targetDimensions) {
    if (embedding.length == targetDimensions) {
        return embedding;
    }
    // Arrays.copyOf both truncates (target < length) and zero-pads
    // (target > length), replacing the two manual arraycopy branches.
    return java.util.Arrays.copyOf(embedding, targetDimensions);
}public class CustomModerationService {
// Layers custom score thresholds on top of the provider's binary flag:
// flagged -> BLOCKED; high category scores -> REQUIRES_REVIEW; moderate
// scores -> WARNING; otherwise APPROVED.
private final ModerationModel moderationModel;
public ModerationDecision moderateWithCustomThresholds(String content) {
ModerationResponse response = moderationModel.call(new ModerationPrompt(content));
Moderation moderation = response.getResult().getOutput();
// Provider-level flag: block unconditionally, no score inspection needed.
if (moderation.isFlagged()) {
return ModerationDecision.BLOCKED;
}
// Check custom score thresholds
CategoryScores scores = moderation.getCategoryScores();
// High threshold for review (0.5-0.8)
if (scores.getHate() > 0.5 || scores.getViolence() > 0.5) {
return ModerationDecision.REQUIRES_REVIEW;
}
// Low threshold for warning (0.3-0.5)
if (scores.getHate() > 0.3 || scores.getViolence() > 0.3) {
return ModerationDecision.WARNING;
}
return ModerationDecision.APPROVED;
}
// Outcomes ordered from least to most restrictive.
enum ModerationDecision {
APPROVED, WARNING, REQUIRES_REVIEW, BLOCKED
}
}

/**
 * Returns true when the content passes moderation. Category flags are
 * nullable because not every provider supports every category, so each one
 * is checked null-safely.
 */
public boolean moderateAcrossProviders(String content) {
    ModerationResponse response = moderationModel.call(new ModerationPrompt(content));
    Moderation moderation = response.getResult().getOutput();
    Categories categories = moderation.getCategories();
    // Boolean.TRUE.equals(x) is the null-safe form of (x != null && x).
    boolean flagged = Boolean.TRUE.equals(categories.isHate())
        || Boolean.TRUE.equals(categories.isViolence())
        || Boolean.TRUE.equals(categories.isSexual());
    // Surface categories this provider does not report at all.
    if (categories.isFinancial() == null) {
        log.debug("Provider doesn't support financial category moderation");
    }
    return !flagged;
}

/**
 * Collects a streaming chat response into a single String, blocking the
 * caller. Appends a status marker when the stream errors, is interrupted,
 * or does not complete within 60 seconds.
 */
public String completeStreamingResponse(String message) {
    StringBuilder fullResponse = new StringBuilder();
    CountDownLatch latch = new CountDownLatch(1);
    AtomicReference<Throwable> error = new AtomicReference<>();
    chatModel.stream(new Prompt(message))
        .map(response -> response.getResult().getOutput().getText())
        .subscribe(
            chunk -> fullResponse.append(chunk),  // onNext: accumulate
            e -> {                                // onError: record, release
                error.set(e);
                latch.countDown();
            },
            latch::countDown                      // onComplete: release
        );
    try {
        // BUG FIX: await() returns false on timeout — the original ignored
        // the return value and silently returned a partial response.
        boolean completed = latch.await(60, TimeUnit.SECONDS);
        if (!completed) {
            log.warn("Streaming did not complete within 60 seconds");
            return fullResponse.toString() + " [TIMEOUT]";
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();  // restore the interrupt flag
        return fullResponse.toString() + " [INTERRUPTED]";
    }
    Throwable streamError = error.get();
    if (streamError != null) {
        log.error("Streaming failed", streamError);
        return fullResponse.toString() + " [ERROR: " + streamError.getMessage() + "]";
    }
    return fullResponse.toString();
}

/**
 * Streams a response with a 30-second timeout, mapping timeouts and other
 * errors to distinct inline markers.
 *
 * FIX: the original used timeout(duration, fallbackFlux), which emits the
 * fallback instead of raising TimeoutException — making the instanceof
 * TimeoutException branch below unreachable dead code. Using the
 * single-argument timeout lets onErrorResume handle both cases as written.
 */
public Flux<String> streamWithTimeout(String message) {
    return chatModel.stream(new Prompt(message))
        .map(response -> response.getResult().getOutput().getText())
        .timeout(Duration.ofSeconds(30))
        .onErrorResume(e -> {
            if (e instanceof TimeoutException) {
                return Flux.just(" [Response timed out]");
            }
            return Flux.just(" [Error: " + e.getMessage() + "]");
        });
}

/**
 * Builds a single user message carrying several images and, optionally, an
 * audio clip, then asks the model to analyze them all in one call.
 */
public String analyzeMultipleMediaTypes(List<String> imageUrls, String audioUrl) {
    List<Media> mediaItems = new ArrayList<>();
    // Images first, in the order supplied.
    imageUrls.forEach(url -> mediaItems.add(new Media(MimeType.IMAGE_JPEG, url)));
    // Audio support is provider-dependent; only attached when supplied.
    if (audioUrl != null) {
        mediaItems.add(new Media(MimeType.AUDIO_MP3, audioUrl));
    }
    UserMessage message = new UserMessage("Analyze all provided media", mediaItems);
    Prompt prompt = new Prompt(message);
    return chatModel.call(prompt).getResult().getOutput().getText();
}public String analyzeImageSafely(byte[] imageBytes) {
// Validates image size before sending it to a vision-capable model.
// Check size limits (most providers have limits around 20MB)
int maxSize = 20 * 1024 * 1024; // 20MB
if (imageBytes.length > maxSize) {
// Compress or reject
throw new IllegalArgumentException("Image too large: " + imageBytes.length);
}
String base64 = Base64.getEncoder().encodeToString(imageBytes);
// NOTE(review): other snippets pass a URL as this Media argument; here raw
// base64 text is passed instead. Verify this Media constructor accepts bare
// base64 data rather than requiring a data URI or a Resource.
Media media = new Media(MimeType.IMAGE_JPEG, base64);
UserMessage message = new UserMessage("Analyze this image", List.of(media));
return chatModel.call(new Prompt(message))
.getResult().getOutput().getText();
}

/**
 * Chat wrapper that keeps conversation memory bounded: once the stored
 * history reaches 50 messages, the older half is collapsed into an
 * LLM-generated summary and only the summary plus the recent half is kept.
 */
public class SmartChatMemory {
    private final ChatMemory chatMemory;
    private final ChatModel chatModel;

    public SmartChatMemory(ChatMemory chatMemory, ChatModel chatModel) {
        this.chatMemory = chatMemory;
        this.chatModel = chatModel;
    }

    public String chatWithSmartMemory(String userId, String message) {
        List<Message> history = chatMemory.get(userId);
        // If history is at limit, summarize old messages.
        if (history.size() >= 50) {
            // Summarize first half of conversation.
            List<Message> oldMessages = history.subList(0, 25);
            String summary = summarizeConversation(oldMessages);
            // BUG FIX: snapshot the recent half BEFORE clearing. subList is a
            // view; if get() returns a live list, clear() would empty the view
            // and the recent messages would be lost.
            List<Message> recent = new ArrayList<>(history.subList(25, history.size()));
            chatMemory.clear(userId);
            chatMemory.add(userId, new SystemMessage(
                "Previous conversation summary: " + summary
            ));
            chatMemory.add(userId, recent);
        }
        // Continue as normal: record the user turn, ask the model with the
        // (possibly compacted) history, record the reply, return its text.
        chatMemory.add(userId, new UserMessage(message));
        ChatResponse response = chatModel.call(new Prompt(chatMemory.get(userId)));
        chatMemory.add(userId, response.getResult().getOutput());
        return response.getResult().getOutput().getText();
    }

    /** Flattens the messages to text and asks the model for a concise summary. */
    private String summarizeConversation(List<Message> messages) {
        String conversationText = messages.stream()
            .map(Message::getText)
            .collect(Collectors.joining("\n"));
        return chatModel.call(
            "Summarize this conversation concisely: " + conversationText
        );
    }
}

/**
 * Serializes access to each conversation's memory with a per-user lock so
 * concurrent requests for the same user cannot interleave reads and writes.
 * NOTE: the lock map grows with the number of distinct users and is never
 * pruned; acceptable for bounded user sets.
 */
public class ThreadSafeChatMemory {
    private final ChatMemory chatMemory;
    private final ChatModel chatModel;
    private final ConcurrentHashMap<String, ReentrantLock> conversationLocks =
        new ConcurrentHashMap<>();

    public ThreadSafeChatMemory(ChatMemory chatMemory, ChatModel chatModel) {
        this.chatMemory = chatMemory;
        this.chatModel = chatModel;
    }

    public String threadSafeChat(String userId, String message) {
        Lock lock = conversationLocks.computeIfAbsent(userId, k -> new ReentrantLock());
        lock.lock();
        try {
            // BUG FIX: do not mutate the list returned by chatMemory.get() —
            // it may be unmodifiable, or a live view of the store, in which
            // case the user message would appear twice once chatMemory.add
            // runs below. Build the prompt from a defensive copy instead.
            List<Message> promptMessages = new ArrayList<>(chatMemory.get(userId));
            promptMessages.add(new UserMessage(message));
            ChatResponse response = chatModel.call(new Prompt(promptMessages));
            // Persist both turns only after a successful call.
            chatMemory.add(userId, new UserMessage(message));
            chatMemory.add(userId, response.getResult().getOutput());
            return response.getResult().getOutput().getText();
        } finally {
            lock.unlock();
        }
    }
}

/**
 * Converts raw model output into an instance of clazz. First pass: strip
 * thinking tags, markdown code fences, and stray whitespace, then convert.
 * If that fails, fall back to extracting the outermost JSON object span and
 * retry.
 *
 * @throws ConversionException when both attempts fail
 */
public <T> T robustConvert(String response, Class<T> clazz) {
    BeanOutputConverter<T> converter = new BeanOutputConverter<>(clazz);
    // Composite cleaner applied before the first conversion attempt.
    CompositeResponseTextCleaner cleaner = CompositeResponseTextCleaner.builder()
        .add(new ThinkingTagCleaner())
        .add(new MarkdownCodeBlockCleaner())
        .add(new WhitespaceCleaner())
        .build();
    try {
        return converter.convert(cleaner.clean(response));
    } catch (Exception first) {
        log.warn("Initial conversion failed, attempting aggressive cleaning");
        // Aggressive fallback: keep only the outermost {...} span.
        try {
            return converter.convert(extractJsonObject(response));
        } catch (Exception second) {
            log.error("Conversion failed even after aggressive cleaning", second);
            throw new ConversionException("Failed to parse response", second);
        }
    }
}
/**
 * Returns the substring spanning the first '{' through the last '}', or the
 * input unchanged when no such span exists.
 */
private String extractJsonObject(String text) {
    int open = text.indexOf('{');
    int close = text.lastIndexOf('}');
    boolean found = open >= 0 && close > open;
    return found ? text.substring(open, close + 1) : text;
}record FlexiblePerson(
// Record target for structured-output conversion; only name is required.
String name, // Required
// Note: required = false is Jackson's default; kept to document intent.
@JsonProperty(required = false) Integer age, // Optional
@JsonProperty(required = false) String email // Optional
) {}
// Default converter, using BeanOutputConverter's own ObjectMapper.
BeanOutputConverter<FlexiblePerson> converter = new BeanOutputConverter<>(FlexiblePerson.class);
// Configure ObjectMapper to handle nulls
ObjectMapper mapper = new ObjectMapper();
mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
// Tolerate extra fields the model may invent instead of failing the parse.
mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
BeanOutputConverter<FlexiblePerson> robustConverter =
new BeanOutputConverter<>(FlexiblePerson.class, mapper);

/**
 * Demonstrates provider-specific tuning on top of the portable options.
 *
 * FIX: ChatOptions.builder() returns a portable implementation, so the
 * original `options instanceof OpenAiChatOptions` / AnthropicChatOptions
 * checks could never be true — both branches were dead code. To use
 * provider extensions, construct the provider-specific options type
 * directly, dispatching on the configured model.
 */
public String chatWithProviderFeatures(String message) {
    ChatOptions options;
    if (chatModel instanceof OpenAiChatModel) {
        OpenAiChatOptions openAiOptions = OpenAiChatOptions.builder()
            .temperature(0.7)
            .maxTokens(1000)
            .build();
        openAiOptions.setResponseFormat(new ResponseFormat("json_object"));
        openAiOptions.setSeed(42); // For reproducibility
        options = openAiOptions;
    } else if (chatModel instanceof AnthropicChatModel) {
        AnthropicChatOptions anthropicOptions = AnthropicChatOptions.builder()
            .temperature(0.7)
            .maxTokens(1000)
            .build();
        anthropicOptions.setTopK(40);
        options = anthropicOptions;
    } else {
        // Unknown provider: fall back to portable options only.
        options = ChatOptions.builder()
            .temperature(0.7)
            .maxTokens(1000)
            .build();
    }
    return chatModel.call(new Prompt(message, options))
        .getResult().getOutput().getText();
}@Service
// Tries the primary (OpenAI) model first and falls back to the Anthropic
// model on any failure; if both fail, an exception is propagated.
public class MultiProviderFallback {
@Autowired @Qualifier("openai")
private ChatModel primaryModel;
@Autowired @Qualifier("anthropic")
private ChatModel fallbackModel;
public String chatWithFallback(String message) {
try {
return primaryModel.call(message);
} catch (Exception e) {
// Primary failure is logged here; only the fallback failure is rethrown.
log.warn("Primary model failed, using fallback", e);
try {
return fallbackModel.call(message);
} catch (Exception e2) {
log.error("Both models failed", e2);
// NOTE(review): only e2 is attached as the cause; the primary failure e
// survives only in the warn log above.
throw new RuntimeException("All models unavailable", e2);
}
}
}
}

/**
 * Logs token usage and rate-limit headroom from response metadata. Every
 * field is treated as optional, since providers differ in what they report.
 */
public void processResponseSafely(ChatResponse response) {
    ChatResponseMetadata metadata = response.getMetadata();
    // Usage may be absent for some providers/scenarios.
    Usage usage = metadata.getUsage();
    if (usage != null) {
        Integer totalTokens = usage.getTotalTokens();
        if (totalTokens != null) {
            log.info("Tokens used: {}", totalTokens);
        }
    }
    // Rate-limit info may also be unavailable.
    RateLimit rateLimit = metadata.getRateLimit();
    if (rateLimit != null && rateLimit.getRequestsRemaining() != null) {
        log.info("Requests remaining: {}", rateLimit.getRequestsRemaining());
    }
}public void extractProviderMetadata(ChatResponse response) {
// Reads provider-native metadata out of the generic metadata map.
ChatResponseMetadata metadata = response.getMetadata();
// Access provider-specific metadata safely
// NOTE(review): the "providerMetadata" key is assumed here — confirm the
// actual key exposed by the provider integration in use.
Object nativeMetadata = metadata.get("providerMetadata");
if (nativeMetadata instanceof OpenAiMetadata openAiMeta) {
// Access OpenAI-specific fields
String systemFingerprint = openAiMeta.getSystemFingerprint();
log.info("System fingerprint: {}", systemFingerprint);
}
}@Service
// Caps this process at 10 concurrent model calls with a semaphore; extra
// callers block in acquire() until a permit frees up.
public class RateLimitedChatService {
private final ChatModel chatModel;
private final Semaphore rateLimiter = new Semaphore(10); // Max 10 concurrent
public String concurrentSafeChat(String message) throws InterruptedException {
// acquire() before try / release() in finally guarantees the permit is
// returned even when the call throws.
rateLimiter.acquire();
try {
return chatModel.call(message);
} finally {
rateLimiter.release();
}
}
public CompletableFuture<String> asyncChat(String message) {
// Runs on the common ForkJoinPool; the semaphore still bounds concurrency.
return CompletableFuture.supplyAsync(() -> {
try {
return concurrentSafeChat(message);
} catch (InterruptedException e) {
// Restore the interrupt flag before wrapping, per the
// InterruptedException contract.
Thread.currentThread().interrupt();
throw new RuntimeException("Interrupted", e);
}
});
}
}

/**
 * Transcribes an uploaded audio file by spooling it to a temporary .mp3
 * file, always deleting the temp file afterwards (success or failure).
 */
public String transcribeWithCleanup(MultipartFile uploadedFile) {
    File temp = null;
    try {
        // Spool the upload to disk so the model can read it as a Resource.
        temp = File.createTempFile("audio", ".mp3");
        uploadedFile.transferTo(temp);
        return transcriptionModel.transcribe(new FileSystemResource(temp));
    } catch (Exception e) {
        log.error("Transcription failed", e);
        throw new RuntimeException("Transcription error", e);
    } finally {
        // Cleanup runs on every path; a failed delete is only logged.
        if (temp != null && temp.exists() && !temp.delete()) {
            log.warn("Failed to delete temp file: {}", temp.getPath());
        }
    }
}

/**
 * Handles prompts that may exceed the context window. Oversized prompts are
 * truncated (leaving headroom for the instruction text), summarized, and
 * then answered from the summary; prompts within budget are sent directly.
 */
public String handleLongPrompt(String longPrompt) {
    // Estimate tokens (rough: ~4 chars per token).
    int estimatedTokens = longPrompt.length() / 4;
    int maxContextTokens = 8000; // Example limit
    if (estimatedTokens > maxContextTokens) {
        // Strategy 1: truncate. BUG FIX: reserve headroom below the limit —
        // the original truncated to exactly the limit, so prepending the
        // "Summarize ..." instruction pushed the request back over it.
        int budgetTokens = maxContextTokens - 200; // margin for instruction text
        int maxChars = budgetTokens * 4;
        longPrompt = longPrompt.substring(0, Math.min(maxChars, longPrompt.length()));
        // Strategy 2: summarize the (truncated) text, then answer from it.
        String summary = chatModel.call(
            "Summarize this in 500 words: " + longPrompt
        );
        return chatModel.call("Based on this summary, answer: " + summary);
    }
    return chatModel.call(longPrompt);
}public String safeTemplateRendering(String userInput) {
// Escapes template-delimiter braces in user input before rendering.
// Escape special characters in template variables
// NOTE(review): PromptTemplate substitutes variable VALUES without
// re-parsing them as template text, so this escaping may be unnecessary —
// and the inserted backslashes may survive into the rendered prompt,
// corrupting the user's input. Verify against the template engine in use.
String escaped = userInput
.replace("{", "\\{")
.replace("}", "\\}");
PromptTemplate template = new PromptTemplate(
"Process this input: {input}"
);
Prompt prompt = template.create(Map.of("input", escaped));
return chatModel.call(prompt).getResult().getOutput().getText();
}public String chatWithSafeObservability(String message) {
// Wraps the chat call in an observation; if setting up the observation
// itself fails, the chat call still proceeds un-instrumented.
try {
Prompt prompt = new Prompt(message);
ChatModelObservationContext context =
new ChatModelObservationContext(prompt, chatModel);
return Observation.createNotStarted("chat.call", context, observationRegistry)
.observe(() -> {
try {
ChatResponse response = chatModel.call(prompt);
// Record the response on the context so the observation captures it.
context.setResponse(response);
return response.getResult().getOutput().getText();
} catch (Exception e) {
// Record the failure, then rethrow so the observation ends in error.
context.setError(e);
throw e;
}
});
} catch (Exception e) {
// Observation failed, but don't fail the operation
log.warn("Observation setup failed, proceeding without observation", e);
return chatModel.call(message);
}
}For more advanced scenarios, consult: