Spring Boot-compatible Ollama integration providing ChatModel and EmbeddingModel implementations for running large language models locally with support for streaming, tool calling, model management, and observability.
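The examples in this section assume a running Ollama server and a configured OllamaApi client; a minimal sketch of that setup (the default local endpoint is an assumption about your environment):
import org.springframework.ai.ollama.api.OllamaApi;
// Client for a locally running Ollama server (default port 11434)
OllamaApi ollamaApi = OllamaApi.builder()
.baseUrl("http://localhost:11434")
.build();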
Enable models to understand and analyze images alongside text.
Ollama supports multimodal models that can process both text and images. Spring AI Ollama provides seamless integration for vision capabilities, allowing you to send images as base64-encoded data or file resources.
Vision-capable models available through Ollama include LLaVA, Llama 3.2 Vision (11B and 90B), Qwen 2.5 VL, and Moondream; configuration examples for each appear below.
Use the UserMessage builder to attach images:
import java.util.List;
import org.springframework.ai.chat.messages.UserMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.content.Media;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.ai.ollama.api.OllamaChatOptions;
import org.springframework.ai.ollama.api.OllamaModel;
import org.springframework.core.io.ClassPathResource;
import org.springframework.util.MimeTypeUtils;
// Load image from classpath
var imageResource = new ClassPathResource("/images/photo.png");
// Create message with image
UserMessage message = UserMessage.builder()
.text("What do you see in this image?")
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageResource)))
.build();
// Send to vision model
OllamaChatModel visionModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAVA)
.build())
.build();
ChatResponse response = visionModel.call(new Prompt(message));
System.out.println(response.getResult().getOutput().getText());
// Output: "I see a golden retriever dog playing in a park..."Send multiple images in a single message:
UserMessage message = UserMessage.builder()
.text("Compare these two images. What are the differences?")
.media(List.of(
new Media(MimeTypeUtils.IMAGE_PNG, new ClassPathResource("/image1.png")),
new Media(MimeTypeUtils.IMAGE_JPEG, new ClassPathResource("/image2.jpg"))
))
.build();
ChatResponse response = visionModel.call(new Prompt(message));

Load an image from the file system:
var imageResource = new FileSystemResource("/path/to/image.jpg");
UserMessage message = UserMessage.builder()
.text("Describe this image")
.media(List.of(new Media(MimeTypeUtils.IMAGE_JPEG, imageResource)))
.build();

Load an image from a URL:
// UrlResource.from avoids the checked MalformedURLException of the constructor
var imageResource = UrlResource.from("https://example.com/image.png");
UserMessage message = UserMessage.builder()
.text("What's in this image?")
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageResource)))
.build();

Load an image from a byte array:
byte[] imageBytes = Files.readAllBytes(Path.of("/path/to/image.jpg"));
var imageResource = new ByteArrayResource(imageBytes);
UserMessage message = UserMessage.builder()
.text("Analyze this image")
.media(List.of(new Media(MimeTypeUtils.IMAGE_JPEG, imageResource)))
.build();

Images are automatically converted to base64 when sent to Ollama:
// Spring AI handles base64 encoding automatically
// You provide a Resource; Spring AI converts it to base64 for the API.
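Under the hood this amounts to reading the resource bytes and Base64-encoding them; a rough illustration using only JDK APIs (application code never needs to do this itself):
// imageResource is any of the Resource types shown above
byte[] bytes = imageResource.getInputStream().readAllBytes();
String base64Image = java.util.Base64.getEncoder().encodeToString(bytes);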
Common image formats supported:
PNG (MimeTypeUtils.IMAGE_PNG)
JPEG (MimeTypeUtils.IMAGE_JPEG)
GIF (MimeTypeUtils.IMAGE_GIF)
WebP (MimeType.valueOf("image/webp"))

For formats without a MimeTypeUtils constant, such as WebP, build the MimeType yourself:
import org.springframework.util.MimeType;
// WebP support
UserMessage message = UserMessage.builder()
.text("Describe this image")
.media(List.of(new Media(
MimeType.valueOf("image/webp"),
imageResource
)))
.build();

LLaVA, a general-purpose vision model:
OllamaChatModel visionModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAVA)
.temperature(0.7)
.numPredict(512)
.build())
.build();

Llama 3.2 Vision, offering high-quality vision with strong language understanding:
// 11B model (balanced)
OllamaChatModel llama32Vision = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3_2_VISION_11b)
.temperature(0.7)
.build())
.build();
// 90B model (highest quality)
OllamaChatModel llama32VisionLarge = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3_2_VISION_90b)
.temperature(0.7)
.numCtx(4096) // Larger context
.build())
.build();

Qwen 2.5 VL, multimodal with strong multilingual support:
OllamaChatModel qwenVision = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.QWEN2_5_VL)
.temperature(0.7)
.build())
.build();

Moondream, an efficient edge vision model:
OllamaChatModel moondream = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.MOONDREAM)
.temperature(0.7)
.build())
.build();
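Any of these models can also stream the response. A minimal sketch, assuming the message and visionModel variables from the examples above; Spring AI's streaming API returns a Reactor Flux:
import reactor.core.publisher.Flux;
// Stream partial responses as they arrive; blockLast() is for demo purposes only
Flux<ChatResponse> chunks = visionModel.stream(new Prompt(message));
chunks.mapNotNull(chunk -> chunk.getResult() != null
        ? chunk.getResult().getOutput().getText() : null)
    .doOnNext(System.out::print)
    .blockLast();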

A complete image analysis service:
@Service
public class ImageAnalysisService {
private final OllamaChatModel visionModel;
public ImageAnalysisService(OllamaApi ollamaApi) {
this.visionModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAVA)
.temperature(0.7)
.build())
.build();
}
public String describeImage(Resource imageResource) {
UserMessage message = UserMessage.builder()
.text("Provide a detailed description of this image.")
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageResource)))
.build();
ChatResponse response = visionModel.call(new Prompt(message));
return response.getResult().getOutput().getText();
}
public String analyzeWithQuestion(Resource imageResource, String question) {
UserMessage message = UserMessage.builder()
.text(question)
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageResource)))
.build();
ChatResponse response = visionModel.call(new Prompt(message));
return response.getResult().getOutput().getText();
}
public String compareImages(Resource image1, Resource image2) {
UserMessage message = UserMessage.builder()
.text("Compare these two images. What are the similarities and differences?")
.media(List.of(
new Media(MimeTypeUtils.IMAGE_PNG, image1),
new Media(MimeTypeUtils.IMAGE_PNG, image2)
))
.build();
ChatResponse response = visionModel.call(new Prompt(message));
return response.getResult().getOutput().getText();
}
}
// Usage
ImageAnalysisService service = new ImageAnalysisService(ollamaApi);
// Describe image
Resource image = new ClassPathResource("/photo.jpg");
String description = service.describeImage(image);
// Ask specific question
String answer = service.analyzeWithQuestion(
image,
"How many people are in this image?"
);
// Compare images
Resource before = new ClassPathResource("/before.jpg");
Resource after = new ClassPathResource("/after.jpg");
String comparison = service.compareImages(before, after);

Document analysis and text extraction from images:
@Service
public class DocumentAnalysisService {
private final OllamaChatModel visionModel;
public DocumentAnalysisService(OllamaApi ollamaApi) {
this.visionModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3_2_VISION_11b)
.temperature(0.2) // Lower for accuracy
.numPredict(1024) // More tokens for documents
.build())
.build();
}
// Example shape for structured extraction results
public record ExtractedData(
String text,
Map<String, String> fields,
List<String> tables
) {}
public String extractText(Resource documentImage) {
UserMessage message = UserMessage.builder()
.text("Extract all text from this document. Maintain formatting.")
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, documentImage)))
.build();
ChatResponse response = visionModel.call(new Prompt(message));
return response.getResult().getOutput().getText();
}
public Map<String, String> extractFields(Resource formImage, List<String> fieldNames) {
String fieldList = String.join(", ", fieldNames);
UserMessage message = UserMessage.builder()
.text(String.format(
"Extract these fields from the form: %s. Return as JSON.",
fieldList
))
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, formImage)))
.build();
OllamaChatOptions options = OllamaChatOptions.builder()
.format("json") // Request JSON output
.build();
ChatResponse response = visionModel.call(new Prompt(message, options));
String json = response.getResult().getOutput().getText();
// Parse JSON response (parseJson is an application-supplied helper; see the sketch after this class)
return parseJson(json);
}
public String analyzeDiagram(Resource diagramImage) {
UserMessage message = UserMessage.builder()
.text("Explain this diagram in detail. Describe the components and their relationships.")
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, diagramImage)))
.build();
ChatResponse response = visionModel.call(new Prompt(message));
return response.getResult().getOutput().getText();
}
}
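The parseJson helper referenced above is not provided by Spring AI; a minimal sketch of one, using Jackson (jackson-databind assumed on the classpath), that could live inside the service:
import java.util.Map;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
// Application-supplied helper referenced by extractFields above
private Map<String, String> parseJson(String json) {
    try {
        return new ObjectMapper().readValue(json, new TypeReference<Map<String, String>>() {});
    } catch (com.fasterxml.jackson.core.JsonProcessingException e) {
        throw new IllegalStateException("Model response was not valid JSON", e);
    }
}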

Multi-turn conversation about an image:
public class ImageConversation {
private final OllamaChatModel visionModel;
private final List<Message> conversationHistory;
public ImageConversation(OllamaApi ollamaApi) {
this.visionModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAVA)
.build())
.build();
this.conversationHistory = new ArrayList<>();
}
public String startConversation(Resource image, String initialQuestion) {
UserMessage message = UserMessage.builder()
.text(initialQuestion)
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, image)))
.build();
conversationHistory.add(message);
ChatResponse response = visionModel.call(new Prompt(conversationHistory));
AssistantMessage assistantMessage = response.getResult().getOutput();
conversationHistory.add(assistantMessage);
return assistantMessage.getText();
}
public String continueConversation(String followUpQuestion) {
UserMessage message = new UserMessage(followUpQuestion);
conversationHistory.add(message);
ChatResponse response = visionModel.call(new Prompt(conversationHistory));
AssistantMessage assistantMessage = response.getResult().getOutput();
conversationHistory.add(assistantMessage);
return assistantMessage.getText();
}
public void reset() {
conversationHistory.clear();
}
}
// Usage
ImageConversation conversation = new ImageConversation(ollamaApi);
// Start with image
Resource photo = new ClassPathResource("/vacation.jpg");
String answer1 = conversation.startConversation(
photo,
"Where was this photo taken?"
);
System.out.println(answer1);
// "This appears to be taken at the Eiffel Tower in Paris, France..."
// Follow-up without sending image again
String answer2 = conversation.continueConversation(
"What season does it look like?"
);
System.out.println(answer2);
// "Based on the foliage and lighting, it appears to be autumn..."
String answer3 = conversation.continueConversation(
"What activities could someone do nearby?"
);
System.out.println(answer3);
// "Near the Eiffel Tower, you could..."@Service

Image classification with a vision model:
@Service
public class ImageClassifier {
private final OllamaChatModel visionModel;
public ImageClassifier(OllamaApi ollamaApi) {
this.visionModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAVA)
.temperature(0.1) // Low temp for classification
.build())
.build();
}
public String classify(Resource image, List<String> categories) {
String categoryList = String.join(", ", categories);
UserMessage message = UserMessage.builder()
.text(String.format(
"Classify this image into one of these categories: %s. " +
"Respond with only the category name.",
categoryList
))
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, image)))
.build();
ChatResponse response = visionModel.call(new Prompt(message));
return response.getResult().getOutput().getText().trim();
}
public Map<String, Double> classifyWithConfidence(
Resource image,
List<String> categories) {
String categoryList = String.join(", ", categories);
UserMessage message = UserMessage.builder()
.text(String.format(
"For this image, rate the likelihood (0-100%%) of each category: %s. " +
"Return as JSON with category names as keys and percentages as values.",
categoryList
))
.media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, image)))
.build();
OllamaChatOptions options = OllamaChatOptions.builder()
.format("json")
.build();
ChatResponse response = visionModel.call(new Prompt(message, options));
String json = response.getResult().getOutput().getText();
// parseJsonToConfidenceMap is an application-supplied helper, analogous to parseJson above
return parseJsonToConfidenceMap(json);
}
}
// Usage
ImageClassifier classifier = new ImageClassifier(ollamaApi);
List<String> animalCategories = List.of(
"dog", "cat", "bird", "horse", "other"
);
Resource animalPhoto = new ClassPathResource("/animal.jpg");
String category = classifier.classify(animalPhoto, animalCategories);
System.out.println("Classification: " + category);
Map<String, Double> confidence = classifier.classifyWithConfidence(
animalPhoto,
animalCategories
);
confidence.forEach((cat, conf) ->
System.out.printf("%s: %.1f%%%n", cat, conf)
);"Describe this image in detail."
"What objects can you see in this picture?"
"Provide a caption for this image.""How many cars are in this image?"
"Are there any people in this photo?"
"List all the objects you can identify.""Extract all text from this document."
"What does the sign say in this image?"
"Read the text from this screenshot.""Where was this photo taken?"
"What time of day is it in this image?"
"What's the weather like in this picture?""Compare these two images."
"What changed between the before and after photos?"
"Which image shows better quality?""Is this image blurry or clear?"
"Assess the lighting quality in this photo."
"Rate the composition of this image."try {
UserMessage message = UserMessage.builder()
.text("Describe this")
.media(List.of(new Media(
MimeType.valueOf("application/pdf"), // Unsupported
pdfResource
)))
.build();
visionModel.call(new Prompt(message));
} catch (RuntimeException e) {
System.err.println("Unsupported media type: " + e.getMessage());
// Handle error - convert PDF to image, or reject request
}

Handle missing image files:
try {
Resource image = new ClassPathResource("/missing.jpg");
// Check if exists
if (!image.exists()) {
throw new FileNotFoundException("Image not found");
}
UserMessage message = UserMessage.builder()
.text("Describe this")
.media(List.of(new Media(MimeTypeUtils.IMAGE_JPEG, image)))
.build();
visionModel.call(new Prompt(message));
} catch (FileNotFoundException e) {
System.err.println("Image file not found: " + e.getMessage());
}
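A related failure mode is calling a vision model that has not been pulled yet. One option is the package's model management support; a sketch, assuming the ModelManagementOptions API from recent spring-ai-ollama versions:
import org.springframework.ai.ollama.management.ModelManagementOptions;
import org.springframework.ai.ollama.management.PullModelStrategy;
// Pull the model automatically if it is not already available locally
OllamaChatModel visionModel = OllamaChatModel.builder()
    .ollamaApi(ollamaApi)
    .defaultOptions(OllamaChatOptions.builder()
        .model(OllamaModel.LLAVA)
        .build())
    .modelManagementOptions(ModelManagementOptions.builder()
        .pullModelStrategy(PullModelStrategy.WHEN_MISSING)
        .build())
    .build();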