Spring Boot-compatible Ollama integration providing ChatModel and EmbeddingModel implementations for running large language models locally with support for streaming, tool calling, model management, and observability.
This document provides comprehensive real-world usage examples for Spring AI Ollama.
Build a stateful chatbot that maintains conversation history.
@Service
public class ConversationalChatbot {
private final OllamaChatModel chatModel;
private final Map<String, List<Message>> conversationHistory = new ConcurrentHashMap<>();
/**
 * Creates the chatbot and configures the chat model shared by all conversations.
 *
 * @param ollamaApi client used to talk to the Ollama server
 */
public ConversationalChatbot(OllamaApi ollamaApi) {
    // Larger context window (4096 tokens) so longer conversations fit
    OllamaChatOptions conversationOptions = OllamaChatOptions.builder()
            .model(OllamaModel.LLAMA3.id())
            .temperature(0.8)
            .numCtx(4096)
            .build();

    this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(conversationOptions)
            .build();
}
/**
 * Sends a user message to the model and returns the assistant's reply, keeping a
 * per-user conversation history so that follow-up messages have context.
 *
 * <p>After each exchange the history is trimmed so that it holds every system message
 * plus the most recent conversation messages, 20 messages in total at most.
 *
 * @param userId      key identifying the conversation
 * @param userMessage text of the user's message
 * @return the text of the assistant's reply
 */
public String chat(String userId, String userMessage) {
    // Get or create the conversation history, seeded with the system prompt
    List<Message> history = conversationHistory.computeIfAbsent(
            userId,
            k -> new ArrayList<>(List.of(
                    new SystemMessage("You are a helpful AI assistant."))));

    // NOTE(review): the per-user list is a plain ArrayList; concurrent calls for the
    // same userId are not synchronized here.
    history.add(new UserMessage(userMessage));

    // Send the full history so the model sees the earlier turns
    ChatResponse response = chatModel.call(new Prompt(history));

    AssistantMessage assistantMessage = response.getResult().getOutput();
    history.add(assistantMessage);

    // Limit history size. The number of conversation messages to keep is derived from
    // the number of system messages, so the total is exactly maxMessages after trimming.
    final int maxMessages = 20;
    if (history.size() > maxMessages) {
        List<Message> systemMessages = history.stream()
                .filter(m -> m instanceof SystemMessage)
                .toList();
        List<Message> conversationMessages = history.stream()
                .filter(m -> !(m instanceof SystemMessage))
                .toList();

        int keep = Math.max(0, maxMessages - systemMessages.size());
        int from = Math.max(0, conversationMessages.size() - keep);
        List<Message> recentMessages =
                conversationMessages.subList(from, conversationMessages.size());

        history.clear();
        history.addAll(systemMessages);
        history.addAll(recentMessages);
    }

    return assistantMessage.getText();
}
/**
 * Removes the stored conversation history for the given user, if there is one.
 *
 * @param userId key of the conversation to discard
 */
public void clearHistory(String userId) {
conversationHistory.remove(userId);
}
/**
 * Returns how many messages are currently stored for the given user.
 *
 * @param userId key of the conversation
 * @return the number of stored messages, or 0 when the user has no history
 */
public int getHistorySize(String userId) {
    List<Message> history = conversationHistory.get(userId);
    if (history == null) {
        return 0;
    }
    return history.size();
}
}

Usage:
ConversationalChatbot chatbot = new ConversationalChatbot(ollamaApi);
// First message
String response1 = chatbot.chat("user123", "Hi, my name is Alice");
// "Hello Alice! Nice to meet you..."
// Follow-up (remembers context)
String response2 = chatbot.chat("user123", "What's my name?");
// "Your name is Alice!"
// Clear history
chatbot.clearHistory("user123");

Answer questions about documents using embeddings and semantic search.
@Service
public class DocumentQASystem {
private final OllamaChatModel chatModel;
private final OllamaEmbeddingModel embeddingModel;
private final Map<String, DocumentChunk> documentStore = new ConcurrentHashMap<>();
public record DocumentChunk(String id, String content, float[] embedding) {}
public DocumentQASystem(OllamaApi ollamaApi) {
this.chatModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3.id())
.temperature(0.2) // Lower for factual answers
.build())
.build();
this.embeddingModel = OllamaEmbeddingModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaEmbeddingOptions.builder()
.model(OllamaModel.NOMIC_EMBED_TEXT.id())
.build())
.build();
}
/**
 * Splits a document into paragraph chunks, embeds each non-empty chunk and stores it
 * under the key {@code <documentId>_chunk_<position>}.
 *
 * @param documentId identifier used as the prefix of every chunk id
 * @param content    full document text; paragraphs are separated by a blank line
 */
public void indexDocument(String documentId, String content) {
    // Simple splitting by paragraphs
    String[] paragraphs = content.split("\n\n");
    int position = 0;
    for (String paragraph : paragraphs) {
        // The position counts every split piece, including the empty ones skipped below
        int chunkIndex = position++;
        String text = paragraph.trim();
        if (!text.isEmpty()) {
            float[] vector = embeddingModel.embed(text);
            String chunkId = documentId + "_chunk_" + chunkIndex;
            documentStore.put(chunkId, new DocumentChunk(chunkId, text, vector));
        }
    }
}
/**
 * Answers a question from the indexed documents: embeds the question, picks the three
 * most similar chunks, and asks the chat model to answer using those chunks as context.
 *
 * @param question the question to answer
 * @return the model's answer, or a fixed fallback sentence when nothing is indexed
 */
public String answerQuestion(String question) {
    // 1. Generate question embedding
    float[] questionEmbedding = embeddingModel.embed(question);

    // 2. Find most relevant chunks (top 3), highest similarity first
    List<DocumentChunk> relevantChunks = documentStore.values().stream()
            .map(chunk -> new ScoredChunk(
                    chunk,
                    cosineSimilarity(questionEmbedding, chunk.embedding())))
            .sorted((a, b) -> Double.compare(b.score(), a.score()))
            .limit(3)
            .map(ScoredChunk::chunk)
            .toList();

    if (relevantChunks.isEmpty()) {
        return "I don't have enough information to answer that question.";
    }

    // 3. Build context from relevant chunks
    String context = relevantChunks.stream()
            .map(DocumentChunk::content)
            .collect(Collectors.joining("\n\n"));

    // 4. Generate answer using context
    String prompt = String.format("""
            Answer the following question based on the provided context.
            If the answer is not in the context, say so.
            Context:
            %s
            Question: %s
            Answer:
            """, context, question);

    ChatResponse response = chatModel.call(new Prompt(prompt));
    // getText() is the text accessor on Spring AI 1.x messages
    return response.getResult().getOutput().getText();
}
private record ScoredChunk(DocumentChunk chunk, double score) {}
/**
 * Computes the cosine similarity of two vectors.
 *
 * @param a first vector
 * @param b second vector, which must have the same length as {@code a}
 * @return the cosine of the angle between the vectors, or 0.0 when either vector
 *         has zero magnitude
 * @throws IllegalArgumentException if the vectors differ in length
 */
private double cosineSimilarity(float[] a, float[] b) {
    if (a.length != b.length) {
        throw new IllegalArgumentException(
                "Vector dimensions differ: " + a.length + " vs " + b.length);
    }

    double dotProduct = 0.0;
    double normA = 0.0;
    double normB = 0.0;
    for (int i = 0; i < a.length; i++) {
        // Widen before multiplying so the products are computed in double precision
        double x = a[i];
        double y = b[i];
        dotProduct += x * y;
        normA += x * x;
        normB += y * y;
    }

    // A zero vector would give 0/0 = NaN, and NaN sorts ahead of every real score
    // in the descending ranking; treat it as "no similarity" instead.
    if (normA == 0.0 || normB == 0.0) {
        return 0.0;
    }
    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
}

Usage:
DocumentQASystem qaSystem = new DocumentQASystem(ollamaApi);
// Index documents
String springAiDoc = """
Spring AI is a framework for building AI-powered applications.
It provides abstractions for working with various AI models.
The framework supports chat models, embedding models, and more.
""";
qaSystem.indexDocument("spring-ai-intro", springAiDoc);
// Ask questions
String answer = qaSystem.answerQuestion("What is Spring AI?");
System.out.println(answer);

Automated code review using thinking models.
@Service
public class CodeReviewAssistant {
private final OllamaChatModel chatModel;
/**
 * Creates the reviewer backed by a thinking-capable model with reasoning enabled.
 *
 * @param ollamaApi client used to talk to the Ollama server
 */
public CodeReviewAssistant(OllamaApi ollamaApi) {
    OllamaChatOptions reviewOptions = OllamaChatOptions.builder()
            .model(OllamaModel.QWEN3_4B_THINKING.id())
            .enableThinking() // Enable reasoning
            .temperature(0.3)
            .build();

    this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(reviewOptions)
            .build();
}
public record CodeReview(
String reasoning,
List<Issue> issues,
List<Suggestion> suggestions,
int overallScore
) {}
public record Issue(String severity, String description, int lineNumber) {}
public record Suggestion(String description, String improvement) {}
/**
 * Asks the model to review a piece of code and converts the reply into a
 * {@link CodeReview}.
 *
 * @param code     source code to review
 * @param language language name, used in the prompt text and as the code-fence tag
 * @return the parsed review; its reasoning is {@code null} when the response carries
 *         no "thinking" metadata entry
 */
public CodeReview reviewCode(String code, String language) {
    String prompt = String.format("""
            Review the following %s code and provide:
            1. Potential bugs or errors
            2. Code quality issues
            3. Performance concerns
            4. Security vulnerabilities
            5. Best practice violations
            6. Suggestions for improvement
            Code:
            ```%s
            %s
            ```
            Provide a structured review with severity levels (CRITICAL, HIGH, MEDIUM, LOW).
            """, language, language, code);

    ChatResponse response = chatModel.call(new Prompt(prompt));
    AssistantMessage message = response.getResult().getOutput();

    // Extract reasoning if available. Metadata values are typed as Object, so convert
    // explicitly instead of assigning straight to a String.
    // NOTE(review): confirm which metadata map carries the "thinking" entry in the
    // Spring AI version in use.
    Object thinking = message.getMetadata().get("thinking");
    String reasoning = (thinking != null) ? thinking.toString() : null;

    String review = message.getText();

    // Parse review (simplified - in production, use structured output)
    return parseReview(review, reasoning);
}
/**
 * Placeholder parser: returns a review carrying the given reasoning, empty issue and
 * suggestion lists, and a fixed score of 75. The review text is not inspected yet.
 *
 * @param review    raw review text from the model (currently unused)
 * @param reasoning reasoning text to attach to the result
 * @return the placeholder review
 */
private CodeReview parseReview(String review, String reasoning) {
    // Simplified parsing - in production, use JSON format
    // ... (implementation details)
    final int placeholderScore = 75;
    return new CodeReview(
            reasoning,
            new ArrayList<Issue>(),
            new ArrayList<Suggestion>(),
            placeholderScore);
}
}

Usage:
CodeReviewAssistant reviewer = new CodeReviewAssistant(ollamaApi);
String code = """
public int divide(int a, int b) {
return a / b;
}
""";
CodeReview review = reviewer.reviewCode(code, "java");
System.out.println("Reasoning: " + review.reasoning());
review.issues().forEach(issue ->
System.out.println(issue.severity() + ": " + issue.description())
);

Summarize long documents with streaming progress.
@Service
public class ContentSummarizationService {
private final OllamaChatModel chatModel;
/**
 * Creates the summarization service and its chat model.
 *
 * @param ollamaApi client used to talk to the Ollama server
 */
public ContentSummarizationService(OllamaApi ollamaApi) {
    // Large context (8192 tokens) for long documents
    OllamaChatOptions summaryOptions = OllamaChatOptions.builder()
            .model(OllamaModel.LLAMA3.id())
            .temperature(0.5)
            .numCtx(8192)
            .build();

    this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(summaryOptions)
            .build();
}
/**
 * Summarizes the content in a single blocking call.
 *
 * @param content  text to summarize
 * @param maxWords approximate target length of the summary, in words
 * @return the summary text produced by the model
 */
public String summarize(String content, int maxWords) {
    String prompt = String.format("""
            Summarize the following content in approximately %d words.
            Focus on the key points and main ideas.
            Content:
            %s
            Summary:
            """, maxWords, content);

    ChatResponse response = chatModel.call(new Prompt(prompt));
    // getText() is the text accessor on Spring AI 1.x messages
    return response.getResult().getOutput().getText();
}
/**
 * Summarizes the content and streams the summary text as it is generated.
 *
 * @param content  text to summarize
 * @param maxWords approximate target length of the summary, in words
 * @return a stream of text fragments; chunks that carry no text are skipped
 */
public Flux<String> summarizeStreaming(String content, int maxWords) {
    String prompt = String.format("""
            Summarize the following content in approximately %d words.
            Content:
            %s
            Summary:
            """, maxWords, content);

    // Flux.map must never return null, so use mapNotNull to drop chunks that have
    // no result or no text instead of failing the whole stream.
    return chatModel.stream(new Prompt(prompt))
            .mapNotNull(chunk -> chunk.getResult() != null
                    ? chunk.getResult().getOutput().getText()
                    : null);
}
public record BulletPointSummary(List<String> keyPoints, String conclusion) {}
/**
 * Summarizes the content as a list of key points plus a short conclusion.
 *
 * @param content text to summarize
 * @return the key points and conclusion found in the model's reply; the list is empty
 *         and the conclusion is {@code ""} when the reply does not follow the
 *         requested format
 */
public BulletPointSummary summarizeToBulletPoints(String content) {
    String prompt = String.format("""
            Summarize the following content as bullet points.
            Extract 3-5 key points and provide a brief conclusion.
            Format:
            - Key point 1
            - Key point 2
            ...
            Conclusion: [brief conclusion]
            Content:
            %s
            """, content);

    ChatResponse response = chatModel.call(new Prompt(prompt));
    String summary = response.getResult().getOutput().getText();
    return parseBulletPointSummary(summary);
}

/**
 * Extracts "- " bullet lines and the "Conclusion:" line from the model's reply.
 * Simplified line-based parsing; a conclusion that continues on later lines is
 * not collected.
 */
private static BulletPointSummary parseBulletPointSummary(String summary) {
    final String conclusionLabel = "Conclusion:";
    List<String> keyPoints = new ArrayList<>();
    String conclusion = "";
    if (summary != null) {
        for (String rawLine : summary.split("\\R")) {
            String line = rawLine.strip();
            if (line.startsWith("- ") || line.startsWith("* ")) {
                keyPoints.add(line.substring(2).strip());
            } else if (line.regionMatches(true, 0, conclusionLabel, 0, conclusionLabel.length())) {
                conclusion = line.substring(conclusionLabel.length()).strip();
            }
        }
    }
    return new BulletPointSummary(keyPoints, conclusion);
}
}

Build a semantic search engine with embeddings.
@Service
public class SemanticSearchEngine {
private final OllamaEmbeddingModel embeddingModel;
private final List<SearchableDocument> documents = new CopyOnWriteArrayList<>();
public record SearchableDocument(
String id,
String title,
String content,
float[] embedding,
Map<String, String> metadata
) {}
public record SearchResult(
SearchableDocument document,
double relevanceScore
) {}
/**
 * Creates the search engine and the embedding model it uses for indexing and querying.
 *
 * @param ollamaApi client used to talk to the Ollama server
 */
public SemanticSearchEngine(OllamaApi ollamaApi) {
    OllamaEmbeddingOptions embeddingOptions = OllamaEmbeddingOptions.builder()
            .model(OllamaModel.NOMIC_EMBED_TEXT.id())
            .build();

    this.embeddingModel = OllamaEmbeddingModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(embeddingOptions)
            .build();
}
/**
 * Embeds a document and adds it to the in-memory index.
 *
 * @param id       document identifier
 * @param title    document title
 * @param content  document body
 * @param metadata arbitrary key/value attributes stored with the document
 */
public void indexDocument(String id, String title, String content, Map<String, String> metadata) {
    // Title and content are embedded together, separated by a blank line
    float[] vector = embeddingModel.embed(String.join("\n\n", title, content));
    SearchableDocument indexed = new SearchableDocument(id, title, content, vector, metadata);
    documents.add(indexed);
}
/**
 * Returns the indexed documents most similar to the query.
 *
 * @param query free-text query
 * @param topK  maximum number of results
 * @return up to {@code topK} results, highest relevance score first
 */
public List<SearchResult> search(String query, int topK) {
    // An unfiltered search is a filtered search whose filter accepts every document
    return searchWithFilter(query, topK, doc -> true);
}
/**
 * Returns the documents accepted by {@code filter} that are most similar to the query.
 *
 * @param query  free-text query
 * @param topK   maximum number of results
 * @param filter predicate a document must satisfy to be considered
 * @return up to {@code topK} results, highest relevance score first
 */
public List<SearchResult> searchWithFilter(
        String query,
        int topK,
        Predicate<SearchableDocument> filter
) {
    float[] queryVector = embeddingModel.embed(query);

    return documents.stream()
            .filter(filter)
            .map(candidate -> {
                double score = cosineSimilarity(queryVector, candidate.embedding());
                return new SearchResult(candidate, score);
            })
            // Descending by score
            .sorted((left, right) -> Double.compare(right.relevanceScore(), left.relevanceScore()))
            .limit(topK)
            .toList();
}
/**
 * Computes the cosine similarity of two vectors.
 *
 * @param a first vector
 * @param b second vector, which must have the same length as {@code a}
 * @return the cosine of the angle between the vectors, or 0.0 when either vector
 *         has zero magnitude
 * @throws IllegalArgumentException if the vectors differ in length
 */
private double cosineSimilarity(float[] a, float[] b) {
    if (a.length != b.length) {
        throw new IllegalArgumentException(
                "Vector dimensions differ: " + a.length + " vs " + b.length);
    }

    double dotProduct = 0.0;
    double normA = 0.0;
    double normB = 0.0;
    for (int i = 0; i < a.length; i++) {
        // Widen before multiplying so the products are computed in double precision
        double x = a[i];
        double y = b[i];
        dotProduct += x * y;
        normA += x * x;
        normB += y * y;
    }

    // A zero vector would give 0/0 = NaN, and NaN sorts ahead of every real score
    // in the descending ranking; treat it as "no similarity" instead.
    if (normA == 0.0 || normB == 0.0) {
        return 0.0;
    }
    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
}

Usage:
SemanticSearchEngine searchEngine = new SemanticSearchEngine(ollamaApi);
// Index documents
searchEngine.indexDocument(
"doc1",
"Spring AI Introduction",
"Spring AI is a framework for AI applications...",
Map.of("category", "documentation", "author", "Spring Team")
);
// Search
List<SearchResult> results = searchEngine.search("AI framework for Java", 5);
results.forEach(result ->
System.out.println(result.document().title() + " (score: " + result.relevanceScore() + ")")
);
// Search with filter
List<SearchResult> filtered = searchEngine.searchWithFilter(
"AI framework",
5,
doc -> "documentation".equals(doc.metadata().get("category"))
);

Use tool calling to integrate external services.
@Service
public class WeatherChatService {
private final OllamaChatModel chatModel;
private final WeatherService weatherService;
public WeatherChatService(OllamaApi ollamaApi) {
this.weatherService = new WeatherService();
// Register weather tool
FunctionToolCallback weatherTool = FunctionToolCallback.builder(
"getWeather",
weatherService
)
.description("Get current weather for a location")
.inputType(WeatherRequest.class)
.build();
this.chatModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3.id())
.toolCallbacks(List.of(weatherTool))
.build())
.build();
}
public String chat(String message) {
ChatResponse response = chatModel.call(new Prompt(message));
return response.getResult().getOutput().getContent();
}
}
// Weather service implementation
/**
 * Simulated weather lookup used as the tool implementation. Returns canned data for
 * a few cities and a default for everything else.
 */
class WeatherService {

    /** Tool input: the place to look up and the temperature unit wanted. */
    public record WeatherRequest(String location, String unit) {}

    /** Tool output: the weather reported for the requested location. */
    public record WeatherResponse(String location, double temperature, String unit, String conditions) {}

    /**
     * Looks up the simulated weather for the requested location.
     *
     * @param request location and unit; a missing location falls back to the default
     *                data and a missing unit is reported as "celsius"
     * @return the simulated weather
     */
    public WeatherResponse getWeather(WeatherRequest request) {
        // Simulate weather API call
        String unit = request.unit();
        String reportedUnit = (unit == null || unit.isBlank()) ? "celsius" : unit;
        return new WeatherResponse(
                request.location(),
                getTemperature(request.location(), unit),
                reportedUnit,
                getConditions(request.location()));
    }

    /** Returns the simulated temperature, converted when the unit is "fahrenheit". */
    private double getTemperature(String location, String unit) {
        // Simulated data
        double celsius = switch (normalize(location)) {
            case "san francisco" -> 18.3;
            case "tokyo" -> 10.5;
            case "paris" -> 15.2;
            default -> 22.0;
        };
        return "fahrenheit".equalsIgnoreCase(unit) ? (celsius * 9 / 5) + 32 : celsius;
    }

    /** Returns the simulated sky conditions for the location. */
    private String getConditions(String location) {
        return switch (normalize(location)) {
            case "san francisco" -> "Foggy";
            case "tokyo" -> "Clear";
            case "paris" -> "Partly Cloudy";
            default -> "Sunny";
        };
    }

    /**
     * Produces the lookup key for a location: null-safe, stripped, and lower-cased
     * with the root locale so matching does not depend on the JVM's default locale.
     */
    private static String normalize(String location) {
        return location == null ? "" : location.strip().toLowerCase(java.util.Locale.ROOT);
    }
}
// Usage:
WeatherChatService chatService = new WeatherChatService(ollamaApi);
// The model will automatically call the weather tool
String response = chatService.chat("What's the weather in San Francisco?");
System.out.println(response);
// "The weather in San Francisco is currently 64.9°F and foggy."

Analyze images using vision models.
@Service
public class ImageAnalysisService {
private final OllamaChatModel visionModel;
/**
 * Creates the service and the vision-capable chat model it uses.
 *
 * @param ollamaApi client used to talk to the Ollama server
 */
public ImageAnalysisService(OllamaApi ollamaApi) {
    OllamaChatOptions visionOptions = OllamaChatOptions.builder()
            .model(OllamaModel.LLAVA.id())
            .temperature(0.7)
            .build();

    this.visionModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(visionOptions)
            .build();
}
/**
 * Asks the vision model for a detailed description of an image.
 *
 * @param imageResource the image to describe; it is sent with the PNG media type
 * @return the model's description
 */
public String describeImage(Resource imageResource) {
    UserMessage message = UserMessage.builder()
            .text("Provide a detailed description of this image.")
            .media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageResource)))
            .build();

    ChatResponse response = visionModel.call(new Prompt(message));
    // getText() is the text accessor on Spring AI 1.x messages
    return response.getResult().getOutput().getText();
}
/**
 * Asks the vision model a question about an image.
 *
 * @param imageResource the image to analyze; it is sent with the PNG media type
 * @param question      the question to ask about the image
 * @return the model's answer
 */
public String analyzeWithQuestion(Resource imageResource, String question) {
    UserMessage message = UserMessage.builder()
            .text(question)
            .media(List.of(new Media(MimeTypeUtils.IMAGE_PNG, imageResource)))
            .build();

    ChatResponse response = visionModel.call(new Prompt(message));
    // getText() is the text accessor on Spring AI 1.x messages
    return response.getResult().getOutput().getText();
}
public record ImageComparison(
List<String> similarities,
List<String> differences,
String summary
) {}
/**
 * Sends two images to the vision model in one message and asks it to compare them.
 *
 * @param image1 first image; sent with the PNG media type
 * @param image2 second image; sent with the PNG media type
 * @return the comparison produced from the model's reply by {@code parseComparison}
 */
public ImageComparison compareImages(Resource image1, Resource image2) {
    UserMessage message = UserMessage.builder()
            .text("Compare these two images. List similarities and differences.")
            .media(List.of(
                    new Media(MimeTypeUtils.IMAGE_PNG, image1),
                    new Media(MimeTypeUtils.IMAGE_PNG, image2)))
            .build();

    ChatResponse response = visionModel.call(new Prompt(message));
    // getText() is the text accessor on Spring AI 1.x messages
    String comparison = response.getResult().getOutput().getText();

    // Parse comparison (simplified)
    return parseComparison(comparison);
}
/**
 * Placeholder parser: returns fixed sample similarities and differences and uses the
 * raw model reply as the summary. The reply is not actually parsed yet.
 *
 * @param comparison raw comparison text from the model
 * @return the placeholder comparison
 */
private ImageComparison parseComparison(String comparison) {
    // Simplified parsing
    List<String> similarities = List.of("similarity1", "similarity2");
    List<String> differences = List.of("difference1", "difference2");
    return new ImageComparison(similarities, differences, comparison);
}
}

Process large batches of text with progress tracking.
@Service
public class BatchProcessingPipeline {
private final OllamaChatModel chatModel;
private final OllamaEmbeddingModel embeddingModel;
public record BatchResult(
List<ProcessedItem> successful,
Map<Integer, String> failed,
Duration totalTime
) {}
public record ProcessedItem(
String originalText,
String processed,
float[] embedding,
Duration processingTime
) {}
public BatchProcessingPipeline(OllamaApi ollamaApi) {
this.chatModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3.id())
.build())
.build();
this.embeddingModel = OllamaEmbeddingModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaEmbeddingOptions.builder()
.model(OllamaModel.NOMIC_EMBED_TEXT.id())
.build())
.build();
}
/**
 * Processes the texts one after another. Each text is sent to the chat model together
 * with the instruction, and the model's output is then embedded. A failure on one item
 * is recorded and does not stop the batch.
 *
 * @param texts       texts to process
 * @param instruction instruction prepended to every text
 * @return the successful items, the failures keyed by index in {@code texts}, and the
 *         total elapsed time
 */
public BatchResult processBatch(List<String> texts, String instruction) {
    Instant start = Instant.now();
    List<ProcessedItem> successful = new ArrayList<>();
    Map<Integer, String> failed = new HashMap<>();

    for (int i = 0; i < texts.size(); i++) {
        String text = texts.get(i);
        try {
            Instant itemStart = Instant.now();

            // Process text
            String prompt = instruction + "\n\nText: " + text;
            ChatResponse response = chatModel.call(new Prompt(prompt));
            String processed = response.getResult().getOutput().getText();

            // Generate embedding
            float[] embedding = embeddingModel.embed(processed);

            Duration itemTime = Duration.between(itemStart, Instant.now());
            successful.add(new ProcessedItem(text, processed, embedding, itemTime));

            // Progress logging
            if ((i + 1) % 10 == 0) {
                System.out.println("Processed " + (i + 1) + "/" + texts.size());
            }
        } catch (Exception e) {
            // Some exceptions carry no message; record the exception type instead of
            // null so every failure has a readable reason.
            String reason = (e.getMessage() != null) ? e.getMessage() : e.getClass().getName();
            failed.put(i, reason);
        }
    }

    Duration totalTime = Duration.between(start, Instant.now());
    return new BatchResult(successful, failed, totalTime);
}
/**
 * Reactive variant of batch processing: emits one {@link ProcessedItem} per input text.
 *
 * @param texts       texts to process
 * @param instruction instruction prepended to every text
 * @return a stream of processed items, merged with a concurrency limit of 5
 */
public Flux<ProcessedItem> processBatchReactive(List<String> texts, String instruction) {
    final int maxConcurrency = 5; // Concurrency: 5
    Flux<String> source = Flux.fromIterable(texts);
    return source.flatMap(text -> processItemReactive(text, instruction), maxConcurrency);
}
/**
 * Processes a single text: sends it to the chat model with the instruction, embeds
 * the output, and reports how long both steps took.
 *
 * @param text        text to process
 * @param instruction instruction prepended to the text
 * @return a Mono that performs the work on subscription and emits the result
 */
private Mono<ProcessedItem> processItemReactive(String text, String instruction) {
    return Mono.fromCallable(() -> {
                Instant start = Instant.now();
                String prompt = instruction + "\n\nText: " + text;
                ChatResponse response = chatModel.call(new Prompt(prompt));
                String processed = response.getResult().getOutput().getText();
                float[] embedding = embeddingModel.embed(processed);
                Duration time = Duration.between(start, Instant.now());
                return new ProcessedItem(text, processed, embedding, time);
            })
            // The callable blocks on model calls. Without subscribeOn it runs on the
            // subscribing thread, so items would be processed one at a time however
            // much concurrency the caller's flatMap requests.
            .subscribeOn(reactor.core.scheduler.Schedulers.boundedElastic());
}
}

Usage:
BatchProcessingPipeline pipeline = new BatchProcessingPipeline(ollamaApi);
List<String> texts = List.of(
"Text 1 to process",
"Text 2 to process",
// ... more texts
);
BatchResult result = pipeline.processBatch(texts, "Summarize this text:");
System.out.println("Processed: " + result.successful().size());
System.out.println("Failed: " + result.failed().size());
System.out.println("Total time: " + result.totalTime());

For more examples, see:
tessl i tessl/maven-org-springframework-ai--spring-ai-ollama@1.1.1