Spring Boot-compatible Ollama integration providing ChatModel and EmbeddingModel implementations for running large language models locally with support for streaming, tool calling, model management, and observability.
```xml
<dependency>
    <groupId>org.springframework.ai</groupId>
    <artifactId>spring-ai-ollama</artifactId>
    <version>1.1.2</version>
</dependency>
```

```java
import reactor.core.publisher.Flux;

import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaChatOptions;
import org.springframework.ai.ollama.api.OllamaModel;
// Create API client
OllamaApi api = OllamaApi.builder()
    .baseUrl("http://localhost:11434")
    .build();

// Build chat model
OllamaChatModel chatModel = OllamaChatModel.builder()
    .ollamaApi(api)
    .defaultOptions(OllamaChatOptions.builder()
        .model(OllamaModel.LLAMA3.id())
        .temperature(0.7)
        .build())
    .build();
// Synchronous call
ChatResponse response = chatModel.call(new Prompt("What is Spring AI?"));
String content = response.getResult().getOutput().getText();
// Streaming call
Flux<ChatResponse> stream = chatModel.stream(new Prompt("Tell me a story"));
stream.subscribe(chunk -> System.out.print(chunk.getResult().getOutput().getText()));
```

```java
import java.util.List;

import org.springframework.ai.embedding.EmbeddingOptions;
import org.springframework.ai.embedding.EmbeddingRequest;
import org.springframework.ai.embedding.EmbeddingResponse;
import org.springframework.ai.ollama.OllamaEmbeddingModel;
import org.springframework.ai.ollama.api.OllamaEmbeddingOptions;
// Build embedding model
OllamaEmbeddingModel embeddingModel = OllamaEmbeddingModel.builder()
    .ollamaApi(api)
    .defaultOptions(OllamaEmbeddingOptions.builder()
        .model(OllamaModel.NOMIC_EMBED_TEXT.id())
        .build())
    .build();
// Generate embedding
float[] embedding = embeddingModel.embed("Hello world");
// Batch embeddings
EmbeddingRequest request = new EmbeddingRequest(
    List.of("text1", "text2", "text3"),
    EmbeddingOptions.EMPTY
);
EmbeddingResponse response = embeddingModel.call(request);
```

| Component | Purpose | Reference |
|---|---|---|
| OllamaChatModel | Chat completions with streaming support | Chat Model |
| OllamaEmbeddingModel | Vector embeddings for text | Embedding Model |
| OllamaApi | Low-level REST API client | API Client |
| OllamaChatOptions | Chat model configuration | Chat Options |
| OllamaEmbeddingOptions | Embedding model configuration | Embedding Options |
| OllamaModelManager | Model lifecycle management | Model Management |
| OllamaModel | Pre-configured model constants | Models |
| Feature | Description | Documentation |
|---|---|---|
| Synchronous & Streaming | Both blocking and reactive (Flux) responses | Chat Model |
| Embeddings | Single or batch embedding generation | Embedding Model |
| Tool Calling | Automatic function execution with callbacks (see the sketch after this table) | Tool Calling |
| Multimodal | Image input support for vision models | Multimodal |
| Thinking Models | Reasoning traces for complex problems | Thinking |
| Model Management | Automatic pulling with strategies | Model Management |
| Observability | Micrometer integration for metrics | Observability |
| Retry Logic | Built-in retry with exponential backoff | Error Handling |
| Native Image | GraalVM native image ready | - |
| Spring Boot | Full Spring Boot compatibility | - |
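
To illustrate the tool-calling feature listed above, here is a minimal sketch that reuses the `chatModel` built in the quick start. It assumes the `OllamaChatOptions` builder exposes a `toolCallbacks(...)` method matching the `List<ToolCallback> toolCallbacks` option documented under Tool Calling below, and uses Spring AI's `@Tool` annotation and `ToolCallbacks.from(...)` helper; the `WeatherTools` class is hypothetical.

```java
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.api.OllamaChatOptions;
import org.springframework.ai.ollama.api.OllamaModel;
import org.springframework.ai.support.ToolCallbacks;
import org.springframework.ai.tool.annotation.Tool;

// Hypothetical tool class: any @Tool-annotated method can be exposed to the model.
class WeatherTools {

    @Tool(description = "Get the current weather for a city")
    String currentWeather(String city) {
        return "It is sunny in " + city;   // stand-in for a real lookup
    }
}

// Assumed builder method mirroring the List<ToolCallback> toolCallbacks option.
OllamaChatOptions toolOptions = OllamaChatOptions.builder()
    .model(OllamaModel.LLAMA3_1.id())
    .toolCallbacks(ToolCallbacks.from(new WeatherTools()))
    .build();

// If internal tool execution is enabled (see internalToolExecutionEnabled below),
// the matching callback runs automatically and the final answer is returned.
ChatResponse weatherResponse = chatModel.call(
    new Prompt("What is the weather in Paris right now?", toolOptions));
```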
```java
public class OllamaChatModel implements ChatModel {
public static Builder builder();
public ChatResponse call(Prompt prompt);
public Flux<ChatResponse> stream(Prompt prompt);
public ChatOptions getDefaultOptions();
public void setObservationConvention(ChatModelObservationConvention observationConvention);
}
```

Builder:

```java
public static final class Builder {
public Builder ollamaApi(OllamaApi ollamaApi);
public Builder defaultOptions(OllamaChatOptions defaultOptions);
public Builder toolCallingManager(ToolCallingManager toolCallingManager);
public Builder toolExecutionEligibilityPredicate(ToolExecutionEligibilityPredicate predicate);
public Builder observationRegistry(ObservationRegistry observationRegistry);
public Builder modelManagementOptions(ModelManagementOptions modelManagementOptions);
public Builder retryTemplate(RetryTemplate retryTemplate);
public OllamaChatModel build();
}
```

```java
public class OllamaEmbeddingModel extends AbstractEmbeddingModel {
public static Builder builder();
public float[] embed(Document document);
public EmbeddingResponse call(EmbeddingRequest request);
public void setObservationConvention(EmbeddingModelObservationConvention observationConvention);
}
```

Builder:

```java
public static final class Builder {
public Builder ollamaApi(OllamaApi ollamaApi);
public Builder defaultOptions(OllamaEmbeddingOptions defaultOptions);
public Builder observationRegistry(ObservationRegistry observationRegistry);
public Builder modelManagementOptions(ModelManagementOptions modelManagementOptions);
public OllamaEmbeddingModel build();
}
```

```java
public final class OllamaApi {
public static Builder builder();
// Chat operations
public ChatResponse chat(ChatRequest chatRequest);
public Flux<ChatResponse> streamingChat(ChatRequest chatRequest);
// Embedding operations
public EmbeddingsResponse embed(EmbeddingsRequest embeddingsRequest);
// Model management operations
public ListModelResponse listModels();
public ShowModelResponse showModel(ShowModelRequest showModelRequest);
public ResponseEntity<Void> copyModel(CopyModelRequest copyModelRequest);
public ResponseEntity<Void> deleteModel(DeleteModelRequest deleteModelRequest);
public Flux<ProgressResponse> pullModel(PullModelRequest pullModelRequest);
}
```

Builder:

```java
public static final class Builder {
public Builder baseUrl(String baseUrl);
public Builder restClientBuilder(RestClient.Builder restClientBuilder);
public Builder webClientBuilder(WebClient.Builder webClientBuilder);
public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler);
public OllamaApi build();
}
```

```java
public class OllamaChatOptions implements ToolCallingChatOptions {
public static Builder builder();
public static OllamaChatOptions fromOptions(OllamaChatOptions options);
public Map<String, Object> toMap();
public OllamaChatOptions copy();
// Getters and setters for all configuration properties
public String getModel();
public void setModel(String model);
public Double getTemperature();
public void setTemperature(Double temperature);
// ... and many more
}
```

Key Categories: Model selection, generation parameters, sampling control, memory/GPU management, tool calling, thinking options.
Complete Chat Options Reference
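
As a sketch of these categories in use, per-request options can be passed via the `Prompt` to override the defaults configured on the model. `model()` and `temperature()` appear in the quick start above; `topP()` and `numCtx()` are assumed builder names mirroring the corresponding Ollama sampling and memory options.

```java
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.api.OllamaChatOptions;
import org.springframework.ai.ollama.api.OllamaModel;

// Per-request options override the defaults configured on the chat model.
OllamaChatOptions requestOptions = OllamaChatOptions.builder()
    .model(OllamaModel.LLAMA3_2_3B.id())   // model selection
    .temperature(0.2)                      // generation parameter
    .topP(0.9)                             // sampling control (assumed builder name)
    .numCtx(8192)                          // context window size (assumed builder name)
    .build();

Prompt prompt = new Prompt("Summarize the Spring AI project in one sentence.", requestOptions);
ChatResponse tunedResponse = chatModel.call(prompt);
```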
```java
public class OllamaEmbeddingOptions implements EmbeddingOptions {
public static Builder builder();
public static OllamaEmbeddingOptions fromOptions(OllamaEmbeddingOptions options);
public Map<String, Object> toMap();
public OllamaEmbeddingOptions copy();
public Integer getDimensions();
// Getters and setters for configuration properties
public String getModel();
public void setModel(String model);
// ... and more
}
```

Key Categories: Model selection, memory/GPU management, processing options.
Complete Embedding Options Reference
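
For example, per-request options can select a different embedding model than the configured default. This sketch reuses the `embeddingModel` from the quick start and only relies on the `model()` builder method shown there.

```java
import java.util.List;

import org.springframework.ai.embedding.EmbeddingRequest;
import org.springframework.ai.embedding.EmbeddingResponse;
import org.springframework.ai.ollama.api.OllamaEmbeddingOptions;
import org.springframework.ai.ollama.api.OllamaModel;

// Per-request options override the embedding model's defaults.
OllamaEmbeddingOptions embedOptions = OllamaEmbeddingOptions.builder()
    .model(OllamaModel.MXBAI_EMBED_LARGE.id())
    .build();

EmbeddingRequest embedRequest = new EmbeddingRequest(List.of("Spring AI", "Ollama"), embedOptions);
EmbeddingResponse embedResponse = embeddingModel.call(embedRequest);
```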
```java
public class OllamaModelManager {
public OllamaModelManager(OllamaApi ollamaApi);
public OllamaModelManager(OllamaApi ollamaApi, ModelManagementOptions options);
public boolean isModelAvailable(String modelName);
public void deleteModel(String modelName);
public void pullModel(String modelName);
public void pullModel(String modelName, PullModelStrategy strategy);
}
```

Configuration:

```java
public record ModelManagementOptions(
PullModelStrategy pullModelStrategy,
List<String> additionalModels,
Duration timeout,
Integer maxRetries
) {
public static ModelManagementOptions defaults();
public static Builder builder();
}
```

Pull Strategy:

```java
public enum PullModelStrategy {
ALWAYS,
WHEN_MISSING,
NEVER
}
```

```java
public enum OllamaModel implements ChatModelDescription {
// Qwen series
QWEN_2_5_3B, QWEN_2_5_7B, QWEN2_5_VL, QWEN3_7B, QWEN3_4B,
QWEN3_4B_THINKING, QWEN_3_1_7_B, QWEN_3_06B, QWQ,
// Llama series
LLAMA2, LLAMA3, LLAMA3_1, LLAMA3_2, LLAMA3_2_VISION_11b,
LLAMA3_2_VISION_90b, LLAMA3_2_1B, LLAMA3_2_3B,
// Mistral
MISTRAL, MISTRAL_NEMO,
// Other models
MOONDREAM, DOLPHIN_PHI, PHI, PHI3, NEURAL_CHAT, STARLING_LM,
CODELLAMA, ORCA_MINI, LLAVA, GEMMA, GEMMA3, LLAMA2_UNCENSORED,
// Embedding models
NOMIC_EMBED_TEXT, MXBAI_EMBED_LARGE;
public String id();
public String getName();
}
```

```java
public record Message(
Role role,
String content,
List<String> images,
List<ToolCall> toolCalls,
String toolName,
String thinking
);
public enum Role {
SYSTEM, USER, ASSISTANT, TOOL
}
public record ChatRequest(
String model,
List<Message> messages,
Boolean stream,
Object format,
String keepAlive,
List<Tool> tools,
Map<String, Object> options,
ThinkOption think
);
public record ChatResponse(
String model,
Instant createdAt,
Message message,
String doneReason,
Boolean done,
Long totalDuration,
Long loadDuration,
Integer promptEvalCount,
Long promptEvalDuration,
Integer evalCount,
Long evalDuration
);
```

```java
public record EmbeddingsRequest(
String model,
List<String> input,
String keepAlive,
Map<String, Object> options,
Boolean truncate
);
public record EmbeddingsResponse(
String model,
List<float[]> embeddings,
Long totalDuration,
Long loadDuration,
Integer promptEvalCount
);
```

Tool Definition in ChatRequest:

```java
public record Tool(
Type type,
Function function
);
public record Function(
String name,
String description,
Map<String, Object> parameters
);
```

Tool Calling in OllamaChatOptions:
- `List<ToolCallback> toolCallbacks` - Callbacks for tool execution
- `Set<String> toolNames` - Names of tools to use
- `Map<String, Object> toolContext` - Context for tool execution
- `Boolean internalToolExecutionEnabled` - Enable automatic tool execution

Thinking Options:

```java
public sealed interface ThinkOption {
Object toJsonValue();
}
public record ThinkBoolean(boolean enabled) implements ThinkOption {
public static final ThinkBoolean ENABLED;
public static final ThinkBoolean DISABLED;
}
public record ThinkLevel(String level) implements ThinkOption {
public static final ThinkLevel LOW;
public static final ThinkLevel MEDIUM;
public static final ThinkLevel HIGH;
}
```

Image Input in Messages:

```java
// In Message record
List<String> images // Base64-encoded images
```

Compatible models: LLaVA, Llama 3.2 Vision, Qwen2.5-VL, Moondream
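
As a minimal sketch of image input at the API level, the `images` field can carry Base64-encoded data via the canonical constructor of the `Message` record shown in the data-types section. The nested import locations (`OllamaApi.Message`, `OllamaApi.Message.Role`) and the file name are assumptions; a builder may also be available.

```java
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;
import java.util.List;

import org.springframework.ai.ollama.api.OllamaApi.Message;
import org.springframework.ai.ollama.api.OllamaApi.Message.Role;

// Read a local image and Base64-encode it for the images field (may throw IOException).
byte[] imageBytes = Files.readAllBytes(Path.of("cat.png"));
String base64Image = Base64.getEncoder().encodeToString(imageBytes);

// Canonical record constructor: role, content, images, toolCalls, toolName, thinking.
Message userMessage = new Message(
    Role.USER,
    "What is in this picture?",
    List.of(base64Image),
    null,   // toolCalls
    null,   // toolName
    null    // thinking
);
```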
Observation Setup:
```java
// In OllamaChatModel
public void setObservationConvention(ChatModelObservationConvention observationConvention);
// In OllamaEmbeddingModel
public void setObservationConvention(EmbeddingModelObservationConvention observationConvention);
// Builder methods
Builder observationRegistry(ObservationRegistry observationRegistry);
```

Available Metrics:
```java
// In OllamaChatModel.Builder
public Builder retryTemplate(RetryTemplate retryTemplate);
// In OllamaApi.Builder
public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler);
```

```java
public final class OllamaApiConstants {
public static final String DEFAULT_BASE_URL = "http://localhost:11434";
public static final String PROVIDER_NAME = "ollama";
}
```

```java
public final class OllamaApiHelper {
public static boolean isStreamingToolCall(OllamaApi.ChatResponse response);
public static boolean isStreamingDone(OllamaApi.ChatResponse response);
public static OllamaApi.ChatResponse merge(OllamaApi.ChatResponse previous, OllamaApi.ChatResponse current);
}
```

Usage:

```java
Flux<OllamaApi.ChatResponse> stream = ollamaApi.streamingChat(request);
stream.subscribe(chunk -> {
if (OllamaApiHelper.isStreamingToolCall(chunk)) {
// Handle tool call chunk
}
if (OllamaApiHelper.isStreamingDone(chunk)) {
// Streaming complete
}
});
```

```java
public class OllamaRuntimeHints implements RuntimeHintsRegistrar {
public void registerHints(RuntimeHints hints, ClassLoader classLoader);
}
```

The library automatically registers reflection hints for native image compilation.
Required:
Optional:
tessl i tessl/maven-org-springframework-ai--spring-ai-ollama@1.1.1