Spring Boot-compatible Ollama integration providing ChatModel and EmbeddingModel implementations for running large language models locally with support for streaming, tool calling, model management, and observability.
The OllamaApi class provides a low-level REST API client for direct interaction with the Ollama server, offering fine-grained control over requests and responses.
package org.springframework.ai.ollama.api;
public final class OllamaApi {
// Constants
public static final String REQUEST_BODY_NULL_ERROR = "The request body can not be null.";
// Factory method
public static Builder builder();
// Chat operations
public ChatResponse chat(ChatRequest chatRequest);
public Flux<ChatResponse> streamingChat(ChatRequest chatRequest);
// Embedding operations
public EmbeddingsResponse embed(EmbeddingsRequest embeddingsRequest);
// Model management operations
public ListModelResponse listModels();
public ShowModelResponse showModel(ShowModelRequest showModelRequest);
public ResponseEntity<Void> copyModel(CopyModelRequest copyModelRequest);
public ResponseEntity<Void> deleteModel(DeleteModelRequest deleteModelRequest);
public Flux<ProgressResponse> pullModel(PullModelRequest pullModelRequest);
}

public static final class Builder {
public Builder baseUrl(String baseUrl);
public Builder restClientBuilder(RestClient.Builder restClientBuilder);
public Builder webClientBuilder(WebClient.Builder webClientBuilder);
public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler);
public OllamaApi build();
}

import org.springframework.ai.ollama.api.OllamaApi;
// Default configuration (localhost:11434)
OllamaApi api = OllamaApi.builder().build();
// Custom base URL
OllamaApi api = OllamaApi.builder()
.baseUrl("http://192.168.1.100:11434")
.build();

import org.springframework.web.client.RestClient;
RestClient.Builder restClientBuilder = RestClient.builder()
.requestInterceptor((request, body, execution) -> {
// Custom request interceptor
request.getHeaders().add("X-Custom-Header", "value");
return execution.execute(request, body);
});
OllamaApi api = OllamaApi.builder()
.baseUrl("http://localhost:11434")
.restClientBuilder(restClientBuilder)
.build();

import org.springframework.web.reactive.function.client.WebClient;
WebClient.Builder webClientBuilder = WebClient.builder()
.codecs(configurer ->
configurer.defaultCodecs().maxInMemorySize(16 * 1024 * 1024));
OllamaApi api = OllamaApi.builder()
.webClientBuilder(webClientBuilder)
.build();

import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.web.client.DefaultResponseErrorHandler;
// Custom error handler that logs the HTTP status code before delegating to
// Spring's default error translation (which raises the usual RestClient exceptions).
// NOTE(review): handleError(ClientHttpResponse) is deprecated since Spring
// Framework 6.1 in favor of handleError(URI, HttpMethod, ClientHttpResponse) —
// confirm against the Spring version in use.
ResponseErrorHandler errorHandler = new DefaultResponseErrorHandler() {
@Override
public void handleError(ClientHttpResponse response) throws IOException {
// Custom error handling
logger.error("Ollama API error: {}", response.getStatusCode());
super.handleError(response);
}
};
OllamaApi api = OllamaApi.builder()
.responseErrorHandler(errorHandler)
.build();

import org.springframework.ai.ollama.api.OllamaApi.*;
// Build request
ChatRequest request = ChatRequest.builder("llama3")
.messages(List.of(
Message.builder(Role.SYSTEM)
.content("You are a helpful assistant.")
.build(),
Message.builder(Role.USER)
.content("What is Java?")
.build()
))
.stream(false)
.build();
// Execute request
ChatResponse response = api.chat(request);
// Access response
String content = response.message().content();
Integer promptTokens = response.promptEvalCount();
Integer completionTokens = response.evalCount();
String model = response.model();

import reactor.core.publisher.Flux;
// Build streaming request
ChatRequest request = ChatRequest.builder("llama3")
.messages(List.of(
Message.builder(Role.USER)
.content("Tell me a story")
.build()
))
.stream(true)
.build();
// Execute streaming request
Flux<ChatResponse> stream = api.streamingChat(request);
// Subscribe to stream
stream.subscribe(
chunk -> {
String content = chunk.message().content();
System.out.print(content);
// Check if done
if (Boolean.TRUE.equals(chunk.done())) {
System.out.println("\nTotal tokens: " + chunk.evalCount());
}
},
error -> System.err.println("Error: " + error.getMessage()),
() -> System.out.println("\nStreaming complete")
);

import org.springframework.ai.ollama.api.OllamaChatOptions;
import java.util.Map;
// Create options
OllamaChatOptions options = OllamaChatOptions.builder()
.temperature(0.7)
.topP(0.9)
.topK(40)
.numPredict(500)
.build();
// Build request with options
ChatRequest request = ChatRequest.builder("llama3")
.messages(messages)
.options(options.toMap())
.stream(false)
.build();
// Or inline options
ChatRequest request = ChatRequest.builder("llama3")
.messages(messages)
.options(Map.of(
"temperature", 0.7,
"top_p", 0.9
))
.build();

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;
// Read and encode image
byte[] imageBytes = Files.readAllBytes(Path.of("image.jpg"));
String base64Image = Base64.getEncoder().encodeToString(imageBytes);
// Create message with image
Message message = Message.builder(Role.USER)
.content("What's in this image?")
.images(List.of(base64Image))
.build();
ChatRequest request = ChatRequest.builder("llava")
.messages(List.of(message))
.stream(false)
.build();
ChatResponse response = api.chat(request);

// Define tool function
ChatRequest.Tool.Function weatherFunction = new ChatRequest.Tool.Function(
"get_weather",
"Get the current weather for a location",
Map.of(
"type", "object",
"properties", Map.of(
"location", Map.of(
"type", "string",
"description", "The city and state"
)
),
"required", List.of("location")
)
);
ChatRequest.Tool tool = new ChatRequest.Tool(weatherFunction);
// Request with tools
ChatRequest request = ChatRequest.builder("llama3")
.messages(messages)
.tools(List.of(tool))
.stream(false)
.build();
ChatResponse response = api.chat(request);
// Check for tool calls
if (response.message().toolCalls() != null) {
for (Message.ToolCall toolCall : response.message().toolCalls()) {
String functionName = toolCall.function().name();
Map<String, Object> arguments = toolCall.function().arguments();
// Execute tool and send result back
}
}

ChatRequest request = ChatRequest.builder("llama3")
.messages(List.of(
Message.builder(Role.USER)
.content("List three colors as a JSON array")
.build()
))
.format("json")
.stream(false)
.build();
ChatResponse response = api.chat(request);
String jsonContent = response.message().content();

import org.springframework.ai.ollama.api.ThinkOption;
ChatRequest request = ChatRequest.builder("qwen3:4b-thinking")
.messages(messages)
.think(ThinkOption.ThinkBoolean.ENABLED)
.stream(false)
.build();
ChatResponse response = api.chat(request);
String thinking = response.message().thinking();

// Single text
EmbeddingsRequest request = new EmbeddingsRequest(
"nomic-embed-text",
"Hello world"
);
EmbeddingsResponse response = api.embed(request);
float[] embedding = response.embeddings().get(0);
// Multiple texts
EmbeddingsRequest request = new EmbeddingsRequest(
"nomic-embed-text",
List.of("text1", "text2", "text3"),
null, // keepAlive
null, // options
true // truncate
);
EmbeddingsResponse response = api.embed(request);
for (float[] emb : response.embeddings()) {
System.out.println("Embedding dimension: " + emb.length);
}

ListModelResponse response = api.listModels();
for (Model model : response.models()) {
System.out.println("Name: " + model.name());
System.out.println("Size: " + model.size());
System.out.println("Modified: " + model.modifiedAt());
if (model.details() != null) {
System.out.println("Format: " + model.details().format());
System.out.println("Family: " + model.details().family());
System.out.println("Parameters: " + model.details().parameterSize());
}
}

ShowModelRequest request = new ShowModelRequest("llama3");
ShowModelResponse response = api.showModel(request);
System.out.println("License: " + response.license());
System.out.println("Modelfile: " + response.modelfile());
System.out.println("Template: " + response.template());
System.out.println("Parameters: " + response.parameters());
if (response.details() != null) {
System.out.println("Family: " + response.details().family());
System.out.println("Parameter size: " + response.details().parameterSize());
}

import reactor.core.publisher.Flux;
// Pull (download) a model, streaming progress updates from the Ollama server.
PullModelRequest request = new PullModelRequest("llama3");
Flux<ProgressResponse> progress = api.pullModel(request);
progress.subscribe(
p -> {
System.out.println("Status: " + p.status());
// Guard against a zero/absent total: early progress events may not carry
// byte counts yet, and dividing by zero would print "Infinity"/"NaN".
if (p.total() != null && p.completed() != null && p.total() > 0) {
double percent = (p.completed() * 100.0) / p.total();
System.out.printf("Progress: %.2f%%\n", percent);
}
},
error -> System.err.println("Pull failed: " + error.getMessage()),
() -> System.out.println("Pull complete")
);

import org.springframework.http.ResponseEntity;
CopyModelRequest request = new CopyModelRequest("llama3", "llama3-backup");
ResponseEntity<Void> response = api.copyModel(request);
if (response.getStatusCode().is2xxSuccessful()) {
System.out.println("Model copied successfully");
}

DeleteModelRequest request = new DeleteModelRequest("llama3-backup");
ResponseEntity<Void> response = api.deleteModel(request);
if (response.getStatusCode().is2xxSuccessful()) {
System.out.println("Model deleted successfully");
}

Complete API for building chat requests.
public static final class Builder {
// Factory method
public static Builder builder(String model);
// Builder methods
public Builder messages(List<Message> messages);
public Builder stream(boolean stream);
public Builder format(Object format);
public Builder keepAlive(String keepAlive);
public Builder tools(List<Tool> tools);
// Options overloads
public Builder options(Map<String, Object> options);
public Builder options(OllamaChatOptions options);
// Think option methods
public Builder think(ThinkOption think);
public Builder enableThinking();
public Builder disableThinking();
public Builder thinkLow();
public Builder thinkMedium();
public Builder thinkHigh();
public ChatRequest build();
}

Usage Examples:
// Basic request
ChatRequest request = ChatRequest.builder("llama3")
.messages(messages)
.stream(false)
.format("json")
.keepAlive("10m")
.tools(tools)
.build();
// With options as Map
request = ChatRequest.builder("llama3")
.messages(messages)
.options(Map.of("temperature", 0.7, "top_p", 0.9))
.build();
// With options as OllamaChatOptions (alternative)
OllamaChatOptions chatOptions = OllamaChatOptions.builder()
.temperature(0.7)
.topP(0.9)
.build();
request = ChatRequest.builder("llama3")
.messages(messages)
.options(chatOptions)
.build();
// Convenience methods for thinking
request = ChatRequest.builder("llama3")
.messages(messages)
.enableThinking() // Enable boolean thinking
.build();
request = ChatRequest.builder("llama3")
.messages(messages)
.thinkLow() // Low thinking level
.build();

Complete API for building Message instances.
public static final class Builder {
// Constructor
public Builder(Role role);
// Builder methods
public Builder content(String content);
public Builder images(List<String> images);
public Builder toolCalls(List<ToolCall> toolCalls);
public Builder toolName(String toolName);
public Builder thinking(String thinking);
public Message build();
}
// Static factory method
public static Builder builder(Role role);

Usage Examples:
// Basic user message
Message message = Message.builder(Role.USER)
.content("Hello")
.build();
// Message with images
Message imageMessage = Message.builder(Role.USER)
.content("What's in this image?")
.images(List.of(base64Image))
.build();
// Tool response message
Message toolResponse = Message.builder(Role.TOOL)
.content("{\"result\": \"success\"}")
.toolName("getWeather")
.build();
// Assistant message with tool calls
Message assistantWithTools = Message.builder(Role.ASSISTANT)
.content("I'll check the weather")
.toolCalls(List.of(
new Message.ToolCall(
new Message.ToolCallFunction(
"get_weather",
Map.of("location", "San Francisco")
)
)
))
.build();
// Message with thinking trace
Message thinkingMessage = Message.builder(Role.ASSISTANT)
.content("The answer is 42")
.thinking("Let me reason through this step by step...")
.build();

import org.springframework.ai.ollama.api.common.OllamaApiConstants;
import org.springframework.ai.ollama.api.OllamaApi;
// OllamaApiConstants
String defaultUrl = OllamaApiConstants.DEFAULT_BASE_URL; // "http://localhost:11434"
String providerName = OllamaApiConstants.PROVIDER_NAME; // "ollama"
// OllamaApi error messages
String errorMsg = OllamaApi.REQUEST_BODY_NULL_ERROR; // "The request body can not be null."

try {
ChatResponse response = api.chat(request);
} catch (RestClientException e) {
// Connection or HTTP errors
System.err.println("API error: " + e.getMessage());
} catch (IllegalArgumentException e) {
// Invalid request parameters
System.err.println("Invalid request: " + e.getMessage());
}

OllamaApi instances are thread-safe and can be shared across multiple threads.
tessl i tessl/maven-org-springframework-ai--spring-ai-ollama@1.1.1