CtrlK
Community · Documentation · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-ollama

Spring Boot-compatible Ollama integration providing ChatModel and EmbeddingModel implementations for running large language models locally with support for streaming, tool calling, model management, and observability.

Overview
Eval results
Files

api-client.mddocs/reference/

API Client

The OllamaApi class provides a low-level REST API client for direct interaction with the Ollama server, offering fine-grained control over requests and responses.

Core API

package org.springframework.ai.ollama.api;

public final class OllamaApi {
    // Constants
    public static final String REQUEST_BODY_NULL_ERROR = "The request body can not be null.";

    // Factory method
    public static Builder builder();

    // Chat operations
    public ChatResponse chat(ChatRequest chatRequest);
    public Flux<ChatResponse> streamingChat(ChatRequest chatRequest);

    // Embedding operations
    public EmbeddingsResponse embed(EmbeddingsRequest embeddingsRequest);

    // Model management operations
    public ListModelResponse listModels();
    public ShowModelResponse showModel(ShowModelRequest showModelRequest);
    public ResponseEntity<Void> copyModel(CopyModelRequest copyModelRequest);
    public ResponseEntity<Void> deleteModel(DeleteModelRequest deleteModelRequest);
    public Flux<ProgressResponse> pullModel(PullModelRequest pullModelRequest);
}

Builder API

public static final class Builder {
    public Builder baseUrl(String baseUrl);
    public Builder restClientBuilder(RestClient.Builder restClientBuilder);
    public Builder webClientBuilder(WebClient.Builder webClientBuilder);
    public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler);
    public OllamaApi build();
}

Creating the Client

Basic Client

import org.springframework.ai.ollama.api.OllamaApi;

// Default configuration (localhost:11434)
OllamaApi api = OllamaApi.builder().build();

// Custom base URL
OllamaApi api = OllamaApi.builder()
    .baseUrl("http://192.168.1.100:11434")
    .build();

Custom REST Client

import org.springframework.web.client.RestClient;

RestClient.Builder restClientBuilder = RestClient.builder()
    .requestInterceptor((request, body, execution) -> {
        // Custom request interceptor
        request.getHeaders().add("X-Custom-Header", "value");
        return execution.execute(request, body);
    });

OllamaApi api = OllamaApi.builder()
    .baseUrl("http://localhost:11434")
    .restClientBuilder(restClientBuilder)
    .build();

Custom WebClient (for Streaming)

import org.springframework.web.reactive.function.client.WebClient;

WebClient.Builder webClientBuilder = WebClient.builder()
    .codecs(configurer ->
        configurer.defaultCodecs().maxInMemorySize(16 * 1024 * 1024));

OllamaApi api = OllamaApi.builder()
    .webClientBuilder(webClientBuilder)
    .build();

Custom Error Handler

import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.web.client.DefaultResponseErrorHandler;

ResponseErrorHandler errorHandler = new DefaultResponseErrorHandler() {
    // NOTE(review): handleError(ClientHttpResponse) is deprecated in Spring Framework 6.1+;
    // prefer handleError(URI, HttpMethod, ClientHttpResponse) on newer versions — verify your Spring version.
    @Override
    public void handleError(ClientHttpResponse response) throws IOException {
        // Custom error handling
        logger.error("Ollama API error: {}", response.getStatusCode());
        super.handleError(response);
    }
};

OllamaApi api = OllamaApi.builder()
    .responseErrorHandler(errorHandler)
    .build();

Chat Operations

Non-Streaming Chat

import org.springframework.ai.ollama.api.OllamaApi.*;

// Build request
ChatRequest request = ChatRequest.builder("llama3")
    .messages(List.of(
        Message.builder(Role.SYSTEM)
            .content("You are a helpful assistant.")
            .build(),
        Message.builder(Role.USER)
            .content("What is Java?")
            .build()
    ))
    .stream(false)
    .build();

// Execute request
ChatResponse response = api.chat(request);

// Access response
String content = response.message().content();
Integer promptTokens = response.promptEvalCount();
Integer completionTokens = response.evalCount();
String model = response.model();

Streaming Chat

import reactor.core.publisher.Flux;

// Build streaming request
ChatRequest request = ChatRequest.builder("llama3")
    .messages(List.of(
        Message.builder(Role.USER)
            .content("Tell me a story")
            .build()
    ))
    .stream(true)
    .build();

// Execute streaming request
Flux<ChatResponse> stream = api.streamingChat(request);

// Subscribe to stream
stream.subscribe(
    chunk -> {
        String content = chunk.message().content();
        System.out.print(content);

        // Check if done
        if (Boolean.TRUE.equals(chunk.done())) {
            System.out.println("\nTotal tokens: " + chunk.evalCount());
        }
    },
    error -> System.err.println("Error: " + error.getMessage()),
    () -> System.out.println("\nStreaming complete")
);

Chat with Options

import org.springframework.ai.ollama.api.OllamaChatOptions;
import java.util.Map;

// Create options
OllamaChatOptions options = OllamaChatOptions.builder()
    .temperature(0.7)
    .topP(0.9)
    .topK(40)
    .numPredict(500)
    .build();

// Build request with options
ChatRequest request = ChatRequest.builder("llama3")
    .messages(messages)
    .options(options.toMap())
    .stream(false)
    .build();

// Or inline options
ChatRequest request = ChatRequest.builder("llama3")
    .messages(messages)
    .options(Map.of(
        "temperature", 0.7,
        "top_p", 0.9
    ))
    .build();

Chat with Images (Multimodal)

import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Base64;

// Read and encode image
byte[] imageBytes = Files.readAllBytes(Path.of("image.jpg"));
String base64Image = Base64.getEncoder().encodeToString(imageBytes);

// Create message with image
Message message = Message.builder(Role.USER)
    .content("What's in this image?")
    .images(List.of(base64Image))
    .build();

ChatRequest request = ChatRequest.builder("llava")
    .messages(List.of(message))
    .stream(false)
    .build();

ChatResponse response = api.chat(request);

Chat with Tools

// Define tool function
ChatRequest.Tool.Function weatherFunction = new ChatRequest.Tool.Function(
    "get_weather",
    "Get the current weather for a location",
    Map.of(
        "type", "object",
        "properties", Map.of(
            "location", Map.of(
                "type", "string",
                "description", "The city and state"
            )
        ),
        "required", List.of("location")
    )
);

ChatRequest.Tool tool = new ChatRequest.Tool(weatherFunction);

// Request with tools
ChatRequest request = ChatRequest.builder("llama3")
    .messages(messages)
    .tools(List.of(tool))
    .stream(false)
    .build();

ChatResponse response = api.chat(request);

// Check for tool calls
if (response.message().toolCalls() != null) {
    for (Message.ToolCall toolCall : response.message().toolCalls()) {
        String functionName = toolCall.function().name();
        Map<String, Object> arguments = toolCall.function().arguments();
        // Execute tool and send result back
    }
}

Chat with JSON Format

ChatRequest request = ChatRequest.builder("llama3")
    .messages(List.of(
        Message.builder(Role.USER)
            .content("List three colors as a JSON array")
            .build()
    ))
    .format("json")
    .stream(false)
    .build();

ChatResponse response = api.chat(request);
String jsonContent = response.message().content();

Chat with Thinking

import org.springframework.ai.ollama.api.ThinkOption;

ChatRequest request = ChatRequest.builder("qwen3:4b-thinking")
    .messages(messages)
    .think(ThinkOption.ThinkBoolean.ENABLED)
    .stream(false)
    .build();

ChatResponse response = api.chat(request);
String thinking = response.message().thinking();

Embedding Operations

Generate Embeddings

// Single text
EmbeddingsRequest request = new EmbeddingsRequest(
    "nomic-embed-text",
    "Hello world"
);

EmbeddingsResponse response = api.embed(request);
float[] embedding = response.embeddings().get(0);

// Multiple texts
EmbeddingsRequest request = new EmbeddingsRequest(
    "nomic-embed-text",
    List.of("text1", "text2", "text3"),
    null,  // keepAlive
    null,  // options
    true   // truncate
);

EmbeddingsResponse response = api.embed(request);
for (float[] emb : response.embeddings()) {
    System.out.println("Embedding dimension: " + emb.length);
}

Model Management Operations

List Models

ListModelResponse response = api.listModels();

for (Model model : response.models()) {
    System.out.println("Name: " + model.name());
    System.out.println("Size: " + model.size());
    System.out.println("Modified: " + model.modifiedAt());

    if (model.details() != null) {
        System.out.println("Format: " + model.details().format());
        System.out.println("Family: " + model.details().family());
        System.out.println("Parameters: " + model.details().parameterSize());
    }
}

Show Model Information

ShowModelRequest request = new ShowModelRequest("llama3");
ShowModelResponse response = api.showModel(request);

System.out.println("License: " + response.license());
System.out.println("Modelfile: " + response.modelfile());
System.out.println("Template: " + response.template());
System.out.println("Parameters: " + response.parameters());

if (response.details() != null) {
    System.out.println("Family: " + response.details().family());
    System.out.println("Parameter size: " + response.details().parameterSize());
}

Pull Model

import reactor.core.publisher.Flux;

PullModelRequest request = new PullModelRequest("llama3");

Flux<ProgressResponse> progress = api.pullModel(request);

progress.subscribe(
    p -> {
        System.out.println("Status: " + p.status());
        if (p.total() != null && p.completed() != null) {
            double percent = (p.completed() * 100.0) / p.total();
            System.out.printf("Progress: %.2f%%\n", percent);
        }
    },
    error -> System.err.println("Pull failed: " + error.getMessage()),
    () -> System.out.println("Pull complete")
);

Copy Model

import org.springframework.http.ResponseEntity;

CopyModelRequest request = new CopyModelRequest("llama3", "llama3-backup");
ResponseEntity<Void> response = api.copyModel(request);

if (response.getStatusCode().is2xxSuccessful()) {
    System.out.println("Model copied successfully");
}

Delete Model

DeleteModelRequest request = new DeleteModelRequest("llama3-backup");
ResponseEntity<Void> response = api.deleteModel(request);

if (response.getStatusCode().is2xxSuccessful()) {
    System.out.println("Model deleted successfully");
}

Request Builders

ChatRequest Builder

Complete API for building chat requests.

public static final class Builder {
    // Static factory method — declared on ChatRequest itself, i.e. called as ChatRequest.builder(model)
    public static Builder builder(String model);

    // Builder methods
    public Builder messages(List<Message> messages);
    public Builder stream(boolean stream);
    public Builder format(Object format);
    public Builder keepAlive(String keepAlive);
    public Builder tools(List<Tool> tools);

    // Options overloads
    public Builder options(Map<String, Object> options);
    public Builder options(OllamaChatOptions options);

    // Think option methods
    public Builder think(ThinkOption think);
    public Builder enableThinking();
    public Builder disableThinking();
    public Builder thinkLow();
    public Builder thinkMedium();
    public Builder thinkHigh();

    public ChatRequest build();
}

Usage Examples:

// Basic request
ChatRequest request = ChatRequest.builder("llama3")
    .messages(messages)
    .stream(false)
    .format("json")
    .keepAlive("10m")
    .tools(tools)
    .build();

// With options as Map
request = ChatRequest.builder("llama3")
    .messages(messages)
    .options(Map.of("temperature", 0.7, "top_p", 0.9))
    .build();

// With options as OllamaChatOptions (alternative)
OllamaChatOptions chatOptions = OllamaChatOptions.builder()
    .temperature(0.7)
    .topP(0.9)
    .build();

request = ChatRequest.builder("llama3")
    .messages(messages)
    .options(chatOptions)
    .build();

// Convenience methods for thinking
request = ChatRequest.builder("llama3")
    .messages(messages)
    .enableThinking()     // Enable boolean thinking
    .build();

request = ChatRequest.builder("llama3")
    .messages(messages)
    .thinkLow()          // Low thinking level
    .build();

Message Builder

Complete API for building Message instances.

public static final class Builder {
    // Constructor
    public Builder(Role role);

    // Builder methods
    public Builder content(String content);
    public Builder images(List<String> images);
    public Builder toolCalls(List<ToolCall> toolCalls);
    public Builder toolName(String toolName);
    public Builder thinking(String thinking);
    public Message build();
}

// Static factory method
public static Builder builder(Role role);

Usage Examples:

// Basic user message
Message message = Message.builder(Role.USER)
    .content("Hello")
    .build();

// Message with images
Message imageMessage = Message.builder(Role.USER)
    .content("What's in this image?")
    .images(List.of(base64Image))
    .build();

// Tool response message
Message toolResponse = Message.builder(Role.TOOL)
    .content("{\"result\": \"success\"}")
    .toolName("getWeather")
    .build();

// Assistant message with tool calls
Message assistantWithTools = Message.builder(Role.ASSISTANT)
    .content("I'll check the weather")
    .toolCalls(List.of(
        new Message.ToolCall(
            new Message.ToolCallFunction(
                "get_weather",
                Map.of("location", "San Francisco")
            )
        )
    ))
    .build();

// Message with thinking trace
Message thinkingMessage = Message.builder(Role.ASSISTANT)
    .content("The answer is 42")
    .thinking("Let me reason through this step by step...")
    .build();

Constants

import org.springframework.ai.ollama.api.common.OllamaApiConstants;
import org.springframework.ai.ollama.api.OllamaApi;

// OllamaApiConstants
String defaultUrl = OllamaApiConstants.DEFAULT_BASE_URL;  // "http://localhost:11434"
String providerName = OllamaApiConstants.PROVIDER_NAME;   // "ollama"

// OllamaApi error messages
String errorMsg = OllamaApi.REQUEST_BODY_NULL_ERROR;  // "The request body can not be null."

Error Handling

try {
    ChatResponse response = api.chat(request);
} catch (RestClientException e) {
    // Connection or HTTP errors
    System.err.println("API error: " + e.getMessage());
} catch (IllegalArgumentException e) {
    // Invalid request parameters
    System.err.println("Invalid request: " + e.getMessage());
}

Thread Safety

OllamaApi instances are thread-safe and can be shared across multiple threads.

Related Documentation

  • API Types - Complete request/response type reference
  • Chat Model - High-level chat interface
  • Embedding Model - High-level embedding interface
  • Model Management - Model lifecycle management
tessl i tessl/maven-org-springframework-ai--spring-ai-ollama@1.1.1

docs

index.md

tile.json