Quick Start Guide

Spring AI Ollama provides Spring Boot-compatible ChatModel and EmbeddingModel implementations for running large language models locally, with support for streaming, tool calling, model management, and observability. This guide will get you started in minutes.

Prerequisites

  1. Ollama Server: Install and run Ollama locally

    # Download from https://ollama.ai
    # Or install with Homebrew on macOS
    brew install ollama
    
    # Start Ollama server
    ollama serve
  2. Java 17+: Spring AI Ollama requires Java 17 or later

  3. Maven/Gradle: Add the dependency to your project

Step 1: Add Dependency

Maven

<dependency>
    <groupId>org.springframework.ai</groupId>
    <artifactId>spring-ai-ollama</artifactId>
    <version>1.1.2</version>
</dependency>

Gradle

implementation 'org.springframework.ai:spring-ai-ollama:1.1.2'
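
If you manage versions with the Spring AI BOM (org.springframework.ai:spring-ai-bom), you can import it once and omit the version on individual Spring AI artifacts. A minimal Maven sketch, assuming the BOM is published at the same 1.1.2 version:

<dependencyManagement>
    <dependencies>
        <dependency>
            <groupId>org.springframework.ai</groupId>
            <artifactId>spring-ai-bom</artifactId>
            <version>1.1.2</version>
            <type>pom</type>
            <scope>import</scope>
        </dependency>
    </dependencies>
</dependencyManagement>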

Step 2: Pull a Model

Before using Ollama, pull a model:

# Pull Llama 3 (recommended for general use)
ollama pull llama3

# Or pull a smaller model for testing
ollama pull qwen3:0.6b

# For embeddings
ollama pull nomic-embed-text
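
Before wiring anything up, you can verify that the models were downloaded:

# List locally available models
ollama list

# Or query the HTTP API directly
curl http://localhost:11434/api/tags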

Step 3: Create Your First Chat

Simple Chat Example

import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaChatOptions;
import org.springframework.ai.ollama.api.OllamaModel;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.chat.model.ChatResponse;

public class SimpleChatExample {
    public static void main(String[] args) {
        // 1. Create API client
        OllamaApi api = OllamaApi.builder()
            .baseUrl("http://localhost:11434")
            .build();

        // 2. Build chat model
        OllamaChatModel chatModel = OllamaChatModel.builder()
            .ollamaApi(api)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3.id())
                .temperature(0.7)
                .build())
            .build();

        // 3. Send a message
        ChatResponse response = chatModel.call(new Prompt("What is Spring AI?"));
        
        // 4. Get the response
        String answer = response.getResult().getOutput().getText();
        System.out.println(answer);
    }
}
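
Continuing the example above, each ChatResponse also carries metadata such as token usage. A small sketch (method names follow Spring AI's Usage interface; verify them against your exact version):

// Inspect token usage reported by the server (sketch; getUsage() per Spring AI's response metadata API)
var usage = response.getMetadata().getUsage();
System.out.println("Prompt tokens: " + usage.getPromptTokens());
System.out.println("Completion tokens: " + usage.getCompletionTokens());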

Streaming Chat Example

import reactor.core.publisher.Flux;

public class StreamingChatExample {
    public static void main(String[] args) {
        // Setup (same as above)
        OllamaApi api = OllamaApi.builder().build();
        OllamaChatModel chatModel = OllamaChatModel.builder()
            .ollamaApi(api)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3.id())
                .build())
            .build();

        // Stream the response
        Flux<ChatResponse> stream = chatModel.stream(new Prompt("Tell me a short story"));
        
        stream.subscribe(
            chunk -> System.out.print(chunk.getResult().getOutput().getText()),
            error -> System.err.println("Error: " + error),
            () -> System.out.println("\n[Complete]")
        );
        
        // Keep main thread alive for streaming
        try {
            Thread.sleep(10000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}
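
For a simple command-line demo you can also block until the stream completes instead of sleeping. A minimal alternative sketch using standard Reactor operators, assuming you only need the final concatenated text:

import java.util.Objects;
import java.util.stream.Collectors;

// Join all streamed chunks into one string, blocking until the stream finishes
String story = chatModel.stream(new Prompt("Tell me a short story"))
    .map(chunk -> chunk.getResult().getOutput().getText())
    .filter(Objects::nonNull)
    .collect(Collectors.joining())
    .block();
System.out.println(story);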

Step 4: Generate Embeddings

import org.springframework.ai.ollama.OllamaEmbeddingModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaEmbeddingOptions;
import org.springframework.ai.ollama.api.OllamaModel;
import org.springframework.ai.embedding.EmbeddingRequest;
import org.springframework.ai.embedding.EmbeddingResponse;
import org.springframework.ai.embedding.EmbeddingOptions;
import java.util.List;

public class EmbeddingExample {
    public static void main(String[] args) {
        // 1. Create API client
        OllamaApi api = OllamaApi.builder().build();

        // 2. Build embedding model
        OllamaEmbeddingModel embeddingModel = OllamaEmbeddingModel.builder()
            .ollamaApi(api)
            .defaultOptions(OllamaEmbeddingOptions.builder()
                .model(OllamaModel.NOMIC_EMBED_TEXT.id())
                .build())
            .build();

        // 3. Generate single embedding
        float[] embedding = embeddingModel.embed("Hello, world!");
        System.out.println("Embedding dimension: " + embedding.length);

        // 4. Generate batch embeddings
        List<String> texts = List.of(
            "First document",
            "Second document",
            "Third document"
        );
        
        EmbeddingRequest request = new EmbeddingRequest(texts, EmbeddingOptions.EMPTY);
        EmbeddingResponse response = embeddingModel.call(request);
        
        System.out.println("Generated " + response.getResults().size() + " embeddings");
    }
}
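
Embeddings are typically compared with cosine similarity, for example to rank documents against a query. A self-contained helper (plain Java, not part of Spring AI):

// Cosine similarity between two embedding vectors (assumes equal, non-zero lengths)
static double cosineSimilarity(float[] a, float[] b) {
    double dot = 0.0, normA = 0.0, normB = 0.0;
    for (int i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}

For example, cosineSimilarity(embeddingModel.embed("query"), embedding) scores how close two texts sit in embedding space.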

Step 5: Auto-Pull Models (Optional)

Configure automatic model pulling:

import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaChatOptions;
import org.springframework.ai.ollama.management.ModelManagementOptions;
import org.springframework.ai.ollama.management.PullModelStrategy;
import java.time.Duration;

public class AutoPullExample {
    public static void main(String[] args) {
        OllamaApi api = OllamaApi.builder().build();

        // Configure auto-pull
        ModelManagementOptions modelMgmt = ModelManagementOptions.builder()
            .pullModelStrategy(PullModelStrategy.WHEN_MISSING)
            .timeout(Duration.ofMinutes(10))
            .build();

        // Model will be pulled automatically if not available
        OllamaChatModel chatModel = OllamaChatModel.builder()
            .ollamaApi(api)
            .defaultOptions(OllamaChatOptions.builder()
                .model("llama3")
                .build())
            .modelManagementOptions(modelMgmt)
            .build();

        // Use the model (will auto-pull if missing)
        ChatResponse response = chatModel.call(new Prompt("Hello!"));
        System.out.println(response.getResult().getOutput().getText());
    }
}
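
Besides WHEN_MISSING, PullModelStrategy also defines ALWAYS (pull on every call) and NEVER; NEVER is the default, so without explicit model management options a missing model results in an error.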

Spring Boot Integration

Configuration Class

import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.ai.ollama.OllamaEmbeddingModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaChatOptions;
import org.springframework.ai.ollama.api.OllamaEmbeddingOptions;
import org.springframework.ai.ollama.api.OllamaModel;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class OllamaConfig {

    @Bean
    public OllamaApi ollamaApi() {
        return OllamaApi.builder()
            .baseUrl("http://localhost:11434")
            .build();
    }

    @Bean
    public OllamaChatModel chatModel(OllamaApi ollamaApi) {
        return OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3.id())
                .temperature(0.7)
                .build())
            .build();
    }

    @Bean
    public OllamaEmbeddingModel embeddingModel(OllamaApi ollamaApi) {
        return OllamaEmbeddingModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaEmbeddingOptions.builder()
                .model(OllamaModel.NOMIC_EMBED_TEXT.id())
                .build())
            .build();
    }
}
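
To avoid hard-coding the URL, you can externalize it to a configuration property. A sketch using a hypothetical ollama.base-url property with a localhost fallback:

import org.springframework.beans.factory.annotation.Value;

@Bean
public OllamaApi ollamaApi(@Value("${ollama.base-url:http://localhost:11434}") String baseUrl) {
    // "ollama.base-url" is a made-up property name for this sketch
    return OllamaApi.builder().baseUrl(baseUrl).build();
}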

Service Class

import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.chat.prompt.Prompt;
import org.springframework.ai.ollama.OllamaChatModel;
import org.springframework.stereotype.Service;
import reactor.core.publisher.Flux;

@Service
public class ChatService {

    private final OllamaChatModel chatModel;

    public ChatService(OllamaChatModel chatModel) {
        this.chatModel = chatModel;
    }

    public String chat(String userMessage) {
        ChatResponse response = chatModel.call(new Prompt(userMessage));
        return response.getResult().getOutput().getText();
    }

    public Flux<String> chatStream(String userMessage) {
        return chatModel.stream(new Prompt(userMessage))
            .map(chunk -> chunk.getResult().getOutput().getText());
    }
}

REST Controller

import org.springframework.web.bind.annotation.*;
import reactor.core.publisher.Flux;

@RestController
@RequestMapping("/api/chat")
public class ChatController {

    private final ChatService chatService;

    public ChatController(ChatService chatService) {
        this.chatService = chatService;
    }

    @PostMapping
    public String chat(@RequestBody String message) {
        return chatService.chat(message);
    }

    @PostMapping("/stream")
    public Flux<String> chatStream(@RequestBody String message) {
        return chatService.chatStream(message);
    }
}
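
With the application running (assuming Spring Boot's default port 8080), you can exercise both endpoints with curl:

# Blocking endpoint
curl -X POST http://localhost:8080/api/chat \
    -H "Content-Type: text/plain" \
    -d "What is Spring AI?"

# Streaming endpoint (-N disables curl's output buffering)
curl -N -X POST http://localhost:8080/api/chat/stream \
    -H "Content-Type: text/plain" \
    -d "Tell me a short story"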

Common Issues

Issue: Connection Refused

Problem: Cannot connect to Ollama server

Solution:

# Check if Ollama is running
curl http://localhost:11434/api/version

# Start Ollama if not running
ollama serve

Issue: Model Not Found

Problem: 404 error when calling the model

Solution:

# Pull the model first
ollama pull llama3

# Or use auto-pull in code
ModelManagementOptions.builder()
    .pullModelStrategy(PullModelStrategy.WHEN_MISSING)
    .build()

Issue: Out of Memory

Problem: Model too large for available RAM

Solution:

// Use a smaller model
.model("qwen3:0.6b")  // 0.6B parameters

// Or use CPU-only mode
OllamaChatOptions.builder()
    .model("llama3")
    .numGPU(0)  // CPU only
    .build()

Next Steps

  • Learn More: Explore the complete reference documentation
  • Real-World Examples: See real-world scenarios
  • Advanced Features: Check out tool calling, multimodal, and thinking models
  • Configuration: Deep dive into chat options and embedding options

Resources

  • Ollama Website: https://ollama.ai
  • Ollama Model Library: https://ollama.com/library
  • Spring AI Documentation: https://docs.spring.io/spring-ai/reference/
  • Spring AI GitHub: https://github.com/spring-projects/spring-ai