CtrlK
Community · Documentation · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-ollama

Spring Boot-compatible Ollama integration providing ChatModel and EmbeddingModel implementations for running large language models locally with support for streaming, tool calling, model management, and observability.

Overview
Eval results
Files

docs/reference/embedding-model.md

Embedding Model

The OllamaEmbeddingModel class provides the implementation of Spring AI's EmbeddingModel interface for Ollama, enabling generation of vector embeddings for text documents.

Core API

package org.springframework.ai.ollama;

public class OllamaEmbeddingModel extends AbstractEmbeddingModel {
    // Constructor
    public OllamaEmbeddingModel(OllamaApi ollamaApi, OllamaEmbeddingOptions defaultOptions,
        ObservationRegistry observationRegistry, ModelManagementOptions modelManagementOptions);

    // Factory method
    public static Builder builder();

    // Embedding operations
    public float[] embed(Document document);
    public EmbeddingResponse call(EmbeddingRequest request);

    // Configuration
    public void setObservationConvention(EmbeddingModelObservationConvention observationConvention);
}

Builder API

public static final class Builder {
    public Builder ollamaApi(OllamaApi ollamaApi);
    public Builder defaultOptions(OllamaEmbeddingOptions defaultOptions);
    public Builder observationRegistry(ObservationRegistry observationRegistry);
    public Builder modelManagementOptions(ModelManagementOptions modelManagementOptions);
    public OllamaEmbeddingModel build();
}

Builder Defaults:

  • defaultOptions: OllamaEmbeddingOptions with model set to OllamaModel.MXBAI_EMBED_LARGE.id() ("mxbai-embed-large") if not provided
  • observationRegistry: ObservationRegistry.NOOP if not provided
  • modelManagementOptions: ModelManagementOptions.defaults() if not provided

Note: The default embedding model is MXBAI_EMBED_LARGE (mxbai-embed-large).

Usage

Basic Embedding

import org.springframework.ai.ollama.OllamaEmbeddingModel;
import org.springframework.ai.ollama.api.OllamaApi;
import org.springframework.ai.ollama.api.OllamaEmbeddingOptions;
import org.springframework.ai.ollama.api.OllamaModel;

// Create API client
OllamaApi api = OllamaApi.builder()
    .baseUrl("http://localhost:11434")
    .build();

// Build embedding model
OllamaEmbeddingModel embeddingModel = OllamaEmbeddingModel.builder()
    .ollamaApi(api)
    .defaultOptions(OllamaEmbeddingOptions.builder()
        .model(OllamaModel.NOMIC_EMBED_TEXT.id())
        .build())
    .build();

// Generate single embedding
float[] embedding = embeddingModel.embed("Hello world");
System.out.println("Embedding dimension: " + embedding.length);

Embedding a Document

import org.springframework.ai.document.Document;

// Create document
Document document = new Document("This is my document content");

// Generate embedding
float[] embedding = embeddingModel.embed(document);

Batch Embeddings

import org.springframework.ai.embedding.EmbeddingOptions;
import org.springframework.ai.embedding.EmbeddingRequest;
import org.springframework.ai.embedding.EmbeddingResponse;
import org.springframework.ai.embedding.Embedding;
import java.util.List;

// Multiple texts
List<String> texts = List.of(
    "First document",
    "Second document",
    "Third document"
);

// Create request
EmbeddingRequest request = new EmbeddingRequest(texts, EmbeddingOptions.EMPTY);

// Generate embeddings
EmbeddingResponse response = embeddingModel.call(request);

// Access individual embeddings
for (Embedding emb : response.getResults()) {
    float[] vector = emb.getOutput();
    int index = emb.getIndex();
    System.out.println("Embedding " + index + " dimension: " + vector.length);
}

Runtime Options Override

// Override options for specific request
OllamaEmbeddingOptions requestOptions = OllamaEmbeddingOptions.builder()
    .truncate(true)
    .keepAlive("10m")
    .build();

EmbeddingRequest request = new EmbeddingRequest(texts, requestOptions);
EmbeddingResponse response = embeddingModel.call(request);

Model Management

import org.springframework.ai.ollama.management.ModelManagementOptions;
import org.springframework.ai.ollama.management.PullModelStrategy;
import java.time.Duration;

// Configure automatic model management
ModelManagementOptions modelMgmt = ModelManagementOptions.builder()
    .pullModelStrategy(PullModelStrategy.WHEN_MISSING)
    .timeout(Duration.ofMinutes(10))
    .build();

OllamaEmbeddingModel embeddingModel = OllamaEmbeddingModel.builder()
    .ollamaApi(api)
    .defaultOptions(OllamaEmbeddingOptions.builder()
        .model("nomic-embed-text")
        .build())
    .modelManagementOptions(modelMgmt)
    .build();

// Model will be automatically pulled if not available

Observability

import io.micrometer.observation.ObservationRegistry;

// Set up observability
ObservationRegistry registry = ObservationRegistry.create();

OllamaEmbeddingModel embeddingModel = OllamaEmbeddingModel.builder()
    .ollamaApi(api)
    .defaultOptions(options)
    .observationRegistry(registry)
    .build();

// Optional: custom observation convention
embeddingModel.setObservationConvention(new CustomEmbeddingModelObservationConvention());

// Metrics will be automatically tracked:
// - Token usage
// - Duration (total, load)
// - Model information

Return Types

EmbeddingResponse

// From Spring AI Core
public class EmbeddingResponse implements ModelResponse<Embedding> {
    public List<Embedding> getResults();
    public EmbeddingResponseMetadata getMetadata();
}

public class Embedding {
    public float[] getOutput();
    public int getIndex();
}

// Metadata includes:
// - Model name
// - Token usage
// - Duration information

Embedding Models

Ollama supports several embedding models:

  • nomic-embed-text: High-performing embedding model with a large context window
  • mxbai-embed-large: State-of-the-art large embedding model from mixedbread.ai (the builder default when no model is specified — see Builder Defaults above)
  • all-minilm: Compact embedding model (requires manual pull)
// Using different embedding models
OllamaEmbeddingOptions nomicOptions = OllamaEmbeddingOptions.builder()
    .model(OllamaModel.NOMIC_EMBED_TEXT.id())
    .build();

OllamaEmbeddingOptions mxbaiOptions = OllamaEmbeddingOptions.builder()
    .model(OllamaModel.MXBAI_EMBED_LARGE.id())
    .build();

Truncation Behavior

Control how inputs that exceed the model's context length are handled:

OllamaEmbeddingOptions options = OllamaEmbeddingOptions.builder()
    .model("nomic-embed-text")
    .truncate(true)  // Truncate long inputs (default: true)
    .build();

// If truncate=false, an error will be thrown for inputs exceeding context length

Keep Alive

Control how long the model stays in memory after use:

OllamaEmbeddingOptions options = OllamaEmbeddingOptions.builder()
    .model("nomic-embed-text")
    .keepAlive("5m")  // Keep model loaded for 5 minutes (default)
    // Other values: "10m", "1h", "-1" (indefinite), "0" (unload immediately)
    .build();

GPU and Memory Configuration

Configure GPU usage and memory settings:

OllamaEmbeddingOptions options = OllamaEmbeddingOptions.builder()
    .model("nomic-embed-text")
    .numGPU(1)           // Number of GPU layers
    .mainGPU(0)          // Primary GPU index
    .lowVRAM(false)      // Low VRAM mode
    .useMMap(true)       // Use memory mapping
    .useMLock(false)     // Lock model in memory
    .numThread(8)        // Number of threads
    .build();

See Embedding Options Documentation for complete configuration reference.

Metadata and Usage Information

The EmbeddingResponse includes metadata:

EmbeddingResponse response = embeddingModel.call(request);

// Usage information
EmbeddingResponseMetadata metadata = response.getMetadata();
Integer promptTokens = metadata.getUsage().getPromptTokens();
String modelName = metadata.getModel();

// Each embedding has an index
for (Embedding emb : response.getResults()) {
    int index = emb.getIndex();  // Position in input list
    float[] vector = emb.getOutput();
}

Similarity Computation

Once you have embeddings, compute similarity:

// Generate embeddings
float[] embedding1 = embeddingModel.embed("cat");
float[] embedding2 = embeddingModel.embed("kitten");
float[] embedding3 = embeddingModel.embed("car");

// Cosine similarity
double sim12 = cosineSimilarity(embedding1, embedding2);  // High similarity
double sim13 = cosineSimilarity(embedding1, embedding3);  // Low similarity

// Helper method for cosine similarity
private double cosineSimilarity(float[] a, float[] b) {
    double dotProduct = 0.0;
    double normA = 0.0;
    double normB = 0.0;
    for (int i = 0; i < a.length; i++) {
        dotProduct += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}

Vector Store Integration

Embeddings are commonly used with vector stores:

import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.document.Document;
import java.util.List;

// Assume you have a VectorStore configured with OllamaEmbeddingModel
VectorStore vectorStore = createVectorStore(embeddingModel);

// Add documents
List<Document> documents = List.of(
    new Document("Content 1"),
    new Document("Content 2")
);
vectorStore.add(documents);

// Search similar documents
List<Document> similar = vectorStore.similaritySearch("query text");

Error Handling

try {
    EmbeddingResponse response = embeddingModel.call(request);
} catch (RestClientException e) {
    // Handle connection errors
} catch (IllegalArgumentException e) {
    // Handle invalid requests (e.g., empty input)
} catch (Exception e) {
    // Handle other errors
}

Thread Safety

OllamaEmbeddingModel instances are thread-safe and can be reused across multiple requests. It's recommended to create a single instance and share it.

Dimensions

Note that OllamaEmbeddingModel does not support the getDimensions() method from the EmbeddingOptions interface (it returns null). Embedding dimensions are determined by the model itself:

  • nomic-embed-text: 768 dimensions
  • mxbai-embed-large: 1024 dimensions
OllamaEmbeddingOptions options = embeddingModel.getDefaultOptions();
Integer dims = options.getDimensions();  // Returns null

Related Documentation

tessl i tessl/maven-org-springframework-ai--spring-ai-ollama@1.1.1

docs

index.md

tile.json