CtrlK
Community · Documentation · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-transformers

ONNX-based Transformer models for text embeddings within the Spring AI framework

Overview
Eval results
Files

docs/reference/model-configuration.md

Model Configuration

Configure the TransformersEmbeddingModel with custom ONNX models, tokenizers, GPU settings, and resource caching options. All configuration must be done before calling afterPropertiesSet() for initialization.

Critical Configuration Rule

All configuration methods must be called BEFORE afterPropertiesSet(). Changing configuration after initialization has no effect or may cause unpredictable behavior.

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// ✅ CORRECT: Configure before initialization
model.setGpuDeviceId(0);
model.setModelResource("classpath:/models/custom.onnx");
model.afterPropertiesSet(); // Initialization uses configuration

// ❌ WRONG: Configuration after initialization has no effect
model.setGpuDeviceId(1); // This will NOT change the GPU device

Capabilities

Model Resource Configuration

Specify custom ONNX model files to use instead of the default all-MiniLM-L6-v2 model. Models can be loaded from classpath, filesystem, or remote HTTP/HTTPS URLs.

/**
 * Set the ONNX model resource by URI string.
 * Must be called before afterPropertiesSet().
 *
 * @param modelResourceUri URI string (classpath:, file:, http:, https:)
 * @throws IllegalArgumentException if modelResourceUri is null
 */
void setModelResource(String modelResourceUri);

/**
 * Set the ONNX model resource using Spring Resource object.
 * Must be called before afterPropertiesSet().
 *
 * @param modelResource Spring Resource pointing to ONNX model file
 * @throws IllegalArgumentException if modelResource is null
 */
void setModelResource(Resource modelResource);

Usage:

import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.UrlResource;
import java.net.URL;

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// From classpath
model.setModelResource("classpath:/models/custom-model.onnx");

// From filesystem (absolute path)
model.setModelResource("file:///path/to/model.onnx");

// From filesystem (using File object)
model.setModelResource("file://" + new File("/path/to/model.onnx").getAbsolutePath());

// From URL
model.setModelResource("https://example.com/models/model.onnx");

// Using Spring Resource
Resource resource = new ClassPathResource("/models/custom-model.onnx");
model.setModelResource(resource);

// Using FileSystemResource
Resource fileResource = new FileSystemResource("/path/to/model.onnx");
model.setModelResource(fileResource);

// Using UrlResource
Resource urlResource = new UrlResource(new URL("https://example.com/model.onnx"));
model.setModelResource(urlResource);

// Initialize after configuration
model.afterPropertiesSet();

Default Model: https://github.com/spring-projects/spring-ai/raw/main/models/spring-ai-transformers/src/main/resources/onnx/all-MiniLM-L6-v2/model.onnx

Model Requirements:

  • Must be ONNX format (.onnx file)
  • Must accept input named "input_ids" (token IDs) and "attention_mask"
  • Must produce output tensor (default name: "last_hidden_state")
  • Compatible with ONNX opset 13 or higher

Edge Cases:

// Null resource - throws IllegalArgumentException
try {
    model.setModelResource((String) null);
} catch (IllegalArgumentException e) {
    // Exception: "Model resource must not be null"
}

// Invalid URL - error during afterPropertiesSet(), not during setter
model.setModelResource("https://invalid-domain-12345.com/model.onnx");
// No error here
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: Network error, timeout, or 404
}

// Non-ONNX file - error during afterPropertiesSet()
model.setModelResource("classpath:/models/wrong-format.txt");
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: Invalid ONNX format
}

// Classpath resource that doesn't exist - error during afterPropertiesSet()
model.setModelResource("classpath:/models/nonexistent.onnx");
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: Resource not found
}

// File URI with spaces or special characters - handle with proper encoding
String pathWithSpaces = "/path/with spaces/model.onnx";
String encodedUri = "file://" + pathWithSpaces.replace(" ", "%20");
model.setModelResource(encodedUri);

Tokenizer Resource Configuration

Specify custom HuggingFace tokenizer files. The tokenizer must be compatible with the ONNX model and provided in JSON format.

/**
 * Set the tokenizer resource by URI string.
 * Must be called before afterPropertiesSet().
 *
 * @param tokenizerResourceUri URI string (classpath:, file:, http:, https:)
 * @throws IllegalArgumentException if tokenizerResourceUri is null
 */
void setTokenizerResource(String tokenizerResourceUri);

/**
 * Set the tokenizer resource using Spring Resource object.
 * Must be called before afterPropertiesSet().
 *
 * @param tokenizerResource Spring Resource pointing to tokenizer.json file
 * @throws IllegalArgumentException if tokenizerResource is null
 */
void setTokenizerResource(Resource tokenizerResource);

Usage:

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// From classpath
model.setTokenizerResource("classpath:/tokenizers/custom-tokenizer.json");

// From filesystem
model.setTokenizerResource("file:///path/to/tokenizer.json");

// From URL
model.setTokenizerResource("https://example.com/tokenizers/tokenizer.json");

// Using Spring Resource
Resource tokenizerResource = new ClassPathResource("/tokenizers/custom-tokenizer.json");
model.setTokenizerResource(tokenizerResource);

model.afterPropertiesSet();

Default Tokenizer: https://raw.githubusercontent.com/spring-projects/spring-ai/main/models/spring-ai-transformers/src/main/resources/onnx/all-MiniLM-L6-v2/tokenizer.json

Tokenizer Requirements:

  • Must be HuggingFace tokenizer JSON format
  • Must be compatible with the ONNX model (same vocabulary)
  • Typically exported from transformers library: tokenizer.save_pretrained()

Model and Tokenizer Compatibility:

// CRITICAL: Model and tokenizer must match
TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// ✅ CORRECT: Matching model and tokenizer
model.setModelResource("https://example.com/bert-base/model.onnx");
model.setTokenizerResource("https://example.com/bert-base/tokenizer.json");
model.afterPropertiesSet(); // Works correctly

// ❌ WRONG: Mismatched model and tokenizer
TransformersEmbeddingModel wrongModel = new TransformersEmbeddingModel();
wrongModel.setModelResource("https://example.com/bert-base/model.onnx");
wrongModel.setTokenizerResource("https://example.com/roberta/tokenizer.json");
try {
    wrongModel.afterPropertiesSet(); // May succeed but produce incorrect embeddings
    float[] embedding = wrongModel.embed("test");
    // Embeddings will be incorrect/meaningless
} catch (Exception e) {
    // Or may fail with ONNX runtime error
}

Edge Cases:

// Null tokenizer - throws IllegalArgumentException
try {
    model.setTokenizerResource((String) null);
} catch (IllegalArgumentException e) {
    // Exception thrown
}

// Invalid JSON format - error during afterPropertiesSet()
model.setTokenizerResource("classpath:/tokenizers/invalid.json");
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: JSON parsing error
}

// Tokenizer file too large - may cause memory issues
// HuggingFace tokenizers are typically 0.5-5 MB
model.setTokenizerResource("https://example.com/huge-tokenizer.json");
try {
    model.afterPropertiesSet();
    // May succeed but consume excessive memory
} catch (OutOfMemoryError e) {
    // Increase heap size or use smaller tokenizer
}

Tokenizer Options Configuration

Provide custom options to the HuggingFace tokenizer. Options are passed as a map of key-value pairs. The tokenizerOptions field is public and can be accessed or modified directly, or set via the setter method.

/**
 * Public field for tokenizer options (can be directly accessed/modified).
 * Mutable field allowing direct manipulation before initialization.
 */
public Map<String, String> tokenizerOptions = Map.of();

/**
 * Set tokenizer options.
 * Must be called before afterPropertiesSet().
 *
 * @param tokenizerOptions Map of option key-value pairs
 */
void setTokenizerOptions(Map<String, String> tokenizerOptions);

Usage:

import java.util.Map;
import java.util.HashMap;

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// Method 1: Using the setter method
Map<String, String> options = new HashMap<>();
options.put("addSpecialTokens", "true");
options.put("modelMaxLength", "512");
options.put("truncation", "true");
options.put("padding", "max_length");
model.setTokenizerOptions(options);

// Method 2: Direct field access (Java 9+)
model.tokenizerOptions = Map.of(
    "addSpecialTokens", "true",
    "modelMaxLength", "512"
);

// Method 3: Direct field modification (for mutable map)
Map<String, String> mutableOptions = new HashMap<>();
mutableOptions.put("addSpecialTokens", "true");
model.tokenizerOptions = mutableOptions;
// Can still add more
model.tokenizerOptions.put("truncation", "true");

model.afterPropertiesSet();

Common Tokenizer Options:

// Common option keys (string values):
// - addSpecialTokens: "true" | "false"
// - modelMaxLength: numeric string (e.g., "512")
// - truncation: "true" | "false"
// - padding: "true" | "false" | "max_length" | "longest"
// - maxLength: numeric string
// - stride: numeric string
// - padToMultipleOf: numeric string

Option Examples:

// Control special tokens (CLS, SEP, etc.)
model.tokenizerOptions = Map.of("addSpecialTokens", "false");
// Embeddings without special tokens

// Set maximum length
model.tokenizerOptions = Map.of("modelMaxLength", "256");
// Sequences truncated at 256 tokens instead of default

// Enable truncation
model.tokenizerOptions = Map.of(
    "truncation", "true",
    "maxLength", "512"
);
// Long sequences automatically truncated

// Enable padding
model.tokenizerOptions = Map.of(
    "padding", "max_length",
    "maxLength", "512"
);
// Short sequences padded to 512 tokens

// Stride for sliding window
model.tokenizerOptions = Map.of(
    "stride", "128",
    "maxLength", "512"
);
// Overlapping windows with 128-token stride

// Pad to multiple
model.tokenizerOptions = Map.of(
    "padToMultipleOf", "8"
);
// Padding length rounded up to multiple of 8 (hardware optimization)

Default: Empty map (Map.of()) - uses tokenizer's built-in defaults

Edge Cases:

// Empty options - valid, uses defaults
model.setTokenizerOptions(Map.of());

// Null options - sets to empty map (implementation-dependent)
model.setTokenizerOptions(null);
// May throw exception or be treated as empty map

// Invalid option values - behavior depends on HuggingFace tokenizer
model.tokenizerOptions = Map.of("modelMaxLength", "invalid");
// May throw exception during afterPropertiesSet() or be ignored

// Conflicting options - last one wins or error
model.tokenizerOptions = Map.of(
    "padding", "false",
    "padding", "true" // Duplicate key - Map.of() throws IllegalArgumentException at runtime
);

// Use mutable map for conflicting options
Map<String, String> opts = new HashMap<>();
opts.put("padding", "false");
opts.put("padding", "true"); // Overwrites previous value
model.tokenizerOptions = opts;

// Case sensitivity - keys are case-sensitive
model.tokenizerOptions = Map.of("addspecialtokens", "true");
// Wrong case - option ignored (correct: "addSpecialTokens")

GPU Device Configuration

Enable GPU acceleration for ONNX model inference by specifying a CUDA device ID. Requires ONNX Runtime with CUDA support.

/**
 * Set the GPU device ID for CUDA acceleration.
 * Must be called before afterPropertiesSet().
 *
 * @param gpuDeviceId GPU device ID (0, 1, 2, etc.), or -1 for CPU (default)
 */
void setGpuDeviceId(int gpuDeviceId);

Usage:

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// Use GPU 0 (first GPU)
model.setGpuDeviceId(0);

// Use GPU 1 (second GPU in multi-GPU system)
model.setGpuDeviceId(1);

// Use CPU (default)
model.setGpuDeviceId(-1);

// Initialize
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // GPU initialization may fail - fall back to CPU
    if (e.getMessage().contains("CUDA") || e.getMessage().contains("GPU")) {
        model = new TransformersEmbeddingModel();
        model.setGpuDeviceId(-1); // CPU fallback
        model.afterPropertiesSet();
    }
}

Default: -1 (CPU inference)

Requirements:

// For GPU support, you need:
// 1. ONNX Runtime with CUDA support
//    Maven: onnxruntime-gpu instead of onnxruntime
// 2. Compatible CUDA drivers
//    CUDA 11.x or 12.x depending on ONNX Runtime version
// 3. Compatible NVIDIA GPU
//    Compute capability 3.5+ (most NVIDIA GPUs from 2012+)

// Check GPU availability programmatically
TransformersEmbeddingModel testModel = new TransformersEmbeddingModel();
testModel.setGpuDeviceId(0);
try {
    testModel.afterPropertiesSet();
    System.out.println("GPU available");
} catch (Exception e) {
    System.out.println("GPU not available, using CPU");
    testModel = new TransformersEmbeddingModel();
    testModel.setGpuDeviceId(-1);
    testModel.afterPropertiesSet();
}

Performance Impact:

// Typical speedup: 2-5x for embedding generation
// Larger speedup for larger batches

// CPU model
TransformersEmbeddingModel cpuModel = new TransformersEmbeddingModel();
cpuModel.setGpuDeviceId(-1);
cpuModel.afterPropertiesSet();

// GPU model
TransformersEmbeddingModel gpuModel = new TransformersEmbeddingModel();
gpuModel.setGpuDeviceId(0);
gpuModel.afterPropertiesSet();

List<String> texts = /* 1000 texts */;

// CPU benchmark
long start = System.currentTimeMillis();
cpuModel.embed(texts);
long cpuTime = System.currentTimeMillis() - start;

// GPU benchmark
start = System.currentTimeMillis();
gpuModel.embed(texts);
long gpuTime = System.currentTimeMillis() - start;

System.out.println("CPU time: " + cpuTime + "ms");
System.out.println("GPU time: " + gpuTime + "ms");
System.out.println("Speedup: " + (cpuTime / (double) gpuTime) + "x");

Edge Cases:

// Invalid GPU ID (>= number of available GPUs) - error during init
model.setGpuDeviceId(10); // Only 2 GPUs available
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: Invalid device ID or CUDA error
}

// Negative value other than -1 - treated as CPU (implementation-dependent)
model.setGpuDeviceId(-5);
// May be treated as CPU or throw error

// GPU out of memory - error during init or first inference
model.setGpuDeviceId(0);
try {
    model.afterPropertiesSet(); // May succeed
    // Large model loaded on GPU with insufficient memory
    List<String> hugeBatch = /* 10000 texts */;
    model.embed(hugeBatch); // May fail here
} catch (Exception e) {
    // CUDA out of memory error
    // Solutions:
    // 1. Use smaller batches
    // 2. Use smaller model
    // 3. Use CPU
}

// Multi-GPU configuration
// Use different GPU for different model instances
TransformersEmbeddingModel model0 = new TransformersEmbeddingModel();
model0.setGpuDeviceId(0);
model0.afterPropertiesSet();

TransformersEmbeddingModel model1 = new TransformersEmbeddingModel();
model1.setGpuDeviceId(1);
model1.afterPropertiesSet();

// Both models can run concurrently on different GPUs

Resource Cache Directory Configuration

Specify a custom directory for caching downloaded models and tokenizers. If not set, uses system temp directory.

/**
 * Set the cache directory for storing downloaded resources.
 * Must be called before afterPropertiesSet().
 *
 * @param resourceCacheDir Path to cache directory
 */
void setResourceCacheDirectory(String resourceCacheDir);

Usage:

import java.io.File;

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// Custom cache directory
model.setResourceCacheDirectory("/var/cache/spring-ai-models");

// User home directory
model.setResourceCacheDirectory(
    System.getProperty("user.home") + "/.spring-ai/cache"
);

// Using File API
File cacheDir = new File(System.getProperty("user.home"), ".spring-ai/cache");
model.setResourceCacheDirectory(cacheDir.getAbsolutePath());

// Relative path (relative to current working directory)
model.setResourceCacheDirectory("./cache");

// Temporary directory with custom subdirectory
model.setResourceCacheDirectory(
    System.getProperty("java.io.tmpdir") + "/my-model-cache"
);

model.afterPropertiesSet();

Default: {java.io.tmpdir}/spring-ai-onnx-generative

Cache Directory Behavior:

// Directory is created automatically if it doesn't exist
model.setResourceCacheDirectory("/path/to/new/cache");
model.afterPropertiesSet();
// /path/to/new/cache is created with subdirectories

// Cached resources persist across application restarts
// First run: Downloads model (~50MB)
// Subsequent runs: Uses cached model (fast startup)

// Multiple applications can share the same cache directory
// But ensure proper file locking if running concurrently

Edge Cases:

// Null cache directory - uses default
model.setResourceCacheDirectory(null);
// May throw exception or use default

// Empty string - may use current directory or throw error
model.setResourceCacheDirectory("");

// Directory with no write permissions - error during init
model.setResourceCacheDirectory("/root/cache"); // No permission
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: Permission denied
    // Solution: Use directory with write permissions
    model.setResourceCacheDirectory(
        System.getProperty("user.home") + "/.spring-ai/cache"
    );
    model.afterPropertiesSet();
}

// Directory on full disk - error during model download
model.setResourceCacheDirectory("/path/to/full/disk");
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: No space left on device
    // Solution: Free up space or use different directory
}

// Directory with special characters
model.setResourceCacheDirectory("/path/with spaces/and-special_chars/");
// Usually works fine on modern filesystems

// Very long path (>255 characters on some systems)
String longPath = "/very/long/path/".repeat(50);
model.setResourceCacheDirectory(longPath);
// May fail on Windows or older filesystems

// Network-mounted directory - works but slower
model.setResourceCacheDirectory("/mnt/network-share/cache");
// Higher latency for cache access

// Directory inside application JAR - not writable
model.setResourceCacheDirectory("classpath:/cache");
// Will fail - classpath resources are read-only

Cache Management:

// Clear cache manually
File cacheDir = new File("/var/cache/spring-ai-models");
if (cacheDir.exists()) {
    deleteDirectory(cacheDir);
}
// Next run will re-download models

// Check cache size
long cacheSize = calculateDirectorySize(cacheDir);
System.out.println("Cache size: " + (cacheSize / 1024 / 1024) + " MB");

// Helper method
/**
 * Recursively deletes a directory and everything inside it.
 * Silently skips entries that cannot be deleted (File.delete() results
 * are not checked, matching best-effort cache-cleanup semantics).
 *
 * @param dir directory (or file) to remove
 */
void deleteDirectory(File dir) {
    File[] children = dir.listFiles(); // null when dir is not a directory or is unreadable
    if (children == null) {
        dir.delete();
        return;
    }
    for (File child : children) {
        if (child.isDirectory()) {
            deleteDirectory(child);
        } else {
            child.delete();
        }
    }
    dir.delete();
}

/**
 * Computes the total size in bytes of all regular files under a directory,
 * descending recursively into subdirectories.
 *
 * @param dir root directory to measure
 * @return cumulative file size in bytes; 0 if dir is empty, unreadable,
 *         or not a directory
 */
long calculateDirectorySize(File dir) {
    long total = 0L;
    File[] entries = dir.listFiles(); // null when dir is unreadable or not a directory
    if (entries == null) {
        return total;
    }
    for (File entry : entries) {
        total += entry.isDirectory() ? calculateDirectorySize(entry) : entry.length();
    }
    return total;
}

Disable Caching Configuration

Disable resource caching to always load models and tokenizers from the original source. Useful for development or when resources should not be cached.

/**
 * Disable resource caching.
 * Must be called before afterPropertiesSet().
 *
 * @param disableCaching true to disable caching, false to enable (default)
 */
void setDisableCaching(boolean disableCaching);

Usage:

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// Disable caching (always load from source)
model.setDisableCaching(true);

// Enable caching (default)
model.setDisableCaching(false);

model.afterPropertiesSet();

Default: false (caching enabled)

Use Cases:

// 1. Development with frequently changing models
TransformersEmbeddingModel devModel = new TransformersEmbeddingModel();
devModel.setModelResource("file:///dev/models/experimental-model.onnx");
devModel.setDisableCaching(true); // Always load latest version
devModel.afterPropertiesSet();

// 2. Security requirements preventing local caching
TransformersEmbeddingModel secureModel = new TransformersEmbeddingModel();
secureModel.setDisableCaching(true); // Don't store on local disk
secureModel.afterPropertiesSet();

// 3. Models already on local filesystem
TransformersEmbeddingModel localModel = new TransformersEmbeddingModel();
localModel.setModelResource("file:///local/models/model.onnx");
localModel.setDisableCaching(true); // No need to cache local files
localModel.afterPropertiesSet();

// 4. Classpath resources (already cached)
TransformersEmbeddingModel classpathModel = new TransformersEmbeddingModel();
classpathModel.setModelResource("classpath:/models/model.onnx");
classpathModel.setDisableCaching(true); // Classpath not cached anyway
classpathModel.afterPropertiesSet();

Impact on Performance:

// With caching enabled (default)
TransformersEmbeddingModel cachedModel = new TransformersEmbeddingModel();
cachedModel.setDisableCaching(false);

long start = System.currentTimeMillis();
cachedModel.afterPropertiesSet(); // First run: slow (download)
long firstInit = System.currentTimeMillis() - start;

start = System.currentTimeMillis();
TransformersEmbeddingModel cachedModel2 = new TransformersEmbeddingModel();
cachedModel2.setDisableCaching(false);
cachedModel2.afterPropertiesSet(); // Second run: fast (cached)
long secondInit = System.currentTimeMillis() - start;

System.out.println("First init (with download): " + firstInit + "ms");
System.out.println("Second init (cached): " + secondInit + "ms");
// First: ~5000-30000ms (network dependent)
// Second: ~100-500ms

// With caching disabled
TransformersEmbeddingModel uncachedModel = new TransformersEmbeddingModel();
uncachedModel.setDisableCaching(true);

start = System.currentTimeMillis();
uncachedModel.afterPropertiesSet(); // Always slow
long uncachedInit = System.currentTimeMillis() - start;

System.out.println("Uncached init: " + uncachedInit + "ms");
// Always: ~5000-30000ms (network dependent)

Edge Cases:

// Disable caching with classpath resource
// (Classpath resources are not cached anyway)
model.setModelResource("classpath:/models/model.onnx");
model.setDisableCaching(true); // No effect
model.setDisableCaching(false); // No effect
// Classpath resources are never cached (already local)

// Disable caching with file: resource
model.setModelResource("file:///local/model.onnx");
model.setDisableCaching(true); // No effect
// File resources are not cached (already local)

// Disable caching with HTTP resource
model.setModelResource("https://example.com/model.onnx");
model.setDisableCaching(true);
// Downloads every time - significant impact

// Disable caching but specify cache directory
model.setDisableCaching(true);
model.setResourceCacheDirectory("/custom/cache");
// Cache directory setting is ignored when caching is disabled

Model Output Name Configuration

Specify the name of the ONNX model output tensor to use for embeddings. Different models may use different output names.

/**
 * Set the ONNX model output tensor name.
 * Must be called before afterPropertiesSet().
 *
 * @param modelOutputName Name of the output tensor
 */
void setModelOutputName(String modelOutputName);

Usage:

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// For models with different output names
model.setModelOutputName("last_hidden_state"); // Default
model.setModelOutputName("pooler_output");
model.setModelOutputName("sentence_embedding");
model.setModelOutputName("token_embeddings");

model.afterPropertiesSet();

Default: "last_hidden_state"

Common Output Names:

// BERT-based models
model.setModelOutputName("last_hidden_state"); // Token-level embeddings
model.setModelOutputName("pooler_output");     // CLS token embedding

// Sentence-BERT models
model.setModelOutputName("sentence_embedding"); // Sentence-level
model.setModelOutputName("token_embeddings");   // Token-level

// Custom models - check ONNX model metadata
// Use Netron (https://netron.app/) to visualize ONNX model
// and identify output tensor names

Validation:

// The model validates that the specified output name exists in the ONNX model
// during initialization and throws IllegalArgumentException if not found
TransformersEmbeddingModel model = new TransformersEmbeddingModel();
model.setModelOutputName("nonexistent_output");

try {
    model.afterPropertiesSet();
} catch (IllegalArgumentException e) {
    // Exception: "Output 'nonexistent_output' not found in model"
    // Available outputs are logged
}

Inspecting Model Outputs:

// To find available output names, check ONNX model metadata
// or catch the initialization exception:

TransformersEmbeddingModel model = new TransformersEmbeddingModel();
model.setModelResource("classpath:/models/unknown-model.onnx");
model.setModelOutputName("invalid");

try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Error message includes available outputs:
    // "Available outputs: [last_hidden_state, pooler_output]"
    System.err.println(e.getMessage());
    
    // Try with correct output name
    model = new TransformersEmbeddingModel();
    model.setModelResource("classpath:/models/unknown-model.onnx");
    model.setModelOutputName("last_hidden_state");
    model.afterPropertiesSet();
}

Edge Cases:

// Null output name - uses default
model.setModelOutputName(null);
// May throw exception or use default

// Empty string - likely error during init
model.setModelOutputName("");
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: Invalid output name
}

// Case-sensitive matching
model.setModelOutputName("Last_Hidden_State"); // Wrong case
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Exception: Output not found
    // Correct: "last_hidden_state" (lowercase)
}

// Whitespace in output name
model.setModelOutputName(" last_hidden_state "); // Leading/trailing space
// May work or may fail depending on implementation
// Better to trim: model.setModelOutputName(name.trim())

Observation Convention Configuration

Configure the Micrometer observation convention for monitoring and metrics collection.

/**
 * Set the observation convention for monitoring.
 * May be called before or after afterPropertiesSet(); takes effect immediately.
 *
 * @param observationConvention The observation convention (must not be null)
 * @throws IllegalArgumentException if observationConvention is null
 */
void setObservationConvention(EmbeddingModelObservationConvention observationConvention);

Usage:

import org.springframework.ai.embedding.observation.EmbeddingModelObservationConvention;
import org.springframework.ai.embedding.observation.DefaultEmbeddingModelObservationConvention;

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// Custom observation convention
EmbeddingModelObservationConvention convention = new CustomObservationConvention();
model.setObservationConvention(convention);

// Or use default
model.setObservationConvention(new DefaultEmbeddingModelObservationConvention());

model.afterPropertiesSet();

Default: DefaultEmbeddingModelObservationConvention

Custom Convention Example:

import org.springframework.ai.embedding.observation.EmbeddingModelObservationConvention;
import org.springframework.ai.embedding.observation.EmbeddingModelObservationContext;
import io.micrometer.common.KeyValues;

/**
 * Example custom Micrometer observation convention for embedding calls.
 * Supplies a custom metric name plus low- and high-cardinality key/values.
 *
 * Fix over the original: KeyValues.of rejects null values, so reading
 * System.getenv("ENV") directly would throw whenever the variable is unset.
 * A non-null fallback ("unknown") is used instead.
 */
public class CustomObservationConvention 
    implements EmbeddingModelObservationConvention {
    
    @Override
    public String getName() {
        // Metric/observation name as registered with Micrometer.
        return "custom.embedding.operation";
    }
    
    @Override
    public String getContextualName(EmbeddingModelObservationContext context) {
        // Human-readable name shown in tracing UIs.
        return "custom_embedding";
    }
    
    @Override
    public KeyValues getLowCardinalityKeyValues(EmbeddingModelObservationContext context) {
        // ENV may be unset; KeyValues.of does not accept null values.
        String env = System.getenv("ENV");
        return KeyValues.of(
            "model.provider", "onnx",
            "model.type", "transformers",
            "environment", env != null ? env : "unknown"
        );
    }
    
    @Override
    public KeyValues getHighCardinalityKeyValues(EmbeddingModelObservationContext context) {
        // NOTE(review): assumes context.getRequest()/getModel() are non-null
        // during an observed embedding call — confirm against Spring AI docs.
        return KeyValues.of(
            "text.count", String.valueOf(context.getRequest().getInstructions().size()),
            "model.dimensions", String.valueOf(context.getModel().dimensions()),
            "timestamp", String.valueOf(System.currentTimeMillis())
        );
    }
}

// Use custom convention
TransformersEmbeddingModel model = new TransformersEmbeddingModel(
    MetadataMode.NONE,
    ObservationRegistry.create()
);
model.setObservationConvention(new CustomObservationConvention());
model.afterPropertiesSet();

Edge Cases:

// Null convention - throws IllegalArgumentException
try {
    model.setObservationConvention(null);
} catch (IllegalArgumentException e) {
    // Exception thrown
}

// Setting convention without observation registry
TransformersEmbeddingModel noObservationModel = new TransformersEmbeddingModel();
// No observation registry in constructor
noObservationModel.setObservationConvention(new CustomObservationConvention());
// No error, but observations are not recorded (no registry)

// Changing convention after initialization
model.afterPropertiesSet();
model.setObservationConvention(new DifferentConvention());
// Takes effect immediately for subsequent embedding calls

Constructor Options

The TransformersEmbeddingModel class provides multiple constructors for different configuration scenarios.

/**
 * Default constructor.
 * Uses MetadataMode.NONE and no observation registry.
 */
public TransformersEmbeddingModel();

/**
 * Constructor with metadata mode.
 *
 * @param metadataMode How to handle document metadata in embeddings
 */
public TransformersEmbeddingModel(MetadataMode metadataMode);

/**
 * Full constructor with metadata mode and observation registry.
 *
 * @param metadataMode How to handle document metadata in embeddings
 * @param observationRegistry Registry for monitoring and metrics
 */
public TransformersEmbeddingModel(
    MetadataMode metadataMode,
    ObservationRegistry observationRegistry
);

Usage:

import org.springframework.ai.document.MetadataMode;
import io.micrometer.observation.ObservationRegistry;

// Default constructor
TransformersEmbeddingModel model1 = new TransformersEmbeddingModel();
// MetadataMode.NONE, no observations

// With metadata mode
TransformersEmbeddingModel model2 =
    new TransformersEmbeddingModel(MetadataMode.EMBED);
// Metadata included in embeddings, no observations

// With monitoring
ObservationRegistry registry = ObservationRegistry.create();
TransformersEmbeddingModel model3 =
    new TransformersEmbeddingModel(MetadataMode.NONE, registry);
// No metadata, observations enabled

// Full configuration
TransformersEmbeddingModel model4 =
    new TransformersEmbeddingModel(MetadataMode.ALL, registry);
// All metadata, observations enabled

Constructor Choice Guide:

// Use default constructor when:
// - Using default settings
// - Configuring everything via setters
// - No monitoring needed
TransformersEmbeddingModel simple = new TransformersEmbeddingModel();

// Use metadata mode constructor when:
// - Need to embed document metadata
// - No monitoring needed
TransformersEmbeddingModel withMeta = 
    new TransformersEmbeddingModel(MetadataMode.EMBED);

// Use full constructor when:
// - Need monitoring and metrics
// - May or may not need metadata embedding
ObservationRegistry registry = ObservationRegistry.create();
TransformersEmbeddingModel monitored = 
    new TransformersEmbeddingModel(MetadataMode.NONE, registry);

Initialization

After configuration, call afterPropertiesSet() to initialize the model. This method:

  • Creates the resource cache service
  • Loads and initializes the tokenizer
  • Creates the ONNX Runtime environment
  • Loads the ONNX model and creates an inference session
  • Validates model inputs and outputs

afterPropertiesSet Method

Initialize the embedding model after all configuration is complete. This is a lifecycle method from the Spring InitializingBean interface.

/**
 * Initialize the model after all configuration is complete.
 * Must be called before using embedding methods.
 * Spring Framework automatically calls this when the bean is initialized.
 *
 * @throws Exception if initialization fails (model loading, ONNX session creation, etc.)
 */
void afterPropertiesSet() throws Exception;

Usage:

TransformersEmbeddingModel model = new TransformersEmbeddingModel();

// Configure model
model.setModelResource("classpath:/models/custom-model.onnx");
model.setTokenizerResource("classpath:/tokenizers/custom-tokenizer.json");
model.setGpuDeviceId(0);
model.setResourceCacheDirectory("/var/cache/models");

// Initialize (required before use)
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // Handle initialization errors
    System.err.println("Failed to initialize model: " + e.getMessage());
    throw e;
}

// Now ready for embedding
float[] embedding = model.embed("Hello world");

Initialization Steps (Internal):

// 1. Create resource cache service
//    - Uses cache directory setting
//    - Creates directory if needed

// 2. Load tokenizer
//    - Downloads if remote and not cached
//    - Parses JSON format
//    - Applies tokenizer options

// 3. Create ONNX Runtime environment
//    - Initializes ONNX Runtime library
//    - Configures logging

// 4. Load ONNX model
//    - Downloads if remote and not cached
//    - Creates ONNX session
//    - Configures GPU if enabled

// 5. Validate model
//    - Checks input requirements (input_ids, attention_mask)
//    - Validates output name exists
//    - Logs model metadata

// Any failure in these steps throws Exception

Spring Integration: When using Spring's dependency injection, the framework automatically calls afterPropertiesSet() if the bean implements InitializingBean.

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class EmbeddingConfig {
    
    @Bean
    public TransformersEmbeddingModel embeddingModel() throws Exception {
        TransformersEmbeddingModel model = new TransformersEmbeddingModel();
        model.setGpuDeviceId(0);
        // No manual call needed here: Spring invokes afterPropertiesSet()
        // automatically (the class implements InitializingBean), even for
        // beans returned from @Bean methods. Calling it manually as well
        // would initialize the model twice.
        return model;
    }
}

// In other beans
@Service
public class MyService {
    @Autowired
    private TransformersEmbeddingModel model;
    // model is already initialized by Spring
}

Error Handling:

TransformersEmbeddingModel model = new TransformersEmbeddingModel();
model.setModelResource("https://example.com/model.onnx");

try {
    model.afterPropertiesSet();
} catch (IOException e) {
    // Network error, file not found, etc.
    System.err.println("I/O error: " + e.getMessage());
} catch (IllegalArgumentException e) {
    // Invalid configuration (e.g., wrong output name)
    System.err.println("Configuration error: " + e.getMessage());
} catch (IllegalStateException e) {
    // Resource caching failure
    System.err.println("Cache error: " + e.getMessage());
} catch (Exception e) {
    // ONNX Runtime errors, GPU errors, etc.
    System.err.println("Initialization error: " + e.getMessage());
    
    // Common error patterns:
    if (e.getMessage().contains("CUDA") || e.getMessage().contains("GPU")) {
        System.err.println("GPU initialization failed, falling back to CPU");
        model = new TransformersEmbeddingModel();
        // Re-apply all prior configuration on the fresh instance;
        // otherwise the fallback silently uses the default bundled model
        model.setModelResource("https://example.com/model.onnx");
        model.setGpuDeviceId(-1);
        model.afterPropertiesSet();
    } else if (e.getMessage().contains("Connection") || e.getMessage().contains("timeout")) {
        System.err.println("Network error, check connectivity or use cached model");
    } else if (e.getMessage().contains("Permission")) {
        System.err.println("Permission denied, check cache directory permissions");
    }
}

Configuration Example: Custom Model

Complete example configuring a custom model with GPU acceleration:

import org.springframework.ai.transformers.TransformersEmbeddingModel;
import org.springframework.ai.document.MetadataMode;
import io.micrometer.observation.ObservationRegistry;

public class EmbeddingConfig {

    public TransformersEmbeddingModel createCustomModel() throws Exception {
        // Create model with metadata support and monitoring
        ObservationRegistry registry = ObservationRegistry.create();
        TransformersEmbeddingModel model = new TransformersEmbeddingModel(
            MetadataMode.EMBED,
            registry
        );

        // Configure custom model and tokenizer
        model.setModelResource("https://example.com/models/custom-bert.onnx");
        model.setTokenizerResource("https://example.com/tokenizers/custom-bert-tokenizer.json");

        // Configure tokenizer options
        model.setTokenizerOptions(Map.of(
            "addSpecialTokens", "true",
            "modelMaxLength", "512",
            "truncation", "true"
        ));

        // Enable GPU acceleration with error handling
        model.setGpuDeviceId(0);

        // Configure caching
        model.setResourceCacheDirectory("/var/cache/ml-models");
        model.setDisableCaching(false);

        // Configure model output
        model.setModelOutputName("last_hidden_state");

        // Initialize with error handling
        try {
            model.afterPropertiesSet();
        } catch (Exception e) {
            // GPU fallback
            if (e.getMessage().contains("CUDA") || e.getMessage().contains("GPU")) {
                System.err.println("GPU not available, falling back to CPU");
                model = new TransformersEmbeddingModel(MetadataMode.EMBED, registry);
                model.setModelResource("https://example.com/models/custom-bert.onnx");
                model.setTokenizerResource("https://example.com/tokenizers/custom-bert-tokenizer.json");
                model.setTokenizerOptions(Map.of("addSpecialTokens", "true", "modelMaxLength", "512"));
                model.setGpuDeviceId(-1); // CPU
                model.setResourceCacheDirectory("/var/cache/ml-models");
                model.setModelOutputName("last_hidden_state");
                model.afterPropertiesSet();
            } else {
                throw e;
            }
        }

        return model;
    }
}

Types

Resource

package org.springframework.core.io;

/**
 * Spring Framework Resource interface for accessing resources.
 */
public interface Resource {
    /**
     * Get an InputStream for reading the resource.
     */
    InputStream getInputStream() throws IOException;
    
    /**
     * Get the URI of this resource.
     */
    URI getURI() throws IOException;
    
    /**
     * Get the filename of this resource.
     */
    String getFilename();
    
    /**
     * Get the content as a byte array.
     */
    byte[] getContentAsByteArray() throws IOException;
    
    /**
     * Check if this resource exists.
     */
    boolean exists();
    
    /**
     * Check if this resource is readable.
     */
    boolean isReadable();
}

ObservationRegistry

package io.micrometer.observation;

/**
 * Micrometer ObservationRegistry for monitoring and metrics.
 */
public interface ObservationRegistry {
    /**
     * Create a new observation registry.
     */
    static ObservationRegistry create();

    /**
     * No-op registry that does nothing (default).
     */
    static ObservationRegistry NOOP;

    /**
     * Create and start an observation.
     */
    Observation observationBuilder(String name);

    /**
     * Check if observations are enabled.
     */
    boolean isEnabled();
}

EmbeddingModelObservationConvention

package org.springframework.ai.embedding.observation;

/**
 * Convention for observation naming and tagging.
 */
public interface EmbeddingModelObservationConvention {
    
    /**
     * Get the observation name.
     */
    String getName();
    
    /**
     * Get the contextual name for this observation.
     */
    String getContextualName(EmbeddingModelObservationContext context);
    
    /**
     * Get low-cardinality key-values (for metrics grouping).
     */
    KeyValues getLowCardinalityKeyValues(EmbeddingModelObservationContext context);
    
    /**
     * Get high-cardinality key-values (for detailed tracing).
     */
    KeyValues getHighCardinalityKeyValues(EmbeddingModelObservationContext context);
}

Validation

The model performs validation during initialization:

Output Name Validation

// Validates that modelOutputName exists in the ONNX model outputs
model.setModelOutputName("nonexistent_output");
try {
    model.afterPropertiesSet();
} catch (IllegalArgumentException e) {
    // Error message includes available outputs
    System.err.println(e.getMessage());
    // "Output 'nonexistent_output' not found. Available outputs: [last_hidden_state, pooler_output]"
}

Not Null Validation

// Configuration setters use Assert.notNull() to validate parameters
try {
    model.setModelResource((String) null);
} catch (IllegalArgumentException e) {
    // "Model resource must not be null"
}

try {
    model.setTokenizerResource((String) null);
} catch (IllegalArgumentException e) {
    // "Tokenizer resource must not be null"
}

Cache Directory Validation

// Creates cache directory if it doesn't exist
model.setResourceCacheDirectory("/new/cache/dir");
model.afterPropertiesSet();
// /new/cache/dir is created

// Validates write permissions
model.setResourceCacheDirectory("/read-only/dir");
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // "Permission denied" or similar
}

Model Input Validation

// Validates that model has required inputs: input_ids, attention_mask
// Logs available model inputs during initialization

model.setModelResource("classpath:/models/invalid-model.onnx");
try {
    model.afterPropertiesSet();
} catch (Exception e) {
    // "Model missing required input 'input_ids'" or similar
}

Common Configuration Patterns

Production Configuration

TransformersEmbeddingModel productionModel() throws Exception {
    TransformersEmbeddingModel model = new TransformersEmbeddingModel(
        MetadataMode.NONE,
        ObservationRegistry.create()
    );
    
    // Use GPU if available
    model.setGpuDeviceId(0);
    
    // Persistent cache location
    model.setResourceCacheDirectory("/var/cache/spring-ai-models");
    model.setDisableCaching(false);
    
    // Custom model
    model.setModelResource("https://example.com/models/production-model.onnx");
    model.setTokenizerResource("https://example.com/tokenizers/production-tokenizer.json");
    
    // Initialize with fallback
    try {
        model.afterPropertiesSet();
    } catch (Exception e) {
        if (e.getMessage().contains("GPU")) {
            model.setGpuDeviceId(-1);
            model.afterPropertiesSet();
        } else {
            throw e;
        }
    }
    
    return model;
}

Development Configuration

TransformersEmbeddingModel developmentModel() throws Exception {
    TransformersEmbeddingModel model = new TransformersEmbeddingModel();
    
    // Use CPU (more compatible)
    model.setGpuDeviceId(-1);
    
    // Local models for faster iteration
    model.setModelResource("file:///local/dev/models/model.onnx");
    model.setTokenizerResource("file:///local/dev/tokenizers/tokenizer.json");
    
    // Disable caching for development
    model.setDisableCaching(true);
    
    model.afterPropertiesSet();
    return model;
}

Test Configuration

TransformersEmbeddingModel testModel() throws Exception {
    TransformersEmbeddingModel model = new TransformersEmbeddingModel();
    
    // Use classpath resources
    model.setModelResource("classpath:/test-models/model.onnx");
    model.setTokenizerResource("classpath:/test-tokenizers/tokenizer.json");
    
    // CPU for consistent test results
    model.setGpuDeviceId(-1);
    
    // Use temporary cache
    model.setResourceCacheDirectory(System.getProperty("java.io.tmpdir") + "/test-cache");
    
    model.afterPropertiesSet();
    return model;
}
tessl i tessl/maven-org-springframework-ai--spring-ai-transformers@1.1.1

docs

index.md

tile.json