ONNX-based Transformer models provide text embeddings within the Spring AI framework.
Complete guide to configuring Spring AI Transformers.
Configure via application.properties or application.yaml
Configure via Java code before initialization
Set metadata mode and observation registry
# Model URI (classpath, file, or URL)
spring.ai.embedding.transformer.onnx.model-uri=classpath:/models/model.onnx
# Model output tensor name
spring.ai.embedding.transformer.onnx.model-output-name=last_hidden_state
# GPU device ID (-1 for CPU, 0+ for GPU)
spring.ai.embedding.transformer.onnx.gpu-device-id=0

# Tokenizer URI
spring.ai.embedding.transformer.tokenizer.uri=classpath:/tokenizers/tokenizer.json
# Tokenizer options
spring.ai.embedding.transformer.tokenizer.options.modelMaxLength=512
spring.ai.embedding.transformer.tokenizer.options.truncation=true
spring.ai.embedding.transformer.tokenizer.options.padding=max_length
spring.ai.embedding.transformer.tokenizer.options.addSpecialTokens=true

# Enable/disable caching
spring.ai.embedding.transformer.cache.enabled=true
# Cache directory
spring.ai.embedding.transformer.cache.directory=/var/cache/spring-ai

# Metadata mode: NONE, EMBED, or ALL
spring.ai.embedding.transformer.metadata-mode=NONE

TransformersEmbeddingModel model = new TransformersEmbeddingModel();
// Point the model at a custom ONNX file and its matching tokenizer.
model.setModelResource("classpath:/models/custom-model.onnx");
model.setTokenizerResource("classpath:/tokenizers/custom-tokenizer.json");
// Run inference on GPU device 0 (-1 selects CPU).
model.setGpuDeviceId(0);
// Cache downloaded resources under /var/cache/models; leave caching enabled.
model.setResourceCacheDirectory("/var/cache/models");
model.setDisableCaching(false);
// Required: initializes the ONNX session and tokenizer before first use.
model.afterPropertiesSet();

import org.springframework.ai.transformers.TransformersEmbeddingModel;
import org.springframework.ai.document.MetadataMode;
import io.micrometer.observation.ObservationRegistry;
import java.util.Map;
public TransformersEmbeddingModel createAdvancedModel() throws Exception {
// Attach EMBED metadata mode and a Micrometer observation registry so
// embedding calls are instrumented.
ObservationRegistry registry = ObservationRegistry.create();
TransformersEmbeddingModel model = new TransformersEmbeddingModel(
MetadataMode.EMBED,
registry
);
// Remote ONNX model; "last_hidden_state" is the typical BERT output tensor.
model.setModelResource("https://example.com/models/bert.onnx");
model.setModelOutputName("last_hidden_state");
// Matching tokenizer definition (tokenizer.json format).
model.setTokenizerResource("https://example.com/tokenizers/bert-tokenizer.json");
model.setTokenizerOptions(Map.of(
"addSpecialTokens", "true",
"modelMaxLength", "512",
"truncation", "true",
"padding", "max_length"
));
// GPU device 0 (-1 would select CPU).
model.setGpuDeviceId(0);
// Cache remote model/tokenizer downloads locally.
model.setResourceCacheDirectory("/var/cache/ml-models");
model.setDisableCaching(false);
// Required before the first embed() call.
model.afterPropertiesSet();
return model;
}

# application-dev.properties
spring.ai.embedding.transformer.onnx.gpu-device-id=-1
spring.ai.embedding.transformer.cache.enabled=false
spring.ai.embedding.transformer.onnx.model-uri=file:///local/dev/models/model.onnx

# application-prod.properties
spring.ai.embedding.transformer.onnx.gpu-device-id=0
spring.ai.embedding.transformer.cache.enabled=true
spring.ai.embedding.transformer.cache.directory=/var/cache/spring-ai-models
spring.ai.embedding.transformer.onnx.model-uri=https://models.example.com/prod-model.onnx

# application-test.properties
spring.ai.embedding.transformer.onnx.gpu-device-id=-1
spring.ai.embedding.transformer.cache.enabled=true
spring.ai.embedding.transformer.cache.directory=${java.io.tmpdir}/test-cache
spring.ai.embedding.transformer.onnx.model-uri=classpath:/test-models/model.onnx

spring.ai.embedding.transformer.onnx.gpu-device-id=0

Requirements:
TransformersEmbeddingModel model = new TransformersEmbeddingModel();
// Try GPU device 0 first; fall back to CPU on GPU-related failures.
model.setGpuDeviceId(0);
try {
model.afterPropertiesSet();
System.out.println("Using GPU");
} catch (Exception e) {
// Guard against a null exception message before the contains() checks
// (the original code would NPE on exceptions without a message).
String message = e.getMessage();
if (message != null && (message.contains("CUDA") || message.contains("GPU"))) {
System.out.println("GPU failed, falling back to CPU");
// Recreate the model on CPU (-1) and retry initialization.
model = new TransformersEmbeddingModel();
model.setGpuDeviceId(-1);
model.afterPropertiesSet();
} else {
// Not a GPU problem — propagate unchanged.
throw e;
}
}

# Use second GPU
spring.ai.embedding.transformer.onnx.gpu-device-id=1

# From classpath
spring.ai.embedding.transformer.onnx.model-uri=classpath:/models/my-model.onnx
spring.ai.embedding.transformer.tokenizer.uri=classpath:/tokenizers/my-tokenizer.json
# From filesystem
spring.ai.embedding.transformer.onnx.model-uri=file:///path/to/model.onnx
spring.ai.embedding.transformer.tokenizer.uri=file:///path/to/tokenizer.json
# From URL
spring.ai.embedding.transformer.onnx.model-uri=https://example.com/model.onnx
spring.ai.embedding.transformer.tokenizer.uri=https://example.com/tokenizer.json

Different models use different output tensor names:
# BERT models
spring.ai.embedding.transformer.onnx.model-output-name=last_hidden_state
# or
spring.ai.embedding.transformer.onnx.model-output-name=pooler_output
# Sentence-BERT models
spring.ai.embedding.transformer.onnx.model-output-name=sentence_embedding

# Persistent cache
spring.ai.embedding.transformer.cache.directory=/var/cache/spring-ai
# User home directory
spring.ai.embedding.transformer.cache.directory=${user.home}/.spring-ai/cache
# Temporary directory
spring.ai.embedding.transformer.cache.directory=${java.io.tmpdir}/spring-ai-cache

spring.ai.embedding.transformer.cache.enabled=false

Use cases:
import org.springframework.ai.transformers.ResourceCacheService;
// Programmatically manage the on-disk resource cache at the given directory.
ResourceCacheService cache = new ResourceCacheService("/var/cache/spring-ai");
cache.deleteCacheFolder();

Embed text content only:
spring.ai.embedding.transformer.metadata-mode=NONE

Document doc = new Document("Text content", Map.of("key", "value"));
float[] embedding = model.embed(doc);
// Embeds: "Text content"

Include metadata in embedding:
spring.ai.embedding.transformer.metadata-mode=EMBED

Document doc = new Document("Text content", Map.of("title", "Doc", "author", "John"));
float[] embedding = model.embed(doc);
// Embeds: "title: Doc\nauthor: John\nText content"

Include all metadata fields:
spring.ai.embedding.transformer.metadata-mode=ALL

Properties can be set via environment variables:
# Property: spring.ai.embedding.transformer.onnx.gpu-device-id
# Env var: SPRING_AI_EMBEDDING_TRANSFORMER_ONNX_GPUDEVICEID
export SPRING_AI_EMBEDDING_TRANSFORMER_ONNX_GPUDEVICEID=0
# Property: spring.ai.embedding.transformer.cache.directory
# Env var: SPRING_AI_EMBEDDING_TRANSFORMER_CACHE_DIRECTORY
export SPRING_AI_EMBEDDING_TRANSFORMER_CACHE_DIRECTORY=/var/cache

debug=true
logging.level.org.springframework.boot.autoconfigure=DEBUG
logging.level.org.springframework.ai=DEBUG

@Component
public class ConfigValidator implements CommandLineRunner {
@Autowired
private TransformersEmbeddingModel model;
@Override
public void run(String... args) {
// Startup smoke test: printing the embedding dimension exercises the
// injected model (presumably forces model initialization — confirm).
System.out.println("Dimensions: " + model.dimensions());
System.out.println("Model initialized successfully");
}
}

Check property names:
# Wrong
spring.ai.embedding.onnx.gpu-device-id=0
# Correct
spring.ai.embedding.transformer.onnx.gpu-device-id=0

Check data types:
# Wrong (string instead of int)
spring.ai.embedding.transformer.onnx.gpu-device-id=gpu0
# Correct
spring.ai.embedding.transformer.onnx.gpu-device-id=0

Activate profile:
java -jar app.jar --spring.profiles.active=prod

tessl i tessl/maven-org-springframework-ai--spring-ai-transformers@1.1.1