Spring AI Spring Boot Auto Configuration modules provide automatic setup for AI models, vector stores, MCP (Model Context Protocol), and retry capabilities.
Practical examples of using Spring AI Spring Boot Auto Configuration in production applications.
Complete implementation of a customer support chatbot with memory and RAG.
/**
 * Customer support chatbot combining RAG over a vector-store knowledge base
 * with per-session conversation memory.
 *
 * <p>Fix: the original fetched the conversation history but never passed it to
 * the model, so every turn was effectively stateless. The history is now
 * included in the request.
 */
@Service
public class CustomerSupportChatbot {

    private final ChatClient chatClient;
    private final VectorStore knowledgeBase;
    private final ChatMemory conversationMemory;

    public CustomerSupportChatbot(
            ChatClient.Builder chatClientBuilder,
            VectorStore knowledgeBase,
            ChatMemory conversationMemory) {
        // System prompt is applied to every request built from this client.
        this.chatClient = chatClientBuilder
                .defaultSystem("You are a helpful customer support agent. " +
                        "Use the provided knowledge base to answer questions accurately.")
                .build();
        this.knowledgeBase = knowledgeBase;
        this.conversationMemory = conversationMemory;
    }

    /**
     * Answers a customer query using retrieved knowledge plus prior turns.
     *
     * @param sessionId conversation identifier used as the memory key
     * @param query     the customer's question
     * @return the model's answer
     */
    public String handleCustomerQuery(String sessionId, String query) {
        // 1. Retrieve relevant knowledge: top 3 chunks above similarity 0.7.
        List<Document> relevantDocs = knowledgeBase.similaritySearch(
                SearchRequest.query(query)
                        .withTopK(3)
                        .withSimilarityThreshold(0.7)
        );
        String context = relevantDocs.stream()
                .map(Document::getContent)
                .collect(Collectors.joining("\n\n"));

        // 2. Get conversation history for this session.
        List<Message> history = conversationMemory.get(sessionId);

        // 3. Generate response with context AND history. (Bug fix: the
        //    original retrieved `history` but never used it.)
        String prompt = String.format(
                "Knowledge Base:\n%s\n\nCustomer Question: %s",
                context, query
        );
        String response = chatClient.prompt()
                .messages(history)   // prior turns, oldest first
                .user(prompt)
                .call()
                .content();

        // 4. Store this exchange so the next turn can see it.
        conversationMemory.add(sessionId, new UserMessage(query));
        conversationMemory.add(sessionId, new AssistantMessage(response));
        return response;
    }
}
Configuration:
# Chat provider
spring.ai.openai.api-key=${OPENAI_API_KEY}
spring.ai.openai.chat.options.model=gpt-4
spring.ai.openai.chat.options.temperature=0.3
# Vector store for knowledge base
spring.ai.vectorstore.pgvector.initialize-schema=true
spring.datasource.url=jdbc:postgresql://localhost:5432/support_kb
# Chat memory
spring.ai.chat.memory.repository.jdbc.initialize-schema=always
spring.ai.chat.memory.repository.jdbc.ttl=24h
# Retry for reliability
spring.ai.retry.max-attempts=5
spring.ai.retry.on-http-codes=429,503
# Monitoring
spring.ai.chat.observations.log-prompt=true
management.metrics.export.prometheus.enabled=true
Analyze documents with embeddings and categorization.
/**
 * Pipeline that chunks a document, categorizes it, extracts key points, and
 * indexes the enriched chunks into a vector store.
 */
@Service
public class DocumentAnalysisPipeline {

    private final EmbeddingModel embeddingModel;
    private final ChatModel chatModel;
    private final VectorStore documentStore;
    private final TextSplitter textSplitter;

    public DocumentAnalysisPipeline(
            EmbeddingModel embeddingModel,
            ChatModel chatModel,
            VectorStore documentStore) {
        this.embeddingModel = embeddingModel;
        this.chatModel = chatModel;
        this.documentStore = documentStore;
        // NOTE(review): confirm this two-argument TokenTextSplitter
        // constructor (chunk size, overlap?) exists in the Spring AI version
        // in use; recent versions favor the builder API.
        this.textSplitter = new TokenTextSplitter(500, 100);
    }

    /**
     * Analyzes one document and stores its chunks.
     *
     * @param content  full document text
     * @param filename original file name, stored as chunk metadata
     * @return summary of what was analyzed and indexed
     */
    public DocumentAnalysisResult analyzeDocument(String content, String filename) {
        // 1. Split document into token-bounded chunks.
        List<Document> chunks = textSplitter.apply(
                List.of(new Document(content))
        );
        // 2. Categorize document from its leading text.
        String category = categorizeDocument(content);
        // 3. Extract key points from the whole text.
        List<String> keyPoints = extractKeyPoints(content);
        // 4. Attach metadata and index the chunks.
        chunks.forEach(chunk -> {
            chunk.getMetadata().put("filename", filename);
            chunk.getMetadata().put("category", category);
            // Stored as ISO-8601 text: metadata is typically JSON-serialized,
            // and a raw Instant object may not survive that round trip.
            chunk.getMetadata().put("timestamp", Instant.now().toString());
        });
        documentStore.add(chunks);
        return new DocumentAnalysisResult(
                filename,
                category,
                keyPoints,
                chunks.size()
        );
    }

    /** Asks the model for a single category name based on the first 1000 chars. */
    private String categorizeDocument(String content) {
        String prompt = "Categorize this document into one of: " +
                "technical, business, legal, marketing. " +
                "Respond with only the category name.\n\n" +
                content.substring(0, Math.min(1000, content.length()));
        // Fix: strip surrounding whitespace/newlines the model often appends.
        return chatModel.call(prompt).strip();
    }

    /** Extracts up to five key points, one per non-blank response line. */
    private List<String> extractKeyPoints(String content) {
        String prompt = "Extract 5 key points from this document " +
                "as a bullet list:\n\n" + content;
        String response = chatModel.call(prompt);
        // Fix: the raw split("\n") included blank lines, and Arrays.asList
        // returned a fixed-size view; return a clean, mutable list instead.
        return Arrays.stream(response.split("\n"))
                .map(String::strip)
                .filter(line -> !line.isEmpty())
                .collect(Collectors.toList());
    }

    /** Immutable summary of one analysis run. */
    record DocumentAnalysisResult(
            String filename,
            String category,
            List<String> keyPoints,
            int chunkCount
    ) {}
}
Implement automatic fallback between multiple AI providers.
/**
 * Wires three provider-specific {@link ChatModel} beans into a single primary
 * bean that falls back between them in order.
 */
@Configuration
public class MultiProviderConfig {

    /** Fallback order: OpenAI first, then Anthropic, then Azure OpenAI. */
    @Bean
    @Primary
    public ChatModel resilientChatModel(
            @Qualifier("openAiChatModel") ChatModel primary,
            @Qualifier("anthropicChatModel") ChatModel secondary,
            @Qualifier("azureOpenAiChatModel") ChatModel tertiary,
            RetryTemplate retryTemplate) {
        List<ChatModel> orderedProviders = List.of(primary, secondary, tertiary);
        return new FallbackChatModel(orderedProviders, retryTemplate);
    }
}
/**
 * {@link ChatModel} decorator that retries each provider and falls back to the
 * next on failure, remembering the last provider that worked.
 *
 * <p>Fixes over the original:
 * <ul>
 *   <li>removed {@code @Service}: this class is constructed explicitly in
 *       {@code MultiProviderConfig}; component scanning would have registered
 *       a second, ambiguous bean</li>
 *   <li>declared the SLF4J logger the original referenced but never defined</li>
 *   <li>{@code currentProviderIndex} is now advanced on success so subsequent
 *       calls start from the provider that last worked (it was never updated
 *       before, making the rotation dead code)</li>
 *   <li>{@code call(Prompt)} and {@code stream(Prompt)} are implemented
 *       instead of returning {@code null}</li>
 * </ul>
 */
public class FallbackChatModel implements ChatModel {

    private static final Logger log =
            LoggerFactory.getLogger(FallbackChatModel.class);

    private final List<ChatModel> providers;
    private final RetryTemplate retryTemplate;
    // Index of the provider that most recently succeeded; tried first.
    private final AtomicInteger currentProviderIndex = new AtomicInteger(0);

    public FallbackChatModel(
            List<ChatModel> providers,
            RetryTemplate retryTemplate) {
        this.providers = providers;
        this.retryTemplate = retryTemplate;
    }

    @Override
    public String call(String message) {
        return callWithFallback(provider ->
                retryTemplate.execute(context -> provider.call(message)));
    }

    @Override
    public ChatResponse call(Prompt prompt) {
        return callWithFallback(provider ->
                retryTemplate.execute(context -> provider.call(prompt)));
    }

    @Override
    public Flux<ChatResponse> stream(Prompt prompt) {
        // Streaming failures surface asynchronously, so fall back reactively:
        // chain each provider as the error handler of the one before it.
        int start = currentProviderIndex.get();
        Flux<ChatResponse> result =
                Flux.error(new IllegalStateException("No providers available"));
        for (int i = providers.size() - 1; i >= 0; i--) {
            ChatModel provider = providers.get((start + i) % providers.size());
            Flux<ChatResponse> next = result;
            result = Flux.defer(() -> provider.stream(prompt))
                    .onErrorResume(e -> {
                        log.warn("Provider {} failed: {}",
                                provider.getClass().getSimpleName(),
                                e.getMessage());
                        return next;
                    });
        }
        return result;
    }

    /**
     * Invokes each provider in fallback order, starting from the last one
     * that succeeded; rethrows the final failure if all providers fail.
     */
    private <T> T callWithFallback(Function<ChatModel, T> invocation) {
        int start = currentProviderIndex.get();
        RuntimeException lastFailure = null;
        for (int i = 0; i < providers.size(); i++) {
            int index = (start + i) % providers.size();
            ChatModel provider = providers.get(index);
            try {
                T result = invocation.apply(provider);
                currentProviderIndex.set(index); // remember the working provider
                return result;
            } catch (RuntimeException e) {
                lastFailure = e;
                log.warn("Provider {} failed: {}",
                        provider.getClass().getSimpleName(),
                        e.getMessage());
            }
        }
        throw lastFailure != null
                ? lastFailure
                : new IllegalStateException("No providers available");
    }
}
Automated code review using AI with caching.
/**
 * AI-assisted code reviewer. Results are cached under a caller-supplied
 * content hash via Spring's cache abstraction.
 */
@Service
public class CodeReviewAssistant {

    private final ChatClient chatClient;
    private final CacheManager cacheManager;

    public CodeReviewAssistant(
            ChatClient.Builder chatClientBuilder,
            CacheManager cacheManager) {
        // The system prompt establishes the reviewer persona for every call.
        this.chatClient = chatClientBuilder
                .defaultSystem("""
                        You are an expert code reviewer. Analyze code for:
                        1. Code quality and best practices
                        2. Potential bugs and security issues
                        3. Performance concerns
                        4. Readability and maintainability
                        Provide specific, actionable feedback.
                        """)
                .build();
        this.cacheManager = cacheManager;
    }

    /**
     * Reviews one code snippet; identical content (same hash) is served from
     * the "code-reviews" cache without hitting the model again.
     *
     * @param code     the source to review
     * @param language language tag used for the markdown fence
     * @param codeHash cache key identifying this exact content
     */
    @Cacheable(value = "code-reviews", key = "#codeHash")
    public CodeReviewResult reviewCode(
            String code,
            String language,
            String codeHash) {
        String fencedRequest = String.format(
                "Review this %s code:\n\n```%s\n%s\n```",
                language, language, code);
        String rawReview = chatClient.prompt()
                .user(fencedRequest)
                .call()
                .content();
        return parseReviewResult(rawReview);
    }

    /** Converts the free-form AI review text into a structured result. */
    private CodeReviewResult parseReviewResult(String review) {
        return new CodeReviewResult(
                extractIssues(review),
                extractSuggestions(review),
                calculateScore(review));
    }

    /** Structured review: findings, suggestions, and an overall quality score. */
    record CodeReviewResult(
            List<Issue> issues,
            List<Suggestion> suggestions,
            int qualityScore
    ) {}
}
Build a semantic search API with vector stores.
/**
 * REST API for semantic (vector-similarity) search and document indexing.
 *
 * <p>Fix: the search timer was only stopped on the success path, so failing
 * requests leaked the {@code Timer.Sample} and were excluded from the latency
 * metric; the stop now happens in a {@code finally} block.
 */
@RestController
@RequestMapping("/api/search")
public class SemanticSearchController {

    private final VectorStore vectorStore;
    private final MeterRegistry meterRegistry;

    public SemanticSearchController(
            VectorStore vectorStore,
            MeterRegistry meterRegistry) {
        this.vectorStore = vectorStore;
        this.meterRegistry = meterRegistry;
    }

    /**
     * Runs a filtered similarity search.
     *
     * @param query text, result limit, similarity threshold, and metadata filters
     * @return matching documents plus the result count
     */
    @PostMapping
    public SearchResponse search(@RequestBody SearchQuery query) {
        Timer.Sample timer = Timer.start(meterRegistry);
        // Count every request, not just successful ones (errors are also
        // counted separately below).
        meterRegistry.counter("search.requests").increment();
        try {
            List<Document> results = vectorStore.similaritySearch(
                    SearchRequest.query(query.text())
                            .withTopK(query.limit())
                            .withSimilarityThreshold(query.threshold())
                            .withFilterExpression(
                                    buildFilter(query.filters())
                            )
            );
            return new SearchResponse(
                    results.stream()
                            .map(this::toSearchResult)
                            .collect(Collectors.toList()),
                    results.size()
            );
        } catch (Exception e) {
            meterRegistry.counter("search.errors").increment();
            throw e;
        } finally {
            // Record duration for successes AND failures.
            timer.stop(meterRegistry.timer("search.duration"));
        }
    }

    /** Bulk-indexes documents with their metadata into the vector store. */
    @PostMapping("/index")
    public IndexResponse indexDocuments(
            @RequestBody List<IndexDocument> documents) {
        List<Document> docs = documents.stream()
                .map(d -> {
                    Document doc = new Document(d.content());
                    doc.getMetadata().putAll(d.metadata());
                    return doc;
                })
                .collect(Collectors.toList());
        vectorStore.add(docs);
        return new IndexResponse(docs.size(), "indexed");
    }

    /** Incoming search request payload. */
    record SearchQuery(
            String text,
            int limit,
            double threshold,
            Map<String, Object> filters
    ) {}

    /** Search results plus total count. */
    record SearchResponse(
            List<SearchResult> results,
            int total
    ) {}
}
Integrate external tools via Model Context Protocol.
/**
 * Chat service whose model can invoke externally hosted tools exposed through
 * MCP (Model Context Protocol) clients.
 */
@Service
public class EnhancedChatService {

    private final ChatClient chatClient;
    private final List<McpSyncClient> mcpClients;

    public EnhancedChatService(
            ChatClient.Builder chatClientBuilder,
            SyncMcpToolCallbackProvider mcpToolProvider,
            List<McpSyncClient> mcpClients) {
        // Register every MCP tool callback so the model may call them.
        this.chatClient = chatClientBuilder
                .defaultFunctions(mcpToolProvider.getToolCallbacks())
                .build();
        this.mcpClients = mcpClients;
    }

    /**
     * Sends a message; the model decides which registered MCP tools to call.
     *
     * @param message the user's message
     * @return the model's final answer
     */
    public String chatWithTools(String message) {
        return chatClient.prompt()
                .user(message)
                .call()
                .content();
    }

    /** Lists every tool advertised by the connected MCP servers as "name: description". */
    public List<String> listAvailableTools() {
        return mcpClients.stream()
                .flatMap(client -> client.listTools().tools().stream())
                .map(tool -> tool.name() + ": " + tool.description())
                .collect(Collectors.toList());
    }
}
MCP Configuration:
# Stdio transport to local tools
spring.ai.mcp.client.stdio.connections.filesystem.command=node
spring.ai.mcp.client.stdio.connections.filesystem.args[0]=./mcp-servers/filesystem.js
spring.ai.mcp.client.stdio.connections.database.command=python
spring.ai.mcp.client.stdio.connections.database.args[0]=./mcp-servers/database.py
# SSE transport to remote tools
spring.ai.mcp.client.sse.connections.remote-api.url=https://mcp.example.com
spring.ai.mcp.client.sse.connections.remote-api.sse-endpoint=/mcp/sse
Process large batches of documents asynchronously.
/**
 * Processes batches of documents concurrently: summarizes each one, attaches
 * metadata, and indexes it into the vector store.
 *
 * <p>Fix: the original created a fixed thread pool and never shut it down,
 * leaking ten threads on context shutdown. The class now implements
 * {@link AutoCloseable}; Spring infers {@code close()} as the bean's destroy
 * method and releases the pool automatically.
 */
@Service
public class BatchDocumentProcessor implements AutoCloseable {

    private final ChatModel chatModel;
    private final EmbeddingModel embeddingModel;
    private final VectorStore vectorStore;
    private final ExecutorService executor;

    public BatchDocumentProcessor(
            ChatModel chatModel,
            EmbeddingModel embeddingModel,
            VectorStore vectorStore) {
        this.chatModel = chatModel;
        this.embeddingModel = embeddingModel;
        this.vectorStore = vectorStore;
        // Bounded pool so a large batch cannot exhaust model-provider quotas.
        this.executor = Executors.newFixedThreadPool(10);
    }

    /**
     * Processes every document concurrently on the internal pool.
     *
     * @param documents raw document texts
     * @return future completing when all documents finished (success or not)
     */
    public CompletableFuture<BatchResult> processBatch(
            List<String> documents) {
        List<CompletableFuture<DocumentResult>> futures = documents.stream()
                .map(doc -> CompletableFuture.supplyAsync(
                        () -> processDocument(doc),
                        executor
                ))
                .collect(Collectors.toList());
        // allOf guarantees every future is done, so join() below cannot block.
        return CompletableFuture.allOf(
                futures.toArray(new CompletableFuture[0])
        ).thenApply(v -> {
            List<DocumentResult> results = futures.stream()
                    .map(CompletableFuture::join)
                    .collect(Collectors.toList());
            return new BatchResult(
                    results,
                    results.stream().filter(DocumentResult::success).count(),
                    results.stream().filter(r -> !r.success()).count()
            );
        });
    }

    /** Summarizes and indexes one document; failures become error results. */
    private DocumentResult processDocument(String content) {
        try {
            String summary = chatModel.call(
                    "Summarize in 2-3 sentences: " + content
            );
            Document doc = new Document(content);
            doc.getMetadata().put("summary", summary);
            // ISO-8601 text: metadata is typically JSON-serialized, and a raw
            // Instant object may not survive that round trip.
            doc.getMetadata().put("processed", Instant.now().toString());
            vectorStore.add(List.of(doc));
            return new DocumentResult(true, summary, null);
        } catch (Exception e) {
            // Best-effort batch semantics: record the failure, keep going.
            return new DocumentResult(false, null, e.getMessage());
        }
    }

    /** Releases the worker pool; invoked by Spring on context shutdown. */
    @Override
    public void close() {
        executor.shutdown();
    }

    /** Outcome for a single document: summary on success, message on error. */
    record DocumentResult(
            boolean success,
            String summary,
            String error
    ) {}

    /** All per-document results plus success/error tallies. */
    record BatchResult(
            List<DocumentResult> results,
            long successCount,
            long errorCount
    ) {}
}
spring:
ai:
# Provider with fallback
openai:
api-key: ${OPENAI_API_KEY}
chat:
options:
model: gpt-4
temperature: 0.5
timeout: 60s
# Retry configuration
retry:
max-attempts: 10
backoff:
initial-interval: 2s
multiplier: 3
max-interval: 120s
on-http-codes: 429,500,502,503,504
exclude-on-http-codes: 401,403,400
# Vector store
vectorstore:
pgvector:
initialize-schema: false # Managed externally
index-type: HNSW
dimensions: 1536
# Chat memory
chat:
memory:
repository:
jdbc:
initialize-schema: never # Use Flyway
# Observations
chat:
observations:
log-prompt: false # Don't log in production
log-completion: false
include-error-logging: true
vectorstore:
observations:
include-query-content: false
# Monitoring
management:
endpoints:
web:
exposure:
include: health,metrics,prometheus
metrics:
export:
prometheus:
enabled: true
tracing:
enabled: true
sampling:
probability: 0.1 # 10% sampling
# Database — NOTE: in a real application.yml, merge this section under the
# single top-level `spring:` key above; duplicate top-level mapping keys are
# invalid YAML.
spring:
datasource:
url: jdbc:postgresql://db.example.com:5432/production_db
hikari:
maximum-pool-size: 20
minimum-idle: 5