Spring AI Chat Client provides a fluent API for building AI-powered applications with LLMs, supporting advisors, streaming, structured outputs, and conversation memory
Spring AI Chat Client provides evaluators for assessing the quality and accuracy of LLM responses. Evaluators help detect hallucinations, verify relevance, and measure response quality.
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.Evaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.prompt.PromptTemplate;

All evaluators implement the common Evaluator interface.
interface Evaluator {
EvaluationResponse evaluate(EvaluationRequest request);
}

Evaluation Request:
class EvaluationRequest {
String getQuery(); // Original user query
String getResponse(); // LLM response to evaluate
List<String> getContext(); // Supporting context documents
}

Evaluation Response:
class EvaluationResponse {
boolean isPass(); // Whether evaluation passed
double getScore(); // Numeric score (0.0-1.0)
String getFeedback(); // Detailed feedback text
}

Evaluates whether an LLM response is relevant to the user query given the provided context.
class RelevancyEvaluator implements Evaluator {
static Builder builder();
EvaluationResponse evaluate(EvaluationRequest request);
interface Builder {
Builder chatClientBuilder(ChatClient.Builder chatClientBuilder);
Builder promptTemplate(PromptTemplate promptTemplate);
RelevancyEvaluator build();
}
}

What It Checks: whether the response actually addresses the user's query, judged against the provided context documents.
Example - Basic Relevancy Evaluation:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;
// Create evaluator
// The evaluator drives its own ChatClient (built from this builder) to judge
// whether a response is relevant to the query.
ChatClient.Builder chatClientBuilder = ChatClient.builder(chatModel);
RelevancyEvaluator evaluator = RelevancyEvaluator.builder()
.chatClientBuilder(chatClientBuilder)
.build();
// Evaluate a response
// EvaluationRequest bundles the original query, the LLM's answer, and the
// context documents the answer should be judged against.
EvaluationRequest request = new EvaluationRequest(
"What is the capital of France?", // query
"Paris is the capital of France.", // response
List.of("France is a country in Europe. Its capital is Paris.") // context
);
EvaluationResponse evaluation = evaluator.evaluate(request);
// Expected: the answer directly addresses the query, so the evaluation passes.
System.out.println("Pass: " + evaluation.isPass()); // true
System.out.println("Score: " + evaluation.getScore()); // 1.0
System.out.println("Feedback: " + evaluation.getFeedback());

Example - Detecting Irrelevant Response:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

// FIX: ChatClient was referenced below but never imported in this snippet.
RelevancyEvaluator evaluator = RelevancyEvaluator.builder()
    .chatClientBuilder(ChatClient.builder(chatModel))
    .build();

// Response that doesn't address the query
EvaluationRequest request = new EvaluationRequest(
    "What is the capital of France?",
    "France is known for its wine and cheese.",
    List.of("France is a country in Europe. Its capital is Paris.")
);

EvaluationResponse evaluation = evaluator.evaluate(request);
// The response ignores the actual question, so the evaluator fails it.
System.out.println("Pass: " + evaluation.isPass()); // false
System.out.println("Score: " + evaluation.getScore()); // < 0.5
System.out.println("Feedback: " + evaluation.getFeedback());
// "Response does not answer the question about the capital"

Example - Custom Prompt Template:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.prompt.PromptTemplate;

// Custom evaluation prompt
// FIX: ChatClient was referenced below but never imported in this snippet.
// The template must expose the {query}, {context} and {response} variables the
// evaluator binds at evaluation time.
// NOTE(review): this prompt asks for a 0-10 rating, while EvaluationResponse
// scores are documented as 0.0-1.0 — confirm the evaluator normalizes scales.
PromptTemplate customPrompt = new PromptTemplate("""
    Evaluate if the response is relevant to the query.
    Query: {query}
    Context: {context}
    Response: {response}
    Rate relevance from 0-10 and explain your reasoning.
    """);
RelevancyEvaluator evaluator = RelevancyEvaluator.builder()
    .chatClientBuilder(ChatClient.builder(chatModel))
    .promptTemplate(customPrompt)
.build();

Evaluates the factual accuracy of LLM responses against provided context to detect hallucinations.
class FactCheckingEvaluator implements Evaluator {
static Builder builder(ChatClient.Builder chatClientBuilder);
static Builder forBespokeMinicheck(ChatClient.Builder chatClientBuilder);
EvaluationResponse evaluate(EvaluationRequest request);
interface Builder {
Builder chatClientBuilder(ChatClient.Builder chatClientBuilder);
Builder evaluationPrompt(PromptTemplate promptTemplate);
FactCheckingEvaluator build();
}
}

What It Checks: whether each factual claim in the response is supported by the provided context, flagging fabricated or contradicted statements.
Example - Basic Fact Checking:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;
// Create evaluator
// The fact checker drives its own ChatClient (built from this builder) to
// verify claims in the response against the supplied context.
ChatClient.Builder chatClientBuilder = ChatClient.builder(chatModel);
FactCheckingEvaluator evaluator = FactCheckingEvaluator
.builder(chatClientBuilder)
.build();
// Evaluate factual response
// The context document supports the claimed release date, so this passes.
EvaluationRequest request = new EvaluationRequest(
"When was Spring Framework released?",
"Spring Framework was released in 2003.",
List.of("The Spring Framework was first released in 2003 by Rod Johnson.")
);
EvaluationResponse evaluation = evaluator.evaluate(request);
System.out.println("Pass: " + evaluation.isPass()); // true
System.out.println("Score: " + evaluation.getScore()); // 1.0
System.out.println("Feedback: " + evaluation.getFeedback());

Example - Detecting Hallucination:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

// FIX: ChatClient was referenced below but never imported in this snippet.
FactCheckingEvaluator evaluator = FactCheckingEvaluator
    .builder(ChatClient.builder(chatModel))
    .build();

// Response with fabricated information
EvaluationRequest request = new EvaluationRequest(
    "When was Spring Framework released?",
    "Spring Framework was released in 1998.",
    List.of("The Spring Framework was first released in 2003 by Rod Johnson.")
);

EvaluationResponse evaluation = evaluator.evaluate(request);
// The claimed date contradicts the context, so the fact check fails.
System.out.println("Pass: " + evaluation.isPass()); // false
System.out.println("Score: " + evaluation.getScore()); // 0.0
System.out.println("Feedback: " + evaluation.getFeedback());
// "Response contains incorrect date - framework was released in 2003, not 1998"

Example - Bespoke Minicheck Model:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.chat.model.ChatModel;
import org.springframework.ai.openai.OpenAiChatModel;
import org.springframework.ai.openai.OpenAiChatOptions;

// Use specialized fact-checking model
// FIX: ChatModel and OpenAiChatOptions were used below but never imported.
// NOTE(review): newer Spring AI releases prefer OpenAiChatModel.builder() and
// OpenAiChatOptions.builder().model(...) — confirm against the target version.
ChatModel minicheckModel = new OpenAiChatModel(
    openAiApi,
    OpenAiChatOptions.builder()
        .withModel("bespoke-minicheck")
        .build()
);

// Pre-configured for Bespoke Minicheck
FactCheckingEvaluator evaluator = FactCheckingEvaluator
    .forBespokeMinicheck(ChatClient.builder(minicheckModel))
    .build();
EvaluationResponse evaluation = evaluator.evaluate(request);

Example - Custom Evaluation Prompt:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.chat.prompt.PromptTemplate;

// FIX: ChatClient was referenced below but never imported in this snippet.
// The template must expose the {context} and {response} variables the
// evaluator binds at evaluation time.
PromptTemplate customPrompt = new PromptTemplate("""
    Verify the factual accuracy of the response.
    Context: {context}
    Response: {response}
    Check each claim:
    1. Is it supported by the context?
    2. Does it contradict the context?
    3. Is it fabricated?
    Provide a score (0-1) and detailed feedback.
    """);
FactCheckingEvaluator evaluator = FactCheckingEvaluator
    .builder(ChatClient.builder(chatModel))
    .evaluationPrompt(customPrompt)
.build();

Combine multiple evaluators for comprehensive quality assessment.
Example - Multi-Evaluator Pipeline:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.Evaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;
/**
 * Bundles a relevancy check and a fact check so that one EvaluationRequest
 * can be scored by every configured evaluator in a single call.
 */
class EvaluationPipeline {
    // Evaluators run in declaration order: relevancy first, then fact checking.
    private final List<Evaluator> evaluators;

    public EvaluationPipeline(ChatClient.Builder chatClientBuilder) {
        this.evaluators = List.of(
            RelevancyEvaluator.builder()
                .chatClientBuilder(chatClientBuilder)
                .build(),
            FactCheckingEvaluator.builder(chatClientBuilder)
                .build()
        );
    }

    /** Runs every evaluator against the request, preserving evaluator order. */
    public List<EvaluationResponse> evaluate(EvaluationRequest request) {
        return this.evaluators.stream()
            .map(singleEvaluator -> singleEvaluator.evaluate(request))
            .toList();
    }

    /** True only when no evaluator rejects the response. */
    public boolean allPass(EvaluationRequest request) {
        return evaluate(request).stream()
            .noneMatch(result -> !result.isPass());
    }

    /** Unweighted mean score across all evaluators; 0.0 when there are none. */
    public double averageScore(EvaluationRequest request) {
        double total = 0.0;
        int count = 0;
        for (EvaluationResponse result : evaluate(request)) {
            total += result.getScore();
            count++;
        }
        return count == 0 ? 0.0 : total / count;
    }
}
// Usage
// The pipeline builds both evaluators from one judging ChatClient builder.
EvaluationPipeline pipeline = new EvaluationPipeline(
ChatClient.builder(chatModel)
);
EvaluationRequest request = new EvaluationRequest(
"What is Spring Boot?",
"Spring Boot is a framework for building Java applications.",
List.of("Spring Boot simplifies Spring application development...")
);
// allPass and averageScore each re-run every evaluator on the request.
boolean allPass = pipeline.allPass(request);
double avgScore = pipeline.averageScore(request);
System.out.println("All evaluations passed: " + allPass);
System.out.println("Average score: " + avgScore);

Use evaluators to assess Retrieval-Augmented Generation (RAG) responses.
Example - RAG Evaluation:
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

// Generate response with RAG
String query = "What are the benefits of Spring Boot?";
// FIX: VectorStore.similaritySearch returns documents (List<Document>), not
// List<String>; extract each document's text before using it as context.
var retrievedDocs = vectorStore.similaritySearch(query);
List<String> contextTexts = retrievedDocs.stream()
    .map(doc -> doc.getContent())
    .toList();
String response = ChatClient.builder(chatModel)
    .build()
    .prompt()
    .user(userPrompt -> userPrompt
        .text("""
            Answer the question based on the context.
            Question: {query}
            Context: {context}
            """)
        .param("query", query)
        .param("context", String.join("\n", contextTexts))
    )
    .call()
    .content();

// Evaluate RAG response
// FIX: EvaluationResponse was used below but never imported in this snippet.
RelevancyEvaluator relevancyEvaluator = RelevancyEvaluator.builder()
    .chatClientBuilder(ChatClient.builder(chatModel))
    .build();
FactCheckingEvaluator factCheckingEvaluator = FactCheckingEvaluator
    .builder(ChatClient.builder(chatModel))
    .build();

// The same request feeds both evaluators: query, answer, retrieved context.
EvaluationRequest evalRequest = new EvaluationRequest(
    query,
    response,
    contextTexts
);

// Check relevance
EvaluationResponse relevanceEval = relevancyEvaluator.evaluate(evalRequest);
System.out.println("Relevance: " + relevanceEval.isPass());

// Check factual accuracy
EvaluationResponse factEval = factCheckingEvaluator.evaluate(evalRequest);
System.out.println("Factual accuracy: " + factEval.isPass());

Use evaluators in automated tests to ensure response quality.
Example - Unit Test:
import org.junit.jupiter.api.Test;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Quality-gate test: generates an answer and asserts that both the relevancy
 * and fact-checking evaluators accept it, with minimum score thresholds.
 */
class ChatResponseQualityTest {
    private final ChatClient chatClient = ChatClient.builder(chatModel).build();
    // The evaluators get their own judging ChatClient, separate from the client under test.
    private final RelevancyEvaluator relevancyEvaluator =
        RelevancyEvaluator.builder()
            .chatClientBuilder(ChatClient.builder(chatModel))
            .build();
    private final FactCheckingEvaluator factCheckingEvaluator =
        FactCheckingEvaluator.builder(ChatClient.builder(chatModel))
            .build();

    @Test
    void testResponseQuality() {
        // Generate a response for a fixed query with known supporting context.
        String userQuery = "What is Spring Framework?";
        List<String> contextDocs = List.of(
            "Spring Framework is a comprehensive framework for Java development."
        );
        String answer = chatClient
            .prompt()
            .user(userQuery)
            .call()
            .content();

        // Grade the answer against the query and the context.
        EvaluationRequest evalRequest = new EvaluationRequest(
            userQuery, answer, contextDocs
        );
        EvaluationResponse relevancyResult = relevancyEvaluator.evaluate(evalRequest);
        EvaluationResponse factResult = factCheckingEvaluator.evaluate(evalRequest);

        // Pass/fail gates first, then score thresholds.
        assertTrue(relevancyResult.isPass(),
            "Response should be relevant: " + relevancyResult.getFeedback());
        assertTrue(factResult.isPass(),
            "Response should be factually accurate: " + factResult.getFeedback());
        assertTrue(relevancyResult.getScore() > 0.7,
            "Relevance score should be high");
        assertTrue(factResult.getScore() > 0.7,
            "Fact checking score should be high");
    }
}

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.Evaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import org.springframework.ai.vectorstore.VectorStore;
import java.util.List;
/**
 * Chat service that answers questions with RAG and grades each answer with a
 * relevancy evaluator and a fact-checking evaluator before returning it.
 */
class EvaluatedChatService {
    private final ChatClient chatClient;
    private final VectorStore vectorStore;
    private final RelevancyEvaluator relevancyEvaluator;
    private final FactCheckingEvaluator factCheckingEvaluator;

    public EvaluatedChatService(
        ChatClient chatClient,
        VectorStore vectorStore,
        ChatClient.Builder evaluatorChatClientBuilder
    ) {
        this.chatClient = chatClient;
        this.vectorStore = vectorStore;
        // Both evaluators share the same judging ChatClient builder.
        this.relevancyEvaluator = RelevancyEvaluator.builder()
            .chatClientBuilder(evaluatorChatClientBuilder)
            .build();
        this.factCheckingEvaluator = FactCheckingEvaluator
            .builder(evaluatorChatClientBuilder)
            .build();
    }

    public EvaluatedResponse chat(String query) {
        // Retrieve context documents and flatten them to their text content.
        List<String> contextTexts = vectorStore.similaritySearch(query)
            .stream()
            .map(document -> document.getContent())
            .toList();

        // Generate the grounded answer.
        String answer = chatClient
            .prompt()
            .user(spec -> spec
                .text("""
                    Answer based on the context:
                    Question: {query}
                    Context: {context}
                    """)
                .param("query", query)
                .param("context", String.join("\n", contextTexts))
            )
            .call()
            .content();

        // Grade the answer with both evaluators before returning it.
        EvaluationRequest evalRequest = new EvaluationRequest(
            query, answer, contextTexts
        );
        EvaluationResponse relevancyResult = relevancyEvaluator.evaluate(evalRequest);
        EvaluationResponse factResult = factCheckingEvaluator.evaluate(evalRequest);
        return new EvaluatedResponse(answer, relevancyResult, factResult);
    }

    /** Answer text plus the two evaluation verdicts it was graded with. */
    record EvaluatedResponse(
        String response,
        EvaluationResponse relevance,
        EvaluationResponse factCheck
    ) {
        /** Both evaluators must pass for the answer to count as high quality. */
        public boolean isHighQuality() {
            return relevance.isPass() && factCheck.isPass();
        }

        /** Unweighted mean of the two evaluator scores. */
        public double qualityScore() {
            return (relevance.getScore() + factCheck.getScore()) / 2.0;
        }
    }
}
// Usage
// Production answers come from chatClient; evaluation runs on a separate
// model (evaluatorChatModel) so the judge is independent of the answerer.
EvaluatedChatService service = new EvaluatedChatService(
chatClient,
vectorStore,
ChatClient.builder(evaluatorChatModel)
);
// NOTE(review): EvaluatedResponse is nested in EvaluatedChatService; callers
// outside the class would reference EvaluatedChatService.EvaluatedResponse.
EvaluatedResponse result = service.chat(
"What are the key features of Spring AI?"
);
System.out.println("Response: " + result.response());
System.out.println("High Quality: " + result.isHighQuality());
System.out.println("Quality Score: " + result.qualityScore());
System.out.println("Relevance: " + result.relevance().getFeedback());
System.out.println("Fact Check: " + result.factCheck().getFeedback());