CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-client-chat

Spring AI Chat Client provides a fluent API for building AI-powered applications with LLMs, supporting advisors, streaming, structured outputs, and conversation memory

Overview
Eval results
Files

docs/reference/evaluation.md

Evaluation

Spring AI Chat Client provides evaluators for assessing the quality and accuracy of LLM responses. Evaluators help detect hallucinations, verify relevance, and measure response quality.

Imports

import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.Evaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.prompt.PromptTemplate;

Evaluator Interface

All evaluators implement the common Evaluator interface.

interface Evaluator {
    EvaluationResponse evaluate(EvaluationRequest request);
}

Evaluation Request:

class EvaluationRequest {
    String getQuery();           // Original user query
    String getResponse();        // LLM response to evaluate
    List<String> getContext();   // Supporting context documents
}

Evaluation Response:

class EvaluationResponse {
    boolean isPass();            // Whether evaluation passed
    double getScore();           // Numeric score (0.0-1.0)
    String getFeedback();        // Detailed feedback text
}

RelevancyEvaluator

Evaluates whether an LLM response is relevant to the user query given the provided context.

class RelevancyEvaluator implements Evaluator {
    static Builder builder();

    EvaluationResponse evaluate(EvaluationRequest request);

    interface Builder {
        Builder chatClientBuilder(ChatClient.Builder chatClientBuilder);
        Builder promptTemplate(PromptTemplate promptTemplate);
        RelevancyEvaluator build();
    }
}

What It Checks:

  • Response addresses the query
  • Response uses information from context
  • Response doesn't introduce irrelevant information

Example - Basic Relevancy Evaluation:

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

// Create evaluator
ChatClient.Builder chatClientBuilder = ChatClient.builder(chatModel);

RelevancyEvaluator evaluator = RelevancyEvaluator.builder()
    .chatClientBuilder(chatClientBuilder)
    .build();

// Evaluate a response
EvaluationRequest request = new EvaluationRequest(
    "What is the capital of France?",  // query
    "Paris is the capital of France.", // response
    List.of("France is a country in Europe. Its capital is Paris.") // context
);

EvaluationResponse evaluation = evaluator.evaluate(request);

System.out.println("Pass: " + evaluation.isPass());      // true
System.out.println("Score: " + evaluation.getScore());    // 1.0
System.out.println("Feedback: " + evaluation.getFeedback());

Example - Detecting Irrelevant Response:

import org.springframework.ai.chat.client.ChatClient;  // was missing: ChatClient.builder(...) is used below
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

RelevancyEvaluator evaluator = RelevancyEvaluator.builder()
    .chatClientBuilder(ChatClient.builder(chatModel))
    .build();

// Response that doesn't address the query
EvaluationRequest request = new EvaluationRequest(
    "What is the capital of France?",
    "France is known for its wine and cheese.",
    List.of("France is a country in Europe. Its capital is Paris.")
);

EvaluationResponse evaluation = evaluator.evaluate(request);

System.out.println("Pass: " + evaluation.isPass());      // false
System.out.println("Score: " + evaluation.getScore());    // < 0.5
System.out.println("Feedback: " + evaluation.getFeedback());
// "Response does not answer the question about the capital"

Example - Custom Prompt Template:

import org.springframework.ai.chat.client.ChatClient;  // was missing: ChatClient.builder(...) is used below
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.prompt.PromptTemplate;

// Custom evaluation prompt. Ask for a 0-1 score so the model's rating lines
// up with EvaluationResponse.getScore(), which reports a 0.0-1.0 value.
PromptTemplate customPrompt = new PromptTemplate("""
    Evaluate if the response is relevant to the query.

    Query: {query}
    Context: {context}
    Response: {response}

    Rate relevance from 0 to 1 and explain your reasoning.
    """);

RelevancyEvaluator evaluator = RelevancyEvaluator.builder()
    .chatClientBuilder(ChatClient.builder(chatModel))
    .promptTemplate(customPrompt)
    .build();

FactCheckingEvaluator

Evaluates the factual accuracy of LLM responses against provided context to detect hallucinations.

class FactCheckingEvaluator implements Evaluator {
    static Builder builder(ChatClient.Builder chatClientBuilder);
    static Builder forBespokeMinicheck(ChatClient.Builder chatClientBuilder);

    EvaluationResponse evaluate(EvaluationRequest request);

    interface Builder {
        Builder chatClientBuilder(ChatClient.Builder chatClientBuilder);
        Builder evaluationPrompt(PromptTemplate promptTemplate);
        FactCheckingEvaluator build();
    }
}

What It Checks:

  • Response claims are supported by context
  • No contradictions with context
  • No fabricated information

Example - Basic Fact Checking:

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

// Create evaluator
ChatClient.Builder chatClientBuilder = ChatClient.builder(chatModel);

FactCheckingEvaluator evaluator = FactCheckingEvaluator
    .builder(chatClientBuilder)
    .build();

// Evaluate factual response
EvaluationRequest request = new EvaluationRequest(
    "When was Spring Framework released?",
    "Spring Framework was released in 2003.",
    List.of("The Spring Framework was first released in 2003 by Rod Johnson.")
);

EvaluationResponse evaluation = evaluator.evaluate(request);

System.out.println("Pass: " + evaluation.isPass());      // true
System.out.println("Score: " + evaluation.getScore());    // 1.0
System.out.println("Feedback: " + evaluation.getFeedback());

Example - Detecting Hallucination:

import org.springframework.ai.chat.client.ChatClient;  // was missing: ChatClient.builder(...) is used below
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

FactCheckingEvaluator evaluator = FactCheckingEvaluator
    .builder(ChatClient.builder(chatModel))
    .build();

// Response with fabricated information
EvaluationRequest request = new EvaluationRequest(
    "When was Spring Framework released?",
    "Spring Framework was released in 1998.",
    List.of("The Spring Framework was first released in 2003 by Rod Johnson.")
);

EvaluationResponse evaluation = evaluator.evaluate(request);

System.out.println("Pass: " + evaluation.isPass());      // false
System.out.println("Score: " + evaluation.getScore());    // 0.0
System.out.println("Feedback: " + evaluation.getFeedback());
// "Response contains incorrect date - framework was released in 2003, not 1998"

Example - Bespoke Minicheck Model:

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.chat.model.ChatModel;        // was missing: type used below — verify package
import org.springframework.ai.openai.OpenAiChatModel;
import org.springframework.ai.openai.OpenAiChatOptions;    // was missing: used below — verify package

// Use specialized fact-checking model
ChatModel minicheckModel = new OpenAiChatModel(
    openAiApi,
    OpenAiChatOptions.builder()
        .withModel("bespoke-minicheck")
        .build()
);

// Pre-configured for Bespoke Minicheck
FactCheckingEvaluator evaluator = FactCheckingEvaluator
    .forBespokeMinicheck(ChatClient.builder(minicheckModel))
    .build();

// Reuses the EvaluationRequest built in the previous example
EvaluationResponse evaluation = evaluator.evaluate(request);

Example - Custom Evaluation Prompt:

import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.chat.prompt.PromptTemplate;

PromptTemplate customPrompt = new PromptTemplate("""
    Verify the factual accuracy of the response.

    Context: {context}
    Response: {response}

    Check each claim:
    1. Is it supported by the context?
    2. Does it contradict the context?
    3. Is it fabricated?

    Provide a score (0-1) and detailed feedback.
    """);

FactCheckingEvaluator evaluator = FactCheckingEvaluator
    .builder(ChatClient.builder(chatModel))
    .evaluationPrompt(customPrompt)
    .build();

Evaluation Pipeline

Combine multiple evaluators for comprehensive quality assessment.

Example - Multi-Evaluator Pipeline:

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.Evaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;

class EvaluationPipeline {

    // Evaluators applied to every request: relevancy first, then fact checking.
    private final List<Evaluator> evaluators;

    public EvaluationPipeline(ChatClient.Builder chatClientBuilder) {
        this.evaluators = List.of(
            RelevancyEvaluator.builder()
                .chatClientBuilder(chatClientBuilder)
                .build(),
            FactCheckingEvaluator.builder(chatClientBuilder)
                .build()
        );
    }

    // Runs each configured evaluator against the request and collects the results.
    // NOTE: each evaluator invocation is a separate model call.
    public List<EvaluationResponse> evaluate(EvaluationRequest request) {
        return this.evaluators.stream()
            .map(candidate -> candidate.evaluate(request))
            .toList();
    }

    // True only when every evaluator reports a pass for this request.
    public boolean allPass(EvaluationRequest request) {
        for (EvaluationResponse result : evaluate(request)) {
            if (!result.isPass()) {
                return false;
            }
        }
        return true;
    }

    // Mean of all evaluator scores; 0.0 when there are no evaluators.
    public double averageScore(EvaluationRequest request) {
        List<EvaluationResponse> results = evaluate(request);
        if (results.isEmpty()) {
            return 0.0;
        }
        double sum = 0.0;
        for (EvaluationResponse result : results) {
            sum += result.getScore();
        }
        return sum / results.size();
    }
}

// Usage
EvaluationPipeline pipeline = new EvaluationPipeline(
    ChatClient.builder(chatModel)
);

EvaluationRequest request = new EvaluationRequest(
    "What is Spring Boot?",
    "Spring Boot is a framework for building Java applications.",
    List.of("Spring Boot simplifies Spring application development...")
);

boolean allPass = pipeline.allPass(request);
double avgScore = pipeline.averageScore(request);

System.out.println("All evaluations passed: " + allPass);
System.out.println("Average score: " + avgScore);

RAG Evaluation

Use evaluators to assess Retrieval-Augmented Generation (RAG) responses.

Example - RAG Evaluation:

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;  // was missing: used for the results below
import java.util.List;

// Generate response with RAG
String query = "What are the benefits of Spring Boot?";

// similaritySearch returns documents; map them to their text content
// (matches the doc.getContent() usage in the complete example in this guide).
List<String> retrievedDocs = vectorStore.similaritySearch(query)
    .stream()
    .map(doc -> doc.getContent())
    .toList();

String response = ChatClient.builder(chatModel)
    .build()
    .prompt()
    .user(userPrompt -> userPrompt
        .text("""
            Answer the question based on the context.

            Question: {query}
            Context: {context}
            """)
        .param("query", query)
        .param("context", String.join("\n", retrievedDocs))
    )
    .call()
    .content();

// Evaluate RAG response
RelevancyEvaluator relevancyEvaluator = RelevancyEvaluator.builder()
    .chatClientBuilder(ChatClient.builder(chatModel))
    .build();

FactCheckingEvaluator factCheckingEvaluator = FactCheckingEvaluator
    .builder(ChatClient.builder(chatModel))
    .build();

EvaluationRequest evalRequest = new EvaluationRequest(
    query,
    response,
    retrievedDocs
);

// Check relevance
EvaluationResponse relevanceEval = relevancyEvaluator.evaluate(evalRequest);
System.out.println("Relevance: " + relevanceEval.isPass());

// Check factual accuracy
EvaluationResponse factEval = factCheckingEvaluator.evaluate(evalRequest);
System.out.println("Factual accuracy: " + factEval.isPass());

Automated Testing

Use evaluators in automated tests to ensure response quality.

Example - Unit Test:

import org.junit.jupiter.api.Test;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;

class ChatResponseQualityTest {
    private final ChatClient chatClient = ChatClient.builder(chatModel).build();
    private final RelevancyEvaluator relevancyEvaluator =
        RelevancyEvaluator.builder()
            .chatClientBuilder(ChatClient.builder(chatModel))
            .build();
    private final FactCheckingEvaluator factCheckingEvaluator =
        FactCheckingEvaluator.builder(ChatClient.builder(chatModel))
            .build();

    @Test
    void testResponseQuality() {
        String query = "What is Spring Framework?";
        List<String> context = List.of(
            "Spring Framework is a comprehensive framework for Java development."
        );

        // Ground the model in the same context the evaluators will judge
        // against. Generating without the context would make the fact check
        // compare the response to documents the model never saw, which can
        // fail even for a correct answer.
        String response = chatClient
            .prompt()
            .user(userSpec -> userSpec
                .text("""
                    Answer the question based on the context.

                    Question: {query}
                    Context: {context}
                    """)
                .param("query", query)
                .param("context", String.join("\n", context))
            )
            .call()
            .content();

        // Evaluate the generated response against the same query and context
        EvaluationRequest request = new EvaluationRequest(
            query, response, context
        );

        EvaluationResponse relevance = relevancyEvaluator.evaluate(request);
        EvaluationResponse factCheck = factCheckingEvaluator.evaluate(request);

        // Assert quality
        assertTrue(relevance.isPass(),
            "Response should be relevant: " + relevance.getFeedback());
        assertTrue(factCheck.isPass(),
            "Response should be factually accurate: " + factCheck.getFeedback());
        assertTrue(relevance.getScore() > 0.7,
            "Relevance score should be high");
        assertTrue(factCheck.getScore() > 0.7,
            "Fact checking score should be high");
    }
}

Complete Example

import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.chat.evaluation.RelevancyEvaluator;
import org.springframework.ai.chat.evaluation.FactCheckingEvaluator;
import org.springframework.ai.evaluation.Evaluator;
import org.springframework.ai.evaluation.EvaluationRequest;
import org.springframework.ai.evaluation.EvaluationResponse;
import org.springframework.ai.vectorstore.VectorStore;
import java.util.List;

class EvaluatedChatService {

    private final ChatClient chatClient;
    private final VectorStore vectorStore;
    private final RelevancyEvaluator relevancyEvaluator;
    private final FactCheckingEvaluator factCheckingEvaluator;

    public EvaluatedChatService(
        ChatClient chatClient,
        VectorStore vectorStore,
        ChatClient.Builder evaluatorChatClientBuilder
    ) {
        this.chatClient = chatClient;
        this.vectorStore = vectorStore;
        // Both evaluators share the same evaluator-side chat client builder.
        this.relevancyEvaluator = RelevancyEvaluator.builder()
            .chatClientBuilder(evaluatorChatClientBuilder)
            .build();
        this.factCheckingEvaluator = FactCheckingEvaluator
            .builder(evaluatorChatClientBuilder)
            .build();
    }

    /**
     * Answers the query with retrieval-augmented generation, then scores the
     * answer for relevance and factual accuracy against the retrieved context.
     */
    public EvaluatedResponse chat(String query) {
        // Retrieve supporting documents for the query.
        List<String> contextDocs = vectorStore.similaritySearch(query)
            .stream()
            .map(doc -> doc.getContent())
            .toList();

        // Generate the answer, grounded in the retrieved context.
        String answer = chatClient
            .prompt()
            .user(promptSpec -> promptSpec
                .text("""
                    Answer based on the context:

                    Question: {query}
                    Context: {context}
                    """)
                .param("query", query)
                .param("context", String.join("\n", contextDocs))
            )
            .call()
            .content();

        // Score the answer against the same query and context it was built from.
        EvaluationRequest evaluationRequest = new EvaluationRequest(
            query, answer, contextDocs
        );

        return new EvaluatedResponse(
            answer,
            relevancyEvaluator.evaluate(evaluationRequest),
            factCheckingEvaluator.evaluate(evaluationRequest)
        );
    }

    record EvaluatedResponse(
        String response,
        EvaluationResponse relevance,
        EvaluationResponse factCheck
    ) {
        // High quality only when both evaluators pass.
        public boolean isHighQuality() {
            return relevance.isPass() && factCheck.isPass();
        }

        // Mean of the two evaluator scores.
        public double qualityScore() {
            return (relevance.getScore() + factCheck.getScore()) / 2.0;
        }
    }
}

// Usage
EvaluatedChatService service = new EvaluatedChatService(
    chatClient,
    vectorStore,
    ChatClient.builder(evaluatorChatModel)
);

EvaluatedResponse result = service.chat(
    "What are the key features of Spring AI?"
);

System.out.println("Response: " + result.response());
System.out.println("High Quality: " + result.isHighQuality());
System.out.println("Quality Score: " + result.qualityScore());
System.out.println("Relevance: " + result.relevance().getFeedback());
System.out.println("Fact Check: " + result.factCheck().getFeedback());

Install with Tessl CLI

npx tessl i tessl/maven-org-springframework-ai--spring-ai-client-chat

docs

index.md

tile.json