{
  "context": "Tests whether the agent implements tool calling using the annotation-based preferred approach, correctly handles memory via the advisor pattern with proper conversation ID propagation, avoids unsupported parameter types in tool methods, and uses ToolContext only for non-conversational extra context rather than conversation history.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "@Tool annotation used",
      "description": "Tool methods are annotated with @Tool (from org.springframework.ai.tool.annotation) rather than defined as function beans or FunctionToolCallback",
      "max_score": 8
    },
    {
      "name": "@Tool description provided",
      "description": "Each @Tool annotation includes a non-empty description attribute",
      "max_score": 7
    },
    {
      "name": "@ToolParam annotations",
      "description": "Tool method parameters are annotated with @ToolParam including a description attribute",
      "max_score": 7
    },
    {
      "name": "Optional param marked required=false",
      "description": "At least one @ToolParam with required=false is present for a genuinely optional parameter",
      "max_score": 8
    },
    {
      "name": "No unsupported param types",
      "description": "Tool method signatures do NOT use Optional, Mono, Flux, CompletableFuture, Function, Supplier, or Consumer as parameter types",
      "max_score": 9
    },
    {
      "name": "Memory advisor used",
      "description": "A memory advisor (MessageChatMemoryAdvisor or PromptChatMemoryAdvisor) is registered on the ChatClient rather than manual message injection",
      "max_score": 10
    },
    {
      "name": "CONVERSATION_ID passed",
      "description": "The ChatMemory.CONVERSATION_ID parameter is set via .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, ...)) on per-request calls",
      "max_score": 10
    },
    {
      "name": "MessageWindowChatMemory used",
      "description": "MessageWindowChatMemory.builder() is used to construct the ChatMemory instance",
      "max_score": 8
    },
    {
      "name": "ToolContext not for history",
      "description": "Tool implementations do NOT access ToolContext to retrieve conversation history; ToolContext is used only for non-conversational context (e.g. tenantId, requestId) if used at all",
      "max_score": 10
    },
    {
      "name": "returnDirect applied",
      "description": "At least one tool uses returnDirect=true on its @Tool annotation for a tool whose result should go directly to the caller without LLM reformulation",
      "max_score": 8
    },
    {
      "name": "ChatClient.Builder injection",
      "description": "ChatClient is constructed by injecting and using ChatClient.Builder (the auto-configured bean) rather than calling ChatClient.create(chatModel) directly in a @Component or @Service",
      "max_score": 7
    },
    {
      "name": "ToolContext for extra context",
      "description": "If tenant/user/request context is passed to tools, it is done via .toolContext(Map.of(...)) on the ChatClient prompt, and retrieved via toolContext.getContext() inside the tool method",
      "max_score": 8
    }
  ]
}

evals

scenario-1

scenario-2

scenario-3

criteria.json

task.md

skills

tile.json

jbaruch/spring-ai-agent

criteria.json evals/scenario-3/

criteria.json evals/scenario-3/