{
  "context": "Tests whether the agent creates an MCP server using correct Spring AI 2.0 starters and annotation packages, implements one of the five recognised agentic workflow patterns, and uses AugmentedToolCallbackProvider to capture and log the agent's inner reasoning before each tool call.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "MCP server starter",
      "description": "pom.xml uses spring-ai-starter-mcp-server-webmvc (or spring-ai-starter-mcp-server / spring-ai-starter-mcp-server-webflux) from groupId org.springframework.ai — NOT io.modelcontextprotocol.sdk",
      "max_score": 9
    },
    {
      "name": "@McpTool annotation package",
      "description": "Java source imports @McpTool from org.springframework.ai.mcp.annotation (NOT from org.springframework.ai.mcp.spring.annotations)",
      "max_score": 11
    },
    {
      "name": "@McpToolParam annotation package",
      "description": "Java source imports @McpToolParam from org.springframework.ai.mcp.annotation (NOT from org.springframework.ai.mcp.spring.annotations)",
      "max_score": 9
    },
    {
      "name": "McpTool descriptions",
      "description": "Each @McpTool annotation has a non-empty description attribute",
      "max_score": 7
    },
    {
      "name": "Agentic pattern implemented",
      "description": "The agent client uses one of the five Spring AI agentic workflow patterns: Chain, Routing, Parallelization, Orchestrator-Workers, or Evaluator-Optimizer — either by referencing the named pattern class or implementing the pattern's structure explicitly",
      "max_score": 15
    },
    {
      "name": "AugmentedToolCallbackProvider used",
      "description": "The ChatClient is configured with AugmentedToolCallbackProvider (from org.springframework.ai.tool.augmentation) to augment tool calls with inner-thought or reasoning fields",
      "max_score": 14
    },
    {
      "name": "Reasoning argument type defined",
      "description": "A record or class serving as the reasoning argument type (e.g. one containing an innerThought field annotated with @ToolParam) is passed as argumentType to AugmentedToolCallbackProvider",
      "max_score": 9
    },
    {
      "name": "Reasoning consumer logs",
      "description": "The argumentConsumer on AugmentedToolCallbackProvider logs or records the reasoning/inner-thought value from each tool invocation event",
      "max_score": 8
    },
    {
      "name": "removeExtraArguments enabled",
      "description": "AugmentedToolCallbackProvider is configured with removeExtraArgumentsAfterProcessing(true) so the reasoning fields are stripped before the tool receives its actual arguments",
      "max_score": 9
    },
    {
      "name": "MCP server type configured",
      "description": "application.properties or application.yml contains spring.ai.mcp.server.type (SYNC or ASYNC) and spring.ai.mcp.server.protocol (SSE, STREAMABLE, or STATELESS)",
      "max_score": 9
    }
  ]
}

evals

scenario-1

criteria.json

task.md

scenario-2

scenario-3

skills

tile.json

jbaruch/spring-ai-agent

criteria.json — evals/scenario-1/

criteria.json — evals/scenario-1/