{
  "context": "Tests whether the agent correctly implements input and output guardrails using the LangChain4j guardrail API with proper result methods, attaches them to the agent interface via annotations, and uses AgentMonitor to generate an HTML execution report.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "InputGuardrail implemented",
      "description": "A class implements InputGuardrail (from dev.langchain4j.guardrail) and overrides validate(UserMessage)",
      "max_score": 8
    },
    {
      "name": "OutputGuardrail implemented",
      "description": "A class implements OutputGuardrail (from dev.langchain4j.guardrail) and overrides validate(AiMessage)",
      "max_score": 8
    },
    {
      "name": "Input fatal() used",
      "description": "Input guardrail calls fatal() for the hard-block violation case (not just failure())",
      "max_score": 10
    },
    {
      "name": "Output reprompt() used",
      "description": "Output guardrail calls reprompt(reason, newPrompt) for the rewrite attempt case",
      "max_score": 10
    },
    {
      "name": "@InputGuardrails annotation",
      "description": "Agent interface is annotated with @InputGuardrails(InputSafetyGuard.class) or equivalent",
      "max_score": 9
    },
    {
      "name": "@OutputGuardrails annotation",
      "description": "Agent interface is annotated with @OutputGuardrails specifying the output guard class",
      "max_score": 9
    },
    {
      "name": "maxRetries on OutputGuardrails",
      "description": "@OutputGuardrails annotation specifies maxRetries parameter",
      "max_score": 7
    },
    {
      "name": "AgentMonitor created",
      "description": "Code creates an AgentMonitor instance using AgentMonitor.create()",
      "max_score": 8
    },
    {
      "name": "Monitor attached as listener",
      "description": "Monitor is attached to the agent/workflow via .listener(monitor)",
      "max_score": 8
    },
    {
      "name": "HTML report generated",
      "description": "Code calls monitor.generateReport() and writes the result to a file named agent-report.html",
      "max_score": 8
    },
    {
      "name": "Guardrail imports correct",
      "description": "InputGuardrail/OutputGuardrail imported from dev.langchain4j.guardrail; @InputGuardrails/@OutputGuardrails from dev.langchain4j.service.guardrail",
      "max_score": 7
    },
    {
      "name": "Input success() used",
      "description": "Input guardrail calls success() for the passing case",
      "max_score": 8
    }
  ]
}

evals

scenario-1

scenario-2

criteria.json

task.md

scenario-3

skills

tile.json

jbaruch/langchain4j-ai-agent

criteria.json (evals/scenario-2/)

criteria.json (evals/scenario-2/)