{
  "context": "Tests whether the agent follows the audit workflow from the skill: runs the lookup protocol for the five tool categories and three problem buckets, quotes Guy's definitions verbatim, walks through every dimension, gives a clear verdict per dimension, and summarizes gaps with verbatim quotes.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "All five tools covered",
      "description": "skill-audit.md addresses all five tool categories from the talk: static analysis, dynamic tests (evals), dependency management, security tooling, and observability — each as a distinct section or subsection",
      "max_score": 20
    },
    {
      "name": "All three problems covered",
      "description": "skill-audit.md addresses all three problem buckets from the talk: security & governance, collaboration & reuse, and lifecycle & continuous optimization",
      "max_score": 15
    },
    {
      "name": "Verbatim definitions with line citations",
      "description": "At least three dimensions include a verbatim quote from transcript.md describing what 'good' looks like for that dimension, each with a transcript line citation",
      "max_score": 20
    },
    {
      "name": "Clear verdict per dimension",
      "description": "Each assessed dimension (the five tool categories and the three problem buckets) receives an explicit verdict of 'covered', 'partial', or 'missing' (or a clear functional equivalent such as a rating or status label)",
      "max_score": 20
    },
    {
      "name": "Gap summary section present",
      "description": "skill-audit.md ends with a dedicated summary section that lists all dimensions assessed as 'missing' or 'partial'",
      "max_score": 10
    },
    {
      "name": "Gap summary includes verbatim quotes",
      "description": "The gap summary section includes at least one verbatim quote from transcript.md (with a transcript line citation) in which Guy addresses the identified gaps or explains why they matter",
      "max_score": 15
    }
  ]
}