{
  "context": "Tests whether the agent uses GraphQL (not REST) to request Copilot review, provides the correct hardcoded bot ID, includes a fallback for when the bot ID is stale, and follows the correct PR title and body format when scripting the PR creation and review request workflow.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "GraphQL mutation used",
      "description": "Uses `gh api graphql` with a `requestReviews` mutation to add Copilot as a reviewer — does NOT use `gh pr edit --add-reviewer` or a REST POST to `gh api .../requested_reviewers`",
      "max_score": 15
    },
    {
      "name": "Correct Copilot bot ID",
      "description": "Passes bot ID `BOT_kgDOCnlnWA` in the `botIds` field of the `requestReviews` mutation",
      "max_score": 15
    },
    {
      "name": "PR node ID retrieval",
      "description": "Fetches the PR's GraphQL node ID (e.g. via a `pullRequest(number: N) { id }` query) before calling the mutation — does NOT pass the integer PR number directly to `requestReviews`",
      "max_score": 10
    },
    {
      "name": "Bot ID fallback included",
      "description": "Includes (or documents) a fallback GraphQL query that retrieves the Copilot bot ID from past PR reviews when the hardcoded ID is stale",
      "max_score": 10
    },
    {
      "name": "Review request verification",
      "description": "After requesting review, verifies that the request was accepted by calling `gh api repos/.../pulls/<N>` and inspecting the `requested_reviewers` field of the response",
      "max_score": 8
    },
    {
      "name": "Feature branch guard",
      "description": "Script checks that the current branch is NOT main or master before proceeding",
      "max_score": 10
    },
    {
      "name": "PR title format",
      "description": "PR title in the script follows the pattern `<type>(<scope>): <imperative summary>` (e.g. `fix(auth): handle token expiry`)",
      "max_score": 8
    },
    {
      "name": "PR body Summary section",
      "description": "PR body template includes a `## Summary` section with bullet points describing what changed and why",
      "max_score": 8
    },
    {
      "name": "PR body Test plan section",
      "description": "PR body template includes a `## Test plan` section with markdown checkbox items (`- [ ] ...`)",
      "max_score": 8
    },
    {
      "name": "Pre-push readiness",
      "description": "Script or documentation includes steps to run both the tests AND the linter before pushing/creating the PR, and requires both to pass",
      "max_score": 8
    }
  ]
}

jbaruch/coding-policy

criteria.json — evals/scenario-9/

criteria.json — evals/scenario-9/