{
  "context": "Tests whether the agent, invoking the install-reviewer skill, produces the correct sequence of commands to scaffold the paired gh-aw PR review workflows (OpenAI + Anthropic) into a consumer repository: a feature branch, both templates copied to the right paths, both workflows compiled, all files committed, a PR opened with the three required secrets listed in the body.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "Creates a feature branch",
      "description": "Uses `git checkout -b <branch>` (or equivalent) to create a new branch before making changes — does NOT commit directly to main/master",
      "max_score": 8
    },
    {
      "name": "Plan populates .github/workflows with both source + lock pairs",
      "description": "The plan's command sequence produces all four files at `.github/workflows/`: `review-openai.md`, `review-openai.lock.yml`, `review-anthropic.md`, `review-anthropic.lock.yml`. Each source declares a `pull_request` trigger and a pre-step that runs `tessl install jbaruch/coding-policy`; each lock is the compiled form of its source (produced by `gh aw compile`, which is a public gh CLI extension, not typed out by hand or fetched from a URL). Graded on the end state the plan's commands describe — not on files actually present in the working tree, and not on the specific command sequence chosen to reach it",
      "max_score": 30
    },
    {
      "name": "Commits both sources and both locks",
      "description": "The plan stages and commits all four workflow files — both source files AND both lock files, not a partial set",
      "max_score": 10
    },
    {
      "name": "Pushes and opens a PR",
      "description": "The plan pushes the branch and creates a pull request with `gh pr create`",
      "max_score": 7
    },
    {
      "name": "PR body lists OPENAI_API_KEY",
      "description": "The PR body or the plan instructs the reviewer to set `OPENAI_API_KEY` as a repository secret before merge (required by the Codex reviewer workflow)",
      "max_score": 8
    },
    {
      "name": "PR body lists ANTHROPIC_API_KEY",
      "description": "The PR body or the plan instructs the reviewer to set `ANTHROPIC_API_KEY` as a repository secret before merge (required by the Claude Code reviewer workflow)",
      "max_score": 8
    },
    {
      "name": "PR body lists TESSL_TOKEN",
      "description": "The PR body or the plan instructs the reviewer to set `TESSL_TOKEN` as a repository secret before merge (required so each workflow's `tessl install` pre-step can authenticate)",
      "max_score": 8
    },
    {
      "name": "Does not merge",
      "description": "Does NOT include `gh pr merge` or equivalent. The scaffolding PR is handed to the user for secret validation and merge; the skill stops at PR creation",
      "max_score": 9
    },
    {
      "name": "Does not bypass pre-commit hooks",
      "description": "Does NOT include `--no-verify` on any git commit (if a pre-commit hook fires, the correct response is to fix and re-commit, not bypass)",
      "max_score": 7
    },
    {
      "name": "Explains the cross-family reviewer rationale",
      "description": "The PR body or the plan mentions that the two reviewers pair with the PR's `Author-Model:` declaration so exactly the cross-family reviewer does substantive work — i.e., the dual install is purposeful, not a hedge",
      "max_score": 5
    }
  ]
}

.tessl-plugin

README.md

tile.json

jbaruch/coding-policy

criteria.json evals/scenario-13/

criteria.json evals/scenario-13/