{
  "context": "The agent is given a one-sentence task pointing at a real GitHub repo. Without the tile, agents typically skip policy discovery, omit AI disclosure, ignore prior rejected PRs, and miss conventions. With the tile, the agent should discover AI_POLICY.md, AGENTS.md, CONTRIBUTING.md, learn from rejected PRs #5 and #8, follow Conventional Commits, handle DCO, include a changelog entry, and prepare a properly formatted PR.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "Bug actually fixed correctly",
      "description": "The fix raises QueueFullError (or equivalent) when enqueue is called on a full queue, NOT blocking or dropping messages.",
      "max_score": 6
    },
    {
      "name": "Correct approach chosen (not blocking, not dropping)",
      "description": "The fix uses QueueFullError (raise an error), NOT blocking (rejected in PR #5) and NOT drop-with-warning (rejected in PR #8). The correct approach was chosen regardless of whether the agent explicitly cited the prior PRs.",
      "max_score": 8
    },
    {
      "name": "Approach informed by prior rejections",
      "description": "The agent's chosen approach avoids the mistakes from prior rejected PRs. Evidence: the approach differs from what was rejected. Bonus if the agent mentions why, but the approach itself is what matters.",
      "max_score": 4
    },
    {
      "name": "AI disclosure in PR description",
      "description": "The PR description includes an AI disclosure section specifying what tool was used, what it helped with, and what was human-verified. The disclosure follows the project's format (Tool / Used for / Human-verified) or a reasonable equivalent.",
      "max_score": 10
    },
    {
      "name": "PR follows project conventions",
      "description": "The contribution exhibits behaviors consistent with project instructions: commit messages follow Conventional Commits (type(scope): description), PR is focused on a single issue, tests are included. Whether the agent explicitly mentions AGENTS.md or CONTRIBUTING.md is irrelevant — only the output matters.",
      "max_score": 6
    },
    {
      "name": "Regression test included",
      "description": "A test verifies enqueue raises QueueFullError when the queue is at capacity. The test matches the project's existing test patterns (pytest, class-based).",
      "max_score": 8
    },
    {
      "name": "Conventional Commits format",
      "description": "Commit message follows Conventional Commits with type 'fix' and scope 'queue', i.e. 'fix(queue): <short description of the change>'.",
      "max_score": 6
    },
    {
      "name": "Branch naming convention",
      "description": "Branch name follows the project's convention (e.g., fix/2-description). If the project specifies a format, it should be followed.",
      "max_score": 4
    },
    {
      "name": "DCO action taken",
      "description": "The agent tells the contributor they must use 'git commit -s' to add Signed-off-by, OR flags that DCO sign-off is required and that the agent cannot do it for them. The ACTION matters — not just knowing DCO exists. The agent must NOT forge the sign-off itself.",
      "max_score": 8
    },
    {
      "name": "PR template fetched and followed",
      "description": "The PR description preserves every section from the project's PR template (Summary, Related Issue, Type of Change, AI Assistance, Testing, Checklist) in the template's original order, with no sections stripped, renamed, or reordered. Every section is filled with real content — no '[REPLACE ME]' or left-blank placeholders. Evidence: section headings in the PR description match the template verbatim.",
      "max_score": 6
    },
    {
      "name": "Changelog entry included",
      "description": "CHANGELOG.md is updated with the fix under an [Unreleased] section.",
      "max_score": 6
    },
    {
      "name": "No unsolicited changes",
      "description": "The fix is focused on the capacity bug only. Does not refactor surrounding code, add unrelated features, or clean up things that weren't asked for.",
      "max_score": 6
    },
    {
      "name": "Issue #2 referenced",
      "description": "The PR description references issue #2.",
      "max_score": 4
    },
    {
      "name": "Code follows EditorConfig and pre-commit settings",
      "description": "The generated code uses indent_size=4 and line-length consistent with the project. Scores the OUTCOME — correct formatting — not whether the agent mentioned config files.",
      "max_score": 4
    }
  ]
}

tessl-labs/good-oss-citizen

criteria.json (evals/scenario-7/)

criteria.json (evals/scenario-7/)