{
  "context": "Tests whether the agent correctly applies the spec quality gate (detecting unresolved [NEEDS CLARIFICATION] items, checking FR-XXX count, and assessing measurable criteria) and produces a semantic diff when re-planning over an existing plan.md, showing what changed in a bordered box format with downstream impact.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "NEEDS CLARIFICATION flagged",
      "description": "planning-notes.md or agent output mentions that [NEEDS CLARIFICATION] items were found in spec.md (FR-001 max file size and FR-005 URL expiry)",
      "max_score": 10
    },
    {
      "name": "FR count assessed",
      "description": "planning-notes.md or agent output shows that the FR-XXX pattern count was checked (the spec has 6 FR items, which is ≥ 3, so this should NOT trigger an ERROR, but the check itself should still be visible)",
      "max_score": 8
    },
    {
      "name": "Measurable criteria warning",
      "description": "planning-notes.md or agent output flags that success criteria SC-001 through SC-004 lack numeric/time/percentage measurements (e.g., a WARNING about no measurable criteria)",
      "max_score": 10
    },
    {
      "name": "Quality score reported",
      "description": "planning-notes.md includes a spec quality score (e.g., 'Score: X/10') or an equivalent quality assessment summary",
      "max_score": 8
    },
    {
      "name": "Semantic diff present",
      "description": "planning-notes.md contains a section comparing old vs new plan (semantic diff), identifying changes to tech stack, architecture, or dependencies",
      "max_score": 10
    },
    {
      "name": "Semantic diff format",
      "description": "The semantic diff uses a bordered box format with +--- borders and +Added / ~Changed / -Removed markers (or equivalent structured diff format from the skill's formatting guide)",
      "max_score": 10
    },
    {
      "name": "Downstream impact flagged",
      "description": "planning-notes.md or the semantic diff notes downstream impact — specifically that tasks.md or implementation work may need updates due to the changed architecture",
      "max_score": 8
    },
    {
      "name": "Updated plan has new dependencies",
      "description": "The updated plan.md adds dependencies related to virus scanning (e.g., ClamAV, a malware scanning library or service) that were absent in the original plan",
      "max_score": 10
    },
    {
      "name": "Updated architecture diagram",
      "description": "The updated plan.md includes a more detailed architecture section that reflects the new components (virus scanner, quota service, or similar) beyond the original simple three-box diagram",
      "max_score": 8
    },
    {
      "name": "No governance in plan",
      "description": "The updated plan.md does NOT contain project-wide principles, non-negotiable rules, or team process requirements — these belong in CONSTITUTION.md",
      "max_score": 8
    },
    {
      "name": "Clarification assumptions documented",
      "description": "The planning-notes.md or plan.md explicitly documents the assumptions made for the two [NEEDS CLARIFICATION] items (file size limit and URL expiry), rather than leaving them unresolved",
      "max_score": 10
    }
  ]
}

rules

skills

README.md

tile.json

tessl-labs/intent-integrity-kit

criteria.json (evals/scenario-8/)

criteria.json (evals/scenario-8/)