Spec-driven workflow covering requirement gathering, spec authoring, implementation review, and verification — with skills, rules, and evaluation scenarios.
96
90%
Does it follow best practices?
Impact
98%
1.19x
Average score across 9 eval scenarios
Passed
No known issues
{
"context": "Spec drift detection: the auth module was refactored (files moved, lockout threshold changed from 5 to 3, session TTL changed from 24h to 8h). Agent must catch all discrepancies.",
"type": "weighted_checklist",
"checklist": [
{
"name": "Drift report produced",
"description": "A file named drift-report.md is created",
"max_score": 4
},
{
"name": "Broken targets detected",
"description": "Report identifies that targets src/auth/login.py and src/auth/session.py no longer exist — files moved to src/identity/",
"max_score": 10
},
{
"name": "Broken test links detected",
"description": "Report identifies that [@test] links pointing to tests/auth/ are broken — tests moved to tests/identity/",
"max_score": 10
},
{
"name": "Lockout threshold change detected",
"description": "Report identifies that lockout threshold changed from 5 failed attempts (in spec) to 3 (in code)",
"max_score": 15
},
{
"name": "Session TTL change detected",
"description": "Report identifies that session expiry changed from 24 hours (in spec) to 8 hours (in code)",
"max_score": 15
},
{
"name": "Updated spec targets corrected",
"description": "Updated specs/auth.spec.md has targets pointing to src/identity/authentication.py and src/identity/sessions.py",
"max_score": 10
},
{
"name": "Updated spec test links corrected",
"description": "Updated specs/auth.spec.md has [@test] links pointing to tests/identity/ paths",
"max_score": 10
},
{
"name": "Updated spec lockout threshold corrected",
"description": "Updated specs/auth.spec.md says 3 failed attempts (matching the code), not 5",
"max_score": 8
},
{
"name": "Updated spec session TTL corrected",
"description": "Updated specs/auth.spec.md says 8 hours (matching the code), not 24",
"max_score": 8
},
{
"name": "No false positives",
"description": "Report does not flag things that are NOT drift (e.g. does not claim functions were removed when they still exist under new paths)",
"max_score": 10
}
]
}