docs
evals
scenario-1
scenario-10
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
{
"context": "This criteria evaluates how well the engineer uses pixelmatch and pngjs to build a command-line image comparison tool with proper CLI features including argument parsing, performance timing, exit codes, and error handling for automation workflows.",
"type": "weighted_checklist",
"checklist": [
{
"name": "PNG Loading",
"description": "Uses pngjs (PNG.sync.read or similar) to properly load PNG files from the file system into image buffers that can be passed to pixelmatch",
"max_score": 15
},
{
"name": "Pixelmatch Invocation",
"description": "Correctly calls pixelmatch with img1.data, img2.data, output buffer (or null), width, height, and optional threshold parameter from command line",
"max_score": 20
},
{
"name": "Performance Timing",
"description": "Implements performance timing using console.time/console.timeEnd or equivalent to measure and display comparison duration",
"max_score": 15
},
{
"name": "Dimension Validation",
"description": "Detects when images have different dimensions by comparing width/height properties and reports this appropriately",
"max_score": 10
},
{
"name": "Error Percentage",
"description": "Calculates and displays error percentage as (mismatched pixels / total pixels * 100) where total pixels = width * height",
"max_score": 10
},
{
"name": "Exit Code 0",
"description": "Uses process.exit(0) or returns 0 when images match exactly (mismatched pixels === 0)",
"max_score": 5
},
{
"name": "Exit Code 64",
"description": "Uses process.exit(64) for invalid arguments or file reading errors",
"max_score": 5
},
{
"name": "Exit Code 65",
"description": "Uses process.exit(65) specifically when image dimensions don't match",
"max_score": 5
},
{
"name": "Exit Code 66",
"description": "Uses process.exit(66) when images differ (mismatched pixels > 0)",
"max_score": 5
},
{
"name": "Optional Diff Output",
"description": "Handles optional output-diff.png argument by creating output buffer when provided and writing PNG using pngjs (PNG.sync.write), or passing null to pixelmatch when not provided",
"max_score": 10
}
]
}