docs
evals
scenario-1
scenario-10
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
{
"context": "This criteria evaluates how well the engineer uses the pixelmatch package to implement a screenshot comparison tool with visual diff generation capabilities. The focus is on correct usage of pixelmatch's core API for comparing images, generating visual diff outputs, and handling the required buffer management.",
"type": "weighted_checklist",
"checklist": [
{
"name": "PNG Reading Setup",
"description": "Uses pngjs to read PNG images and extract image data (width, height, and data buffer) needed for pixelmatch comparison",
"max_score": 15
},
{
"name": "pixelmatch Function Call",
"description": "Correctly calls the pixelmatch function with the required parameters: img1 buffer, img2 buffer, output buffer (or null), width, and height",
"max_score": 25
},
{
"name": "Output Buffer Allocation",
"description": "Properly allocates an output buffer (Uint8Array, Uint8ClampedArray, or Buffer) with the correct size (width * height * 4) for the visual diff image",
"max_score": 15
},
{
"name": "Diff Image Generation",
"description": "Generates and saves the visual diff image by passing the output buffer to pixelmatch and then encoding it to PNG format using pngjs",
"max_score": 20
},
{
"name": "Mismatch Count Usage",
"description": "Uses the return value from pixelmatch (number of mismatched pixels) to calculate and report comparison statistics",
"max_score": 15
},
{
"name": "Statistics Calculation",
"description": "Correctly calculates totalPixels (width * height) and percentDifference ((mismatchedPixels / totalPixels) * 100) based on the comparison results",
"max_score": 10
}
]
}