docs
evals
scenario-1
scenario-10
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
{
"context": "This evaluation assesses how effectively the engineer uses the pixelmatch library's threshold parameter to control comparison sensitivity. The focus is on understanding that different threshold values produce different sensitivity levels, and correctly applying the pixelmatch function to implement the three comparison modes.",
"type": "weighted_checklist",
"checklist": [
{
"name": "Imports pixelmatch",
"description": "Correctly imports the pixelmatch library/function at the top of the module",
"max_score": 10
},
{
"name": "Strict mode threshold",
"description": "Uses pixelmatch with a threshold of 0.0 (or very close to 0) in compareStrict to detect the smallest differences",
"max_score": 25
},
{
"name": "Standard mode threshold",
"description": "Uses pixelmatch with a threshold around 0.1 (the default value) in compareStandard to tolerate minor variations",
"max_score": 25
},
{
"name": "Relaxed mode threshold",
"description": "Uses pixelmatch with a threshold of 0.3 or higher in compareRelaxed to only flag major differences",
"max_score": 25
},
{
"name": "Correct parameters",
"description": "Passes the correct parameters to pixelmatch in all three functions: img1, img2, null (for output), width, height, and the options object with threshold",
"max_score": 15
}
]
}