docs
evals
scenario-1
scenario-10
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
{
"context": "This evaluation assesses how well the engineer handles edge cases when processing test results, similar to karma-qunit's approach to filtering global failures, handling missing properties, and ensuring robust cross-version compatibility.",
"type": "weighted_checklist",
"checklist": [
{
"name": "Global failure filtering",
"description": "Correctly filters out test results where name is exactly 'global failure', preventing framework artifacts from being reported",
"max_score": 25
},
{
"name": "Missing runtime handling",
"description": "Properly detects missing or undefined 'runtime' property and sets it to 0, ensuring compatibility with older framework versions",
"max_score": 20
},
{
"name": "Invalid object validation",
"description": "Validates that each test object has required properties (name, passed, failed) and skips objects missing any of these properties",
"max_score": 25
},
{
"name": "Module name normalization",
"description": "Correctly normalizes missing, null, or undefined 'module' properties to an empty string, ensuring a consistent data structure",
"max_score": 20
},
{
"name": "Correct output structure",
"description": "Returns a new array of processed test objects that retain all original properties while applying the edge-case transformations",
"max_score": 10
}
]
}