Create terminal-based presentation slides using presenterm's markdown format with themes, diagrams, code highlighting, and more
92
90%
Does it follow best practices?
Impact
97%
2.15x average score across 5 eval scenarios
Passed
No known issues
{
"context": "Tests whether the agent correctly uses presenterm column layouts for side-by-side content, span-based colored text (with correct HTML tag restrictions), and chooses a diagram over a column wall for quantitative benchmark data.",
"type": "weighted_checklist",
"checklist": [
{
"name": "Column layout used",
"description": "At least one slide uses <!-- column_layout: [...] --> to display content side-by-side",
"max_score": 17
},
{
"name": "Column layout syntax complete",
"description": "Column layout slides include all three required commands: <!-- column_layout: [...] -->, at least one <!-- column: N -->, and <!-- reset_layout -->",
"max_score": 15
},
{
"name": "Span tags for colored text",
"description": "Colored or highlighted text uses <span> tags with style or class attributes (e.g. <span style=\"color: #...\">text</span>)",
"max_score": 18
},
{
"name": "No unsupported HTML tags",
"description": "Does NOT use <div>, <p>, or other non-span HTML tags for text styling",
"max_score": 16
},
{
"name": "Correct slide separator",
"description": "Uses <!-- end_slide --> as the slide separator — does NOT use --- as a slide separator",
"max_score": 2
},
{
"name": "Setext slide titles",
"description": "Slide titles use setext header style (Title\\n===), not ATX headers (# Title)",
"max_score": 2
},
{
"name": "Dark theme set",
"description": "Frontmatter sets theme to one of the dark safe defaults (catppuccin-mocha or dark)",
"max_score": 3
},
{
"name": "Frontmatter completeness",
"description": "Frontmatter includes title, author, and theme fields",
"max_score": 3
},
{
"name": "Incremental reveal on lists",
"description": "At least one slide uses <!-- incremental_lists: true --> or <!-- pause --> to progressively reveal content",
"max_score": 12
},
{
"name": "Diagram for comparison",
"description": "At least one diagram (mermaid +render or d2 +render) is used rather than a plain text table or bullet list for the benchmark/comparison data",
"max_score": 12
}
]
}