Systematic diary exploration: discover tags, entry distribution, coverage gaps, and agent mistakes, then compile recipes
86
90%
Does it follow best practices?
Impact
81%
1.06x Average score across 5 eval scenarios
Advisory
Suggest reviewing before use
{
"context": "Tests whether the agent correctly implements Phase 1 inventory analysis: pagination with configurable batch size, entry type counting, tag frequency, tag namespace discovery from data (not hardcoded), importance histogram, and temporal range.",
"type": "weighted_checklist",
"checklist": [
{
"name": "Pagination function",
"description": "paginateEntries function accepts a fetch function and a batch size parameter, calling the fetch function repeatedly with an increasing offset until all entries are retrieved",
"max_score": 10
},
{
"name": "Batch size 50",
"description": "Default or recommended batch size is 50 entries per page",
"max_score": 8
},
{
"name": "Entry type counts",
"description": "Report includes count per entryType value (procedural, semantic, episodic, reflection, identity)",
"max_score": 8
},
{
"name": "Tag frequency",
"description": "Report includes frequency count for every distinct tag across all entries",
"max_score": 8
},
{
"name": "Tag namespace grouping",
"description": "Tags are grouped by prefix before the first colon (e.g., risk:, branch:, scope:) with distinct values listed under each namespace",
"max_score": 12
},
{
"name": "Namespace discovery not hardcoded",
"description": "Code discovers namespaces dynamically from the data rather than checking a fixed list of known prefixes",
"max_score": 12
},
{
"name": "Importance histogram",
"description": "Report includes distribution of importance values across the 1-10 range",
"max_score": 8
},
{
"name": "Temporal range",
"description": "Report includes earliest and most recent entry dates",
"max_score": 8
},
{
"name": "Sample report generated",
"description": "inventory-report.md contains a report rendered from the sample data and formatted with markdown tables",
"max_score": 10
},
{
"name": "Tag tree format",
"description": "Tag namespaces are presented as an indented tree (namespace → values with counts) rather than a flat list",
"max_score": 8
},
{
"name": "Total entry count",
"description": "Report header includes the total number of entries analyzed",
"max_score": 8
}
]
}