{
  "context": "The agent was asked to write a publish-ready news and analysis article for the tessl.io blog covering a major MCP Server SDK release. Evaluate the output file article.md for adherence to the news/analysis format, house style, and appropriate handling of unverified third-party claims.",
  "type": "weighted_checklist",
  "checklist": [
    {
      "name": "News structure complete",
      "description": "The article follows the news/analysis sequence: Lead (what happened and why it matters), Context, Details section (version number, key changes, pricing, platform support), Analysis ('So what?'), optional Tessl angle, and What to watch. All required sections are present.",
      "max_score": 12
    },
    {
      "name": "Concrete details present",
      "description": "The article includes specific details from the brief: version 2.0.0, the streaming context transport, OAuth 2.1 support, the context.push() deprecation, the hosted relay pricing of $0.004 per 1,000 tokens, and platform versions (Node.js 18+, Python 3.11+, Go 1.22+).",
      "max_score": 10
    },
    {
      "name": "Suggestive language for claims",
      "description": "Unverified or third-party claims are phrased with suggestive language throughout. The article uses formulations like 'aims to', 'according to', 'reportedly', or 'the team says' rather than stating claimed improvements as established facts. In particular, the unverified latency improvements are not stated as fact.",
      "max_score": 12
    },
    {
      "name": "No sycophantic language",
      "description": "The article does not use words like 'exciting', 'incredible', 'amazing', or similar enthusiasm-signaling terms when covering the announcement. The tone is informative and neutral, not celebratory.",
      "max_score": 8
    },
    {
      "name": "Analysis section quality",
      "description": "The 'So what?' analysis section goes beyond summarizing the announcement. It explains the practical implications for developers building agents, includes the writer's perspective, and is specific enough to be useful to the target audience.",
      "max_score": 10
    },
    {
      "name": "Skills terminology",
      "description": "If the article references Tessl features, it uses 'skills' not 'tiles'. If 'tiles' appears, it is only in historical context with the explanation 'skills (previously called tiles)'.",
      "max_score": 6
    },
    {
      "name": "What to watch closing",
      "description": "The article ends with a 'what to watch' section or equivalent that identifies concrete signals or developments readers should monitor. The closing is specific, not generic.",
      "max_score": 8
    },
    {
      "name": "No hype language",
      "description": "The article contains none of the prohibited terms: 'revolutionary', 'game-changing', 'cutting-edge', 'unlock', 'supercharge'. Industry trend statements use suggestive phrasing ('appears to be moving toward', 'is gaining traction') rather than declarative phrasing.",
      "max_score": 8
    },
    {
      "name": "No em dashes",
      "description": "The article contains zero em dashes. Any sentence that might naturally use an em dash has been restructured using a comma, period, or colon.",
      "max_score": 6
    },
    {
      "name": "SEO metadata complete",
      "description": "Metadata block includes: title (<60 chars), primary keyword from the MCP & integrations cluster or related cluster, meta description (130-155 characters), URL slug (3-6 words, lowercase, hyphenated), at least 2 internal links with contextual anchor text, and estimated read time.",
      "max_score": 10
    },
    {
      "name": "Word count",
      "description": "The article body is between 800 and 1500 words, appropriate for the news/analysis format.",
      "max_score": 10
    }
  ]
}

bapfernandez/article-creator

criteria.json evals/scenario-2/

criteria.json evals/scenario-2/