Collect and normalize agent logs, discover installed verifiers, and dispatch LLM judges to evaluate adherence. Produces per-session verdicts and aggregated reports.
91
90%
Does it follow best practices?
Impact
96%
3.09x — average score across 3 eval scenarios
Passed
No known issues
#!/usr/bin/env python3
"""
Merge individual friction review files into an aggregated summary.
Reads friction JSON files from friction/<agent>/<session>.friction.json,
aggregates friction counts by type and agent, and writes friction-summary.json.
No external dependencies.
"""
import argparse
import json
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path
def load_friction_reviews(friction_dir: Path) -> list[dict]:
    """Load all friction review JSON files under *friction_dir*.

    Expects the layout ``friction/<agent>/<session>.friction.json``.
    Hidden/underscore-prefixed agent directories are skipped. Each loaded
    review dict is tagged with two bookkeeping keys:

    - ``_source_file``: absolute/relative path string of the source file
    - ``_agent_dir``: name of the agent directory it came from

    Unreadable, malformed, or non-object JSON files are reported to stderr
    and skipped rather than aborting the whole aggregation.

    Returns:
        List of review dicts (empty if *friction_dir* does not exist).
    """
    reviews: list[dict] = []
    if not friction_dir.exists():
        return reviews
    for agent_dir in sorted(friction_dir.iterdir()):
        if not agent_dir.is_dir() or agent_dir.name.startswith((".", "_")):
            continue
        for review_file in sorted(agent_dir.glob("*.friction.json")):
            try:
                data = json.loads(review_file.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError) as e:
                print(f"Warning: failed to read {review_file}: {e}",
                      file=sys.stderr)
                continue
            # A syntactically valid JSON file may still hold a list/string/
            # number; assigning tag keys to those would raise TypeError and
            # crash the run, so skip non-object payloads explicitly.
            if not isinstance(data, dict):
                print(f"Warning: {review_file} is not a JSON object, skipping",
                      file=sys.stderr)
                continue
            data["_source_file"] = str(review_file)
            data["_agent_dir"] = agent_dir.name
            reviews.append(data)
    return reviews
def aggregate(reviews: list[dict]) -> dict:
    """Roll individual friction reviews up into one summary dict.

    Tallies friction events by type, impact, and agent; counts session
    outcomes and satisfaction labels; computes the fraction of sessions
    with at least one friction event; and totals token usage into an
    estimated USD cost (Haiku pricing: $0.80/M input, $4.00/M output).

    Args:
        reviews: Review dicts as produced by ``load_friction_reviews``.

    Returns:
        JSON-serializable summary dict with a UTC timestamp.
    """
    by_type: dict[str, int] = defaultdict(int)
    by_impact: dict[str, int] = defaultdict(int)
    by_agent: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    outcomes: dict[str, int] = defaultdict(int)
    satisfaction: dict[str, int] = defaultdict(int)
    flat_events: list[dict] = []
    input_tokens = 0
    output_tokens = 0
    frictional_sessions = 0

    for rev in reviews:
        meta = rev.get("_meta", {})
        # `or 0` guards against explicit null token counts in the meta.
        input_tokens += meta.get("input_tokens", 0) or 0
        output_tokens += meta.get("output_tokens", 0) or 0

        # Prefer the review's own agent field; fall back to the directory
        # name tagged on load, then "unknown".
        who = rev.get("agent", rev.get("_agent_dir", "unknown"))
        outcomes[rev.get("outcome", "unknown")] += 1
        satisfaction[rev.get("satisfaction", "unknown")] += 1

        session_events = rev.get("friction", [])
        if session_events:
            frictional_sessions += 1
        for ev in session_events:
            kind = ev.get("type", "unknown")
            severity = ev.get("impact", "unknown")
            by_type[kind] += 1
            by_impact[severity] += 1
            by_agent[who][kind] += 1
            flat_events.append({
                "session_id": rev.get("session_id", "unknown"),
                "agent": who,
                "type": kind,
                "description": ev.get("description", ""),
                "turns": ev.get("turns", []),
                "impact": severity,
            })

    session_total = len(reviews)
    rate = round(frictional_sessions / session_total, 2) if session_total > 0 else 0
    # Cost estimate (haiku pricing)
    cost_usd = (input_tokens / 1_000_000 * 0.80
                + output_tokens / 1_000_000 * 4.0)

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "sessions_count": session_total,
        "sessions_with_friction": frictional_sessions,
        "friction_rate": rate,
        "total_friction_events": len(flat_events),
        "by_type": dict(by_type),
        "by_impact": dict(by_impact),
        "by_agent": {name: dict(kinds) for name, kinds in by_agent.items()},
        "outcomes": dict(outcomes),
        "satisfaction": dict(satisfaction),
        "friction_events": flat_events,
        "cost": {
            "total_input_tokens": input_tokens,
            "total_output_tokens": output_tokens,
            "estimated_cost_usd": round(cost_usd, 4),
        },
    }
def main() -> None:
    """CLI entry point: collect reviews, aggregate, write and print summary.

    Reads friction reviews from each ``<dir>/friction`` directory given via
    ``--dir``, writes the aggregate JSON to ``--out`` (default:
    ``<first-dir>/friction-summary.json``), and prints a human-readable
    summary to stdout. Exits with status 1 if no reviews are found.
    """
    parser = argparse.ArgumentParser(
        description="Merge friction review files into aggregate summary"
    )
    parser.add_argument(
        "--dir",
        nargs="+",
        required=True,
        help="Run directory(ies) containing friction/",
    )
    parser.add_argument(
        "--out",
        default=None,
        help="Output path (default: <first-dir>/friction-summary.json)",
    )
    opts = parser.parse_args()

    collected: list[dict] = []
    for run_dir in opts.dir:
        fdir = Path(run_dir) / "friction"
        if fdir.exists():
            collected.extend(load_friction_reviews(fdir))
        else:
            print(f"Warning: {fdir} not found, skipping", file=sys.stderr)

    if not collected:
        print("No friction review files found", file=sys.stderr)
        sys.exit(1)

    summary = aggregate(collected)
    destination = (
        Path(opts.out) if opts.out else Path(opts.dir[0]) / "friction-summary.json"
    )
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(json.dumps(summary, indent=2), encoding="utf-8")

    # Human-readable recap on stdout.
    print(f"Aggregated {summary['sessions_count']} sessions")
    print(f" Sessions with friction: {summary['sessions_with_friction']} ({summary['friction_rate']:.0%})")
    print(f" Total friction events: {summary['total_friction_events']}")
    if summary["by_type"]:
        print("\n By type:")
        # Most frequent friction type first.
        for kind, n in sorted(summary["by_type"].items(), key=lambda kv: -kv[1]):
            print(f" {kind}: {n}")
    if summary["by_impact"]:
        print("\n By impact:")
        for severity, n in sorted(summary["by_impact"].items()):
            print(f" {severity}: {n}")
    cost = summary["cost"]
    print(f"\nCost: ${cost['estimated_cost_usd']:.4f} "
          f"({cost['total_input_tokens'] + cost['total_output_tokens']:,} tokens)")
    print(f"Output: {destination}")
if __name__ == "__main__":
main()