CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl-labs/audit-logs

Collect and normalize agent logs, discover installed verifiers, and dispatch LLM judges to evaluate adherence. Produces per-session verdicts and aggregated reports.

91

3.09x
Quality

90%

Does it follow best practices?

Impact

96%

3.09x

Average score across 3 eval scenarios

SecuritybySnyk

Passed

No known issues

Overview
Quality
Evals
Security
Files

merge_friction.py — skills/friction-review/scripts/

#!/usr/bin/env python3
"""
Merge individual friction review files into an aggregated summary.

Reads friction JSON files from friction/<agent>/<session>.friction.json,
aggregates friction counts by type and agent, and writes friction-summary.json.

No external dependencies.
"""

import argparse
import json
import sys
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path


def load_friction_reviews(friction_dir: Path) -> list[dict]:
    """Collect every per-session friction review under *friction_dir*.

    Expects a layout of friction_dir/<agent>/<session>.friction.json.
    Files that cannot be read or parsed are skipped with a warning on
    stderr. Each loaded dict is annotated with its source path
    ("_source_file") and owning agent directory name ("_agent_dir").
    """
    if not friction_dir.exists():
        return []

    loaded: list[dict] = []
    for agent_dir in sorted(friction_dir.iterdir()):
        # Only descend into real agent directories; skip plain files
        # and hidden/private entries (".foo", "_foo").
        if not agent_dir.is_dir():
            continue
        if agent_dir.name[:1] in (".", "_"):
            continue
        for path in sorted(agent_dir.glob("*.friction.json")):
            try:
                review = json.loads(path.read_text(encoding="utf-8"))
            except (OSError, json.JSONDecodeError) as exc:
                print(f"Warning: failed to read {path}: {exc}",
                      file=sys.stderr)
                continue
            review["_source_file"] = str(path)
            review["_agent_dir"] = agent_dir.name
            loaded.append(review)

    return loaded


def aggregate(reviews: list[dict]) -> dict:
    """Aggregate friction reviews into summary statistics.

    Args:
        reviews: Parsed friction review dicts as produced by
            load_friction_reviews(). Missing fields are tolerated and
            counted under "unknown"; keys that are present but null
            (e.g. "friction": null from an LLM judge) are treated the
            same as absent instead of crashing the aggregation.

    Returns:
        A JSON-serializable summary dict: per-type / per-impact /
        per-agent friction counts, outcome and satisfaction tallies,
        a flat list of all friction events, the overall friction rate,
        and an estimated LLM cost.
    """
    total_input_tokens = 0
    total_output_tokens = 0

    # Counts
    friction_by_type: dict[str, int] = defaultdict(int)
    friction_by_impact: dict[str, int] = defaultdict(int)
    friction_by_agent: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
    outcome_counts: dict[str, int] = defaultdict(int)
    satisfaction_counts: dict[str, int] = defaultdict(int)

    sessions_with_friction = 0
    all_friction_events: list[dict] = []

    for review in reviews:
        # "or" fallbacks guard against keys explicitly set to null:
        # a bare .get(key, default) still returns None in that case.
        meta = review.get("_meta") or {}
        total_input_tokens += meta.get("input_tokens", 0) or 0
        total_output_tokens += meta.get("output_tokens", 0) or 0

        agent = review.get("agent") or review.get("_agent_dir") or "unknown"
        outcome = review.get("outcome") or "unknown"
        satisfaction = review.get("satisfaction") or "unknown"
        friction_events = review.get("friction") or []

        outcome_counts[outcome] += 1
        satisfaction_counts[satisfaction] += 1

        if friction_events:
            sessions_with_friction += 1

        for event in friction_events:
            ftype = event.get("type", "unknown")
            impact = event.get("impact", "unknown")

            friction_by_type[ftype] += 1
            friction_by_impact[impact] += 1
            friction_by_agent[agent][ftype] += 1

            all_friction_events.append({
                "session_id": review.get("session_id", "unknown"),
                "agent": agent,
                "type": ftype,
                "description": event.get("description", ""),
                "turns": event.get("turns", []),
                "impact": impact,
            })

    total_sessions = len(reviews)
    friction_rate = (
        round(sessions_with_friction / total_sessions, 2)
        if total_sessions > 0
        else 0
    )

    # Cost estimate (Claude Haiku pricing: $0.80/MTok input, $4.00/MTok output).
    estimated_cost = (
        total_input_tokens / 1_000_000 * 0.80
        + total_output_tokens / 1_000_000 * 4.0
    )

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "sessions_count": total_sessions,
        "sessions_with_friction": sessions_with_friction,
        "friction_rate": friction_rate,
        "total_friction_events": len(all_friction_events),
        "by_type": dict(friction_by_type),
        "by_impact": dict(friction_by_impact),
        "by_agent": {
            agent: dict(types)
            for agent, types in friction_by_agent.items()
        },
        "outcomes": dict(outcome_counts),
        "satisfaction": dict(satisfaction_counts),
        "friction_events": all_friction_events,
        "cost": {
            "total_input_tokens": total_input_tokens,
            "total_output_tokens": total_output_tokens,
            "estimated_cost_usd": round(estimated_cost, 4),
        },
    }


def main():
    """CLI entry point: merge friction reviews and write friction-summary.json.

    Reads friction/<agent>/*.friction.json under each --dir, aggregates
    them, writes the summary JSON, and prints a human-readable recap.
    Exits with status 1 when no review files are found.
    """
    parser = argparse.ArgumentParser(
        description="Merge friction review files into aggregate summary"
    )
    parser.add_argument(
        "--dir",
        nargs="+",
        required=True,
        help="Run directory(ies) containing friction/",
    )
    parser.add_argument(
        "--out",
        default=None,
        help="Output path (default: <first-dir>/friction-summary.json)",
    )
    args = parser.parse_args()

    reviews: list[dict] = []
    for run_dir in args.dir:
        friction_dir = Path(run_dir) / "friction"
        if friction_dir.exists():
            reviews.extend(load_friction_reviews(friction_dir))
        else:
            print(f"Warning: {friction_dir} not found, skipping", file=sys.stderr)

    if not reviews:
        print("No friction review files found", file=sys.stderr)
        sys.exit(1)

    result = aggregate(reviews)

    if args.out:
        out_path = Path(args.out)
    else:
        out_path = Path(args.dir[0]) / "friction-summary.json"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(result, indent=2), encoding="utf-8")

    # Human-readable recap on stdout.
    print(f"Aggregated {result['sessions_count']} sessions")
    print(f"  Sessions with friction: {result['sessions_with_friction']} ({result['friction_rate']:.0%})")
    print(f"  Total friction events: {result['total_friction_events']}")

    by_type = result["by_type"]
    if by_type:
        print("\n  By type:")
        for ftype, count in sorted(by_type.items(), key=lambda kv: -kv[1]):
            print(f"    {ftype}: {count}")

    by_impact = result["by_impact"]
    if by_impact:
        print("\n  By impact:")
        for impact, count in sorted(by_impact.items()):
            print(f"    {impact}: {count}")

    cost = result["cost"]
    token_total = cost["total_input_tokens"] + cost["total_output_tokens"]
    print(f"\nCost: ${cost['estimated_cost_usd']:.4f} "
          f"({token_total:,} tokens)")
    print(f"Output: {out_path}")


if __name__ == "__main__":
    main()

tile.json