CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessleng/agent-insight-experiment

Scan a repository to surface actionable findings about agent performance. Analyzes source code, git history, GitHub data, agent logs, and agent context, then synthesizes cross-referenced findings with targeted actions informed by Tessl product awareness. Supports incremental multi-developer contributions and produces a self-contained HTML report.

70

Quality

88%

Does it follow best practices?

Impact

No eval scenarios have been run

Security by Snyk

Advisory

Suggest reviewing before use

Overview
Quality
Evals
Security
Files

skills/analyze-github-data/scripts/github-data-collector.sh

#!/usr/bin/env bash
# github-data-collector.sh — Collect GitHub data for the
# analyze-github-data skill. Gathers PR listings, agent-authored PRs,
# review comments, CI run data, issues, and PR iteration depth — all
# via the gh CLI in a single pass.
#
# Usage:
#   bash github-data-collector.sh [--root <dir>] [--out <path>]
#
# Defaults:
#   --root  current working directory
#
# Output: JSON to stdout (or to --out <path> if given).
# Requires: gh (authenticated), jq.

set -euo pipefail

# ── CLI parsing ──────────────────────────────────────────────────────

ROOT="$(pwd)"
OUT=""

#######################################
# Parse command-line options into ROOT and OUT.
# Globals:   ROOT (written), OUT (written)
# Arguments: the script's argument vector
# Outputs:   usage text on stdout for -h/--help; diagnostics on stderr
# Returns:   exits 0 after printing help; exits 1 on an unknown option
#            or on an option that is missing its value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --root|--out)
        # Without this guard `set -u` aborts with a cryptic
        # "$2: unbound variable" when the value is omitted.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          exit 1
        fi
        if [[ "$1" == "--root" ]]; then
          ROOT="$2"
        else
          OUT="$2"
        fi
        shift 2
        ;;
      -h|--help)
        echo "Usage: bash github-data-collector.sh [--root <dir>] [--out <path>]"
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# All later gh commands resolve the repository from the working directory.
cd "$ROOT"

# ── Prerequisite checks ──────────────────────────────────────────────

# The gh CLI must be on PATH; everything below shells out to it.
command -v gh >/dev/null 2>&1 || {
  echo '{"error": "gh CLI not found. Install from https://cli.github.com/"}' >&2
  exit 1
}

# Probe with a real API request instead of `gh auth status`: inside
# Cursor's macOS sandbox the keychain is unreachable, so the status command
# fails even when the token is fine. The probe's stderr is captured so the
# failure can be classified as "sandboxed keychain", "token rejected by
# GitHub", or anything else.
GH_PREFLIGHT_ERR="$(mktemp -t gh-preflight.XXXXXX.err)"
trap 'rm -f "$GH_PREFLIGHT_ERR"' EXIT
if gh api user --jq .login >/dev/null 2>"$GH_PREFLIGHT_ERR"; then
  : # probe succeeded, nothing to report
else
  if grep -qiE 'keyring|token.*invalid|secret storage' "$GH_PREFLIGHT_ERR"; then
    echo '{"error": "gh is authenticated on your machine but cannot read the token from the macOS keychain inside this sandbox. Fix: add `export GH_TOKEN=\"$(gh auth token)\"` to your shell profile and restart the agent, or grant the agent elevated shell permissions."}' >&2
  elif grep -qiE '401|unauthori[sz]ed|bad credentials' "$GH_PREFLIGHT_ERR"; then
    echo '{"error": "gh token rejected by GitHub (401). Run: gh auth login -h github.com"}' >&2
  else
    # Keep at most 300 bytes of the captured stderr, drop control
    # characters, then escape backslashes and double quotes so the text
    # cannot break out of the surrounding JSON string.
    probe_detail=$(
      head -c 300 "$GH_PREFLIGHT_ERR" \
        | tr -d '\000-\037\177' \
        | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
    )
    echo "{\"error\": \"gh API probe failed: $probe_detail\"}" >&2
  fi
  exit 1
fi

# jq is needed for all local JSON shaping further down.
command -v jq >/dev/null 2>&1 || {
  echo '{"error": "jq not found. Install with: brew install jq"}' >&2
  exit 1
}

# ── Resolve repo ─────────────────────────────────────────────────────

# Ask gh which owner/name the working directory maps to. A failure is
# swallowed here and surfaces as an empty string, handled just below.
REPO="$(gh repo view --json nameWithOwner -q '.nameWithOwner' 2>/dev/null || true)"

[[ -n "$REPO" ]] || {
  echo '{"error": "Could not determine repository. Ensure this is a GitHub-linked git repo."}' >&2
  exit 1
}

echo "Collecting GitHub data for $REPO..." >&2

# ── Step 1: Merged PRs (last 100) ────────────────────────────────────

echo "  Fetching merged PRs..." >&2
# Fields requested for merged PRs; a failed listing degrades to an empty array.
merged_pr_fields="number,title,author,createdAt,mergedAt,additions,deletions,changedFiles,reviewDecision,labels"
MERGED_PRS="$(
  gh pr list --state merged --limit 100 --json "$merged_pr_fields" 2>/dev/null \
    || echo '[]'
)"

# ── Step 2: Open PRs (last 30) ───────────────────────────────────────

echo "  Fetching open PRs..." >&2
# Same idea for open PRs, minus the merge-only fields.
open_pr_fields="number,title,author,createdAt,additions,deletions,changedFiles,labels"
OPEN_PRS="$(
  gh pr list --state open --limit 30 --json "$open_pr_fields" 2>/dev/null \
    || echo '[]'
)"

# ── Step 3: Identify agent-authored PRs ──────────────────────────────

echo "  Identifying agent-authored PRs..." >&2
ALL_PRS=$(gh pr list --state all --limit 200 \
  --json number,title,author,labels \
  2>/dev/null || echo '[]')

#######################################
# Keep only the PRs that look agent-authored, judged by author login or
# by label name.
# Inputs:  JSON array of PR objects (number, title, author, labels) on stdin
# Outputs: JSON array of the matching PRs on stdout, each PR at most once
# Returns: jq's exit status
#######################################
select_agent_prs() {
  # - "ai" must stand alone as a token; as a bare substring it also hits
  #   logins/labels such as "craig", "main" or "maintenance".
  # - any(...) collapses the per-label matches to one boolean so a PR with
  #   several matching labels is emitted once, not once per label.
  # - `// ""` keeps a missing author or label name from aborting the filter.
  jq '
    def ai_token: "(^|[^A-Za-z])ai([^A-Za-z]|$)";
    [ .[] | select(
        ((.author.login // "")
          | test(ai_token + "|bot|copilot|agent|automated|dependabot|renovate"; "i"))
        or any((.labels // [])[];
               (.name // "") | test(ai_token + "|agent|copilot|generated|automated"; "i"))
    ) ]'
}

AGENT_PRS=$(printf '%s' "$ALL_PRS" | select_agent_prs 2>/dev/null) || AGENT_PRS='[]'
# jq prints nothing for empty input; keep the value a valid JSON array.
[[ -n "$AGENT_PRS" ]] || AGENT_PRS='[]'

# Search for PRs mentioning agent tools in body
AGENT_SEARCH=$(gh search prs --repo "$REPO" --limit 50 --json number,title \
  "co-authored" OR "generated by" OR "cursor" OR "claude" OR "copilot" \
  2>/dev/null || echo '[]')

# ── Step 4: Review comments from last 50 merged PRs ──────────────────

echo "  Fetching review comments (this may take a moment)..." >&2
MERGED_PR_NUMBERS=$(echo "$MERGED_PRS" | jq -r '.[0:50] | .[].number' 2>/dev/null || true)

#######################################
# Fetch the inline review comments of the given PRs as one JSON array.
# Globals:   REPO (read)
# Arguments: PR numbers, one per argument
# Outputs:   compact JSON array of {pr_number, body, path, created_at, user}
# Returns:   non-zero if the collected items do not form valid JSON
#######################################
fetch_review_comments() {
  local pr items
  for pr in "$@"; do
    # --paginate with per_page=100 walks every page; a single unpaginated
    # request returns only the first 30 comments of a PR.
    # Capturing via assignment drops whatever a failed call wrote to
    # stdout (gh api may print the error response body there), so a 404
    # cannot leak junk items into the result.
    if items=$(gh api --paginate "repos/$REPO/pulls/$pr/comments?per_page=100" \
        --jq '.[] | {pr_number: (.pull_request_url | split("/") | last | tonumber), body: .body, path: .path, created_at: .created_at, user: .user.login}' \
        2>/dev/null </dev/null); then
      if [[ -n "$items" ]]; then
        printf '%s\n' "$items"
      fi
    fi
  done | jq -c -s '.'   # slurp the object stream into one array
}

# Word splitting of the newline-separated number list is intentional.
# shellcheck disable=SC2086
REVIEW_COMMENTS=$(fetch_review_comments $MERGED_PR_NUMBERS 2>/dev/null) || REVIEW_COMMENTS="[]"

# Fall back to an empty array if nothing usable was produced
[[ -n "$REVIEW_COMMENTS" ]] || REVIEW_COMMENTS="[]"

# ── Step 5: CI run data ──────────────────────────────────────────────

echo "  Fetching CI runs..." >&2
# Workflow runs as returned by gh; an empty array if the call fails.
ci_run_fields="databaseId,name,conclusion,headBranch,createdAt"
CI_RUNS="$(
  gh run list --limit 100 --json "$ci_run_fields" 2>/dev/null \
    || echo '[]'
)"

# Per-workflow tally of total / failed / cancelled runs, sorted so the
# workflow with the most failures comes first.
ci_summary_filter='
  def count_where(c): map(select(.conclusion == c)) | length;
  group_by(.name)
  | map({
      name: .[0].name,
      total: length,
      failed: count_where("failure"),
      cancelled: count_where("cancelled")
    })
  | sort_by(-.failed)
'
CI_FAILURE_SUMMARY="$(jq "$ci_summary_filter" <<< "$CI_RUNS" 2>/dev/null || echo '[]')"

# Tail of the log for the first failed run in the list, if there is one.
FAILED_RUN_LOG=""
FAILED_RUN_ID="$(
  jq -r 'map(select(.conclusion == "failure")) | .[0].databaseId // empty' \
    <<< "$CI_RUNS" 2>/dev/null || true
)"
if [[ -n "$FAILED_RUN_ID" ]]; then
  echo "  Fetching failed run log snippet..." >&2
  FAILED_RUN_LOG="$(gh run view "$FAILED_RUN_ID" --log-failed 2>/dev/null | tail -50 || true)"
fi

# ── Step 6: Issues (last 100) ────────────────────────────────────────

echo "  Fetching issues..." >&2
# Open and closed issues alike; an empty array if the call fails.
issue_fields="number,title,labels,createdAt,closedAt"
ISSUES="$(
  gh issue list --state all --limit 100 --json "$issue_fields" 2>/dev/null \
    || echo '[]'
)"

# ── Step 7: PR iteration depth (comment counts) ──────────────────────

echo "  Calculating PR iteration depth..." >&2

#######################################
# Rank PRs by their number of inline review comments.
# Globals:   REPO (read)
# Arguments: $1 - JSON array of PR objects carrying a .number field
# Outputs:   JSON array of up to 20 {pr, comment_count} objects, highest
#            count first; PRs without review comments are left out
# Returns:   non-zero if $1 is not valid JSON or jq is unavailable
#######################################
collect_iteration_depth() {
  local pr count
  printf '%s' "$1" | jq -r '.[].number' 2>/dev/null |
    while IFS= read -r pr; do
      [[ "$pr" =~ ^[0-9]+$ ]] || continue
      # Read the PR's own review_comments counter. Counting the items of
      # one unpaginated /comments page tops out at 30, which flattens the
      # ranking for exactly the most-discussed PRs.
      # Output of a failed call is dropped rather than parsed.
      count=$(gh api "repos/$REPO/pulls/$pr" --jq '.review_comments' \
        2>/dev/null </dev/null) || continue
      [[ "$count" =~ ^[0-9]+$ ]] || continue
      if [ "$count" -gt 0 ]; then
        printf '{"pr":%s,"comment_count":%s}\n' "$pr" "$count"
      fi
    done |
    # Sort by comment count descending, take top 20
    jq -s 'sort_by(-.comment_count) | .[0:20]'
}

# Use the already-fetched merged PR numbers (up to 100)
ITERATION_DEPTH=$(collect_iteration_depth "$MERGED_PRS" 2>/dev/null) || ITERATION_DEPTH='[]'
[[ -n "$ITERATION_DEPTH" ]] || ITERATION_DEPTH='[]'

# ── Step 8: PR size statistics ────────────────────────────────────────

# Size statistics over the merged PRs: floored averages, maxima, and a
# histogram of review decisions. Falls back to {} if jq fails.
pr_stats_filter='
  def mean_floor: if length > 0 then (add / length | floor) else 0 end;
  {
    count: length,
    avg_additions: (map(.additions) | mean_floor),
    avg_deletions: (map(.deletions) | mean_floor),
    avg_changed_files: (map(.changedFiles) | mean_floor),
    max_additions: (map(.additions) | max // 0),
    max_changed_files: (map(.changedFiles) | max // 0),
    review_decisions: (
      group_by(.reviewDecision)
      | map({key: .[0].reviewDecision, value: length})
      | from_entries
    )
  }
'
PR_STATS="$(echo "$MERGED_PRS" | jq "$pr_stats_filter" 2>/dev/null || echo '{}')"

# ── Escape the failed run log for JSON embedding ─────────────────────

# Encode the raw log text as a JSON string; an empty log becomes "".
FAILED_LOG_JSON=$(printf '%s' "$FAILED_RUN_LOG" | jq -Rs . 2>/dev/null || echo '""')

# ── Assemble final JSON ──────────────────────────────────────────────

echo "  Assembling output..." >&2

#######################################
# Combine every collected section into the final report document.
# Globals:   REPO, MERGED_PRS, OPEN_PRS, AGENT_PRS, AGENT_SEARCH,
#            REVIEW_COMMENTS, CI_RUNS, CI_FAILURE_SUMMARY, FAILED_LOG_JSON,
#            ISSUES, ITERATION_DEPTH, PR_STATS (all read)
# Outputs:   the report JSON on stdout
# Returns:   non-zero if any section is missing or is not valid JSON
#######################################
assemble_report() {
  # Sections are streamed to jq on stdin instead of via --argjson.
  # Command-line arguments are size-limited by the OS (128 KiB per
  # argument on Linux), and review comment bodies for 50 PRs can exceed
  # that, which would abort the run with "Argument list too long".
  # printf is a shell builtin, so no exec limit applies to it.
  printf '%s\n' \
    "$MERGED_PRS" "$OPEN_PRS" "$AGENT_PRS" "$AGENT_SEARCH" \
    "$REVIEW_COMMENTS" "$CI_RUNS" "$CI_FAILURE_SUMMARY" "$FAILED_LOG_JSON" \
    "$ISSUES" "$ITERATION_DEPTH" "$PR_STATS" |
    jq -n \
      --arg collected_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
      --arg repo "$REPO" \
      '
      [inputs] as $sections
      # An empty or doubled section would shift every later value into the
      # wrong field, so insist on exactly one JSON value per section.
      | if ($sections | length) != 11 then
          error("expected 11 JSON sections, got \($sections | length)")
        else . end
      | $sections as [
          $merged_prs, $open_prs, $agent_prs, $agent_search,
          $review_comments, $ci_runs, $ci_failure_summary, $failed_run_log,
          $issues, $iteration_depth, $pr_stats
        ]
      | {
          collected_at: $collected_at,
          repo: $repo,
          merged_prs: $merged_prs,
          open_prs: $open_prs,
          agent_authored_prs: $agent_prs,
          agent_search_results: $agent_search,
          review_comments: $review_comments,
          ci_runs: $ci_runs,
          ci_failure_summary: $ci_failure_summary,
          failed_run_log_snippet: $failed_run_log,
          issues: $issues,
          pr_iteration_depth: $iteration_depth,
          pr_statistics: $pr_stats,
          counts: {
            merged_prs: ($merged_prs | length),
            open_prs: ($open_prs | length),
            agent_prs: ($agent_prs | length),
            review_comments: ($review_comments | length),
            ci_runs: ($ci_runs | length),
            issues: ($issues | length)
          }
        }'
}

JSON=$(assemble_report)

# Deliver the report: to stdout when no --out path was given, otherwise
# into that file with a confirmation on stderr.
case "$OUT" in
  "")
    echo "$JSON"
    ;;
  *)
    echo "$JSON" > "$OUT"
    echo "GitHub data collected: $OUT" >&2
    ;;
esac

echo "Done." >&2

skills

analyze-github-data

README.md

tile.json