CtrlK
BlogDocsLog inGet started
Tessl Logo

tessleng/agent-insight-experiment

Scan a repository to surface actionable findings about agent performance. Analyzes source code, git history, GitHub data, agent logs, and agent context, then synthesizes cross-referenced findings with targeted actions informed by Tessl product awareness. Supports incremental multi-developer contributions and produces a self-contained HTML report.

70

Quality

88%

Does it follow best practices?

Impact

No eval scenarios have been run

SecuritybySnyk

Advisory

Suggest reviewing before use

Overview
Quality
Evals
Security
Files

git-data-collector.shskills/analyze-git-history/scripts/

#!/usr/bin/env bash
# git-data-collector.sh — Collect git history metrics for the
# analyze-git-history skill. Gathers repository vitals, file churn,
# co-change pairs, reverts, contributor concentration, large commits,
# message conventions, and pattern shifts — all in a single pass.
#
# Usage:
#   bash git-data-collector.sh [--root <dir>] [--months <n>] [--out <path>]
#
# Defaults:
#   --root    current working directory
#   --months  6
#
# Output: JSON to stdout (or to --out <path> if given).
# Requires: git. JSON is assembled by manual string construction; jq
# is detected (HAS_JQ) but not otherwise used by this script.

set -euo pipefail

# ── CLI parsing ──────────────────────────────────────────────────────

ROOT="$(pwd)"
MONTHS=6
OUT=""

while [[ $# -gt 0 ]]; do
  case "$1" in
    --root|--months|--out)
      # With `set -u`, reading a missing "$2" would abort with a cryptic
      # "unbound variable" message, so check for the value explicitly.
      if [[ $# -lt 2 ]]; then
        echo "Option $1 requires an argument" >&2
        exit 1
      fi
      case "$1" in
        --root)   ROOT="$2" ;;
        --months) MONTHS="$2" ;;
        --out)    OUT="$2" ;;
      esac
      shift 2
      ;;
    -h|--help)
      echo "Usage: bash git-data-collector.sh [--root <dir>] [--months <n>] [--out <path>]"
      exit 0
      ;;
    *) echo "Unknown option: $1" >&2; exit 1 ;;
  esac
done

# MONTHS is evaluated in shell arithmetic and written out as a bare JSON
# number, so accept only a positive decimal integer. The 10# prefix stops
# a leading zero ("08") from being parsed as octal.
if ! [[ "$MONTHS" =~ ^[0-9]+$ ]] || (( 10#$MONTHS < 1 )); then
  echo "--months must be a positive integer (got: $MONTHS)" >&2
  exit 1
fi
MONTHS=$(( 10#$MONTHS ))

# The rest of the script expands $GIT unquoted, so it must not contain a
# path (a root with spaces would be word-split). Instead of embedding
# "-C <root>" in the string, change into the root and use plain "git".
# A relative --out path is anchored to the invocation directory first so
# it still resolves where the caller expects.
if [[ -n "$OUT" && "$OUT" != /* ]]; then
  OUT="$PWD/$OUT"
fi

# CDPATH is cleared so cd neither jumps elsewhere nor prints to stdout.
if ! CDPATH='' cd -- "$ROOT" >/dev/null 2>&1; then
  echo '{"error": "Not a git repository"}' >&2
  exit 1
fi

GIT="git"

# Verify git repo
if ! $GIT rev-parse --git-dir >/dev/null 2>&1; then
  echo '{"error": "Not a git repository"}' >&2
  exit 1
fi

# Full analysis window, and the (integer) midpoint used to split it into
# a "recent" and an "older" half.
SINCE="${MONTHS} months ago"
SINCE_HALF=$(( MONTHS / 2 ))

# Records whether jq is on PATH.
HAS_JQ=false
command -v jq >/dev/null 2>&1 && HAS_JQ=true

# ── Helper: escape a string for JSON ─────────────────────────────────

# Escape a string so it can be placed between double quotes in JSON.
# Arguments: $1 - raw string
# Outputs:   the escaped string on stdout (no surrounding quotes and no
#            trailing newline)
json_escape() {
  local s="$1"
  # Backslash must go first so the escapes added below are not doubled.
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\r'/\\r}"
  s="${s//$'\t'/\\t}"
  s="${s//$'\b'/\\b}"
  s="${s//$'\f'/\\f}"

  # JSON forbids raw U+0001..U+001F inside strings. Anything not covered
  # by a short escape above is emitted as \u00XX. The scan only runs when
  # a control character is actually present, so the common case stays
  # fork-free and cheap.
  if [[ "$s" == *[[:cntrl:]]* ]]; then
    local code oct hex ch
    for (( code = 1; code < 32; code++ )); do
      printf -v oct '%03o' "$code"
      printf -v hex '%02x' "$code"
      printf -v ch "\\$oct"
      s=${s//"$ch"/\\u00${hex}}
    done
  fi

  printf '%s' "$s"
}

# ── Helper: build a JSON array of strings from lines ─────────────────

# Convert newline-separated text on stdin into a JSON array of strings.
# Empty lines are skipped; each remaining line is passed through
# json_escape and quoted.
# Outputs: the array on stdout, without a trailing newline.
lines_to_json_array() {
  local line sep=""
  printf '['
  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing
  # newline: read returns non-zero at EOF but still fills $line.
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ -n "$line" ]] || continue
    printf '%s"%s"' "$sep" "$(json_escape "$line")"
    sep=","
  done
  printf ']'
}

# ── Step 1: Repository vitals ────────────────────────────────────────

# Each value falls back to a neutral default when git fails (for example
# in a repository that has no commits yet). Under `set -e -o pipefail` a
# failing `git log` at the head of a pipeline would otherwise abort the
# whole script, so its status is neutralised inside the brace group and
# the counters simply see empty input.
TOTAL_COMMITS=$($GIT rev-list --count HEAD 2>/dev/null || echo 0)
UNIQUE_AUTHORS=$({ $GIT log --format='%ae' 2>/dev/null || true; } | sort -u | wc -l | tr -d ' ')
LAST_COMMIT_DATE=$($GIT log -1 --format='%ci' 2>/dev/null || echo "unknown")
RECENT_COMMITS=$({ $GIT log --since="$SINCE" --oneline 2>/dev/null || true; } | wc -l | tr -d ' ')

# ── Step 2: File churn (top 40) ──────────────────────────────────────

# Each CHURN_RAW line is `uniq -c` output: a right-aligned count, a single
# space, then the path. core.quotepath=off makes git print non-ASCII
# paths as raw bytes instead of a double-quoted, octal-escaped string.
CHURN_RAW=$($GIT -c core.quotepath=off log --since="$SINCE" --name-only --pretty=format: 2>/dev/null \
  | grep -v '^$' | sort | uniq -c | sort -rn | head -40 || true)

CHURN_JSON="["
CHURN_FIRST=true
while IFS= read -r line; do
  [ -z "$line" ] && continue
  # Split with parameter expansion rather than awk: rebuilding $0 in awk
  # collapses runs of whitespace inside the path, and it costs two forks
  # per row.
  line="${line#"${line%%[! ]*}"}"   # drop the count's left padding
  count="${line%% *}"
  file="${line#* }"
  if $CHURN_FIRST; then CHURN_FIRST=false; else CHURN_JSON+=","; fi
  CHURN_JSON+="{\"file\":\"$(json_escape "$file")\",\"changes\":$count}"
done <<< "$CHURN_RAW"
CHURN_JSON+="]"

# ── Step 3: Co-change pairs (top 10 high-churn files) ────────────────

# Get top 10 files from churn. CHURN_RAW is already sorted by descending
# change count, so its first 10 rows are the answer; only the count
# column needs stripping (no second walk over the history).
TOP_FILES=$(head -10 <<< "$CHURN_RAW" | sed -E 's/^ *[0-9]+ //' || true)

COCHANGE_JSON="["
COCHANGE_FIRST=true
while IFS= read -r file; do
  [ -z "$file" ] && continue

  # Take up to 20 commits that touched this file and tally the other
  # paths changed in those same commits.
  #  - tformat: (not format:) terminates the last SHA with a newline;
  #    without it `read` returns non-zero on the final line and that
  #    commit is silently skipped.
  #  - grep -vxF drops the file itself using a literal whole-line match,
  #    so regex metacharacters in the path ('.', '[', '+', ...) are inert.
  COCHANGED=$($GIT log --since="$SINCE" --pretty=tformat:"%H" -- "$file" 2>/dev/null \
    | head -20 \
    | while IFS= read -r sha; do
        $GIT -c core.quotepath=off diff-tree --no-commit-id --name-only -r "$sha" 2>/dev/null
      done \
    | grep -vxF -- "$file" \
    | sort | uniq -c | sort -rn | head -10 || true)

  PAIRS_JSON="["
  PAIRS_FIRST=true
  while IFS= read -r pair_line; do
    [ -z "$pair_line" ] && continue
    # `uniq -c` row: padded count, one space, path (kept verbatim).
    pair_line="${pair_line#"${pair_line%%[! ]*}"}"
    pcount="${pair_line%% *}"
    pfile="${pair_line#* }"
    if $PAIRS_FIRST; then PAIRS_FIRST=false; else PAIRS_JSON+=","; fi
    PAIRS_JSON+="{\"file\":\"$(json_escape "$pfile")\",\"co_changes\":$pcount}"
  done <<< "$COCHANGED"
  PAIRS_JSON+="]"

  if $COCHANGE_FIRST; then COCHANGE_FIRST=false; else COCHANGE_JSON+=","; fi
  COCHANGE_JSON+="{\"file\":\"$(json_escape "$file")\",\"co_changed_with\":$PAIRS_JSON}"
done <<< "$TOP_FILES"
COCHANGE_JSON+="]"

# ── Step 4: Reverts ──────────────────────────────────────────────────

# Commits in the window whose message matches "revert" (case-insensitive).
# --no-decorate keeps the output at "<short sha> <subject>" even when the
# user's git config enables log.decorate; otherwise ref names such as
# "(HEAD -> main)" would be spliced into the subject.
REVERTS_RAW=$($GIT log --since="$SINCE" --oneline --no-decorate --grep="revert" -i 2>/dev/null || true)

REVERTS_JSON="["
REVERTS_FIRST=true
while IFS= read -r line; do
  [ -z "$line" ] && continue
  sha="${line%% *}"
  # A row without any space has no subject; do not repeat the sha there.
  if [[ "$line" == *" "* ]]; then
    subject="${line#* }"
  else
    subject=""
  fi
  if $REVERTS_FIRST; then REVERTS_FIRST=false; else REVERTS_JSON+=","; fi
  REVERTS_JSON+="{\"sha\":\"$sha\",\"subject\":\"$(json_escape "$subject")\"}"
done <<< "$REVERTS_RAW"
REVERTS_JSON+="]"

# ── Step 5: Fix-up commits (capped at 30) ────────────────────────────

# The 30 most recent commits in the window whose message matches "fix"
# (case-insensitive substring match). --max-count lets git stop on its
# own instead of being cut off by `head`, and --no-decorate keeps the
# output at "<short sha> <subject>" regardless of the user's
# log.decorate setting.
FIXES_RAW=$($GIT log --since="$SINCE" --oneline --no-decorate --max-count=30 --grep="fix" -i 2>/dev/null || true)

FIXES_JSON="["
FIXES_FIRST=true
while IFS= read -r line; do
  [ -z "$line" ] && continue
  sha="${line%% *}"
  # A row without any space has no subject; do not repeat the sha there.
  if [[ "$line" == *" "* ]]; then
    subject="${line#* }"
  else
    subject=""
  fi
  if $FIXES_FIRST; then FIXES_FIRST=false; else FIXES_JSON+=","; fi
  FIXES_JSON+="{\"sha\":\"$sha\",\"subject\":\"$(json_escape "$subject")\"}"
done <<< "$FIXES_RAW"
FIXES_JSON+="]"

# ── Step 6: Contributor concentration (top 20 directories) ───────────

# The 20 most frequently changed "directories": every changed path with
# its last component removed. A path without any slash is left as-is by
# the sed expression, so top-level files appear under their own name.
# core.quotepath=off keeps non-ASCII paths unquoted, and the final sed
# strips only the `uniq -c` count column so the name reaches
# `git log -- "$dir"` byte-for-byte (awk field rebuilding would collapse
# repeated whitespace and the lookup below would then find nothing).
TOP_DIRS=$($GIT -c core.quotepath=off log --since="$SINCE" --name-only --pretty=format: 2>/dev/null \
  | grep -v '^$' \
  | sed 's|/[^/]*$||' \
  | sort | uniq -c | sort -rn | head -20 | sed -E 's/^ *[0-9]+ //' || true)

CONTRIB_JSON="["
CONTRIB_FIRST=true
while IFS= read -r dir; do
  [ -z "$dir" ] && continue

  # Top 5 author emails by commit count under this path.
  AUTHORS_RAW=$($GIT log --since="$SINCE" --format='%ae' -- "$dir" 2>/dev/null \
    | sort | uniq -c | sort -rn | head -5 || true)

  AUTHORS_JSON="["
  AUTH_FIRST=true
  while IFS= read -r auth_line; do
    [ -z "$auth_line" ] && continue
    # Row is "<count> <email>": first two whitespace-separated fields.
    read -r acount aemail _ <<< "$auth_line"
    if $AUTH_FIRST; then AUTH_FIRST=false; else AUTHORS_JSON+=","; fi
    AUTHORS_JSON+="{\"email\":\"$(json_escape "$aemail")\",\"commits\":$acount}"
  done <<< "$AUTHORS_RAW"
  AUTHORS_JSON+="]"

  if $CONTRIB_FIRST; then CONTRIB_FIRST=false; else CONTRIB_JSON+=","; fi
  CONTRIB_JSON+="{\"directory\":\"$(json_escape "$dir")\",\"authors\":$AUTHORS_JSON}"
done <<< "$TOP_DIRS"
CONTRIB_JSON+="]"

# ── Step 7: Large commits (>20 files changed) ────────────────────────

# Raw log for the window: a "<40-hex sha> <subject>" header per commit,
# plus git's --shortstat summary line for commits that have one.
LARGE_RAW=$($GIT log --since="$SINCE" --pretty=format:"%H %s" --shortstat 2>/dev/null || true)

LARGE_JSON="["
LARGE_FIRST=true
CURRENT_SHA=""
CURRENT_SUBJECT=""

# Patterns are kept in variables so [[ =~ ]] treats them as regexes.
header_re='^[0-9a-f]{40} '
stat_re='^ ([0-9]+) file'
ins_re='([0-9]+) insertion'
del_re='([0-9]+) deletion'

while IFS= read -r line; do
  if [[ -z "$line" ]]; then
    continue
  fi

  if [[ "$line" =~ $header_re ]]; then
    # Header row: remember which commit the next stat row belongs to.
    CURRENT_SHA="${line%% *}"
    CURRENT_SUBJECT="${line#* }"
  elif [[ "$line" =~ $stat_re ]]; then
    files_changed="${BASH_REMATCH[1]}"
    # Only commits touching more than 20 files are reported.
    if [ "$files_changed" -gt 20 ] 2>/dev/null; then
      # Either count may be missing from the summary; default to 0.
      insertions=0
      deletions=0
      if [[ "$line" =~ $ins_re ]]; then insertions="${BASH_REMATCH[1]}"; fi
      if [[ "$line" =~ $del_re ]]; then deletions="${BASH_REMATCH[1]}"; fi
      if $LARGE_FIRST; then LARGE_FIRST=false; else LARGE_JSON+=","; fi
      LARGE_JSON+="{\"sha\":\"${CURRENT_SHA:0:12}\",\"subject\":\"$(json_escape "$CURRENT_SUBJECT")\",\"files_changed\":$files_changed,\"insertions\":$insertions,\"deletions\":$deletions}"
    fi
  fi
done <<< "$LARGE_RAW"
LARGE_JSON+="]"

# ── Step 8: Commit message prefixes (top 20) ─────────────────────────

# Reduce every subject to a "prefix": cut at the first "(", then at the
# first ":", then cap at 30 characters. Tally and keep the 20 most common.
PREFIX_RAW=$($GIT log --since="$SINCE" --pretty=format:"%s" 2>/dev/null \
  | sed -E -e 's/\(.*//' -e 's/:.*//' -e 's/^(.{0,30}).*/\1/' \
  | sort | uniq -c | sort -rn | head -20 || true)

PREFIX_JSON="["
PREFIX_SEP=""
while IFS= read -r row; do
  if [[ -z "$row" ]]; then
    continue
  fi
  # Row is `uniq -c` output: the count first, the prefix text after it.
  pcount=$(awk '{print $1}' <<< "$row")
  prefix=$(awk '{$1=""; print substr($0,2)}' <<< "$row")
  PREFIX_JSON+="${PREFIX_SEP}{\"prefix\":\"$(json_escape "$prefix")\",\"count\":$pcount}"
  PREFIX_SEP=","
done <<< "$PREFIX_RAW"
PREFIX_JSON+="]"

# ── Step 9: Pattern shifts (recent half vs older half) ────────────────

# Print the 20 most-changed directories (as `uniq -c` rows) for a commit
# range.
# Arguments: git-log range options, e.g. --since=... [--until=...]
# Outputs:   "<count> <directory>" rows on stdout; empty on any failure.
# core.quotepath=off keeps non-ASCII paths unquoted; with git's default
# quoting, stripping the last path component would leave a stray leading
# double quote on the directory name.
top_changed_dirs() {
  $GIT -c core.quotepath=off log "$@" --name-only --pretty=format: 2>/dev/null \
    | grep -v '^$' | sed 's|/[^/]*$||' | sort | uniq -c | sort -rn | head -20 || true
}

# Newer half of the window vs. the older half, split at SINCE_HALF months.
RECENT_DIRS=$(top_changed_dirs --since="${SINCE_HALF} months ago")
OLDER_DIRS=$(top_changed_dirs --since="$SINCE" --until="${SINCE_HALF} months ago")

# Turn `uniq -c` rows ("<padded count> <directory>") into a JSON array of
# {"directory": ..., "changes": ...} objects.
# Arguments: $1 - newline-separated rows (may be empty)
# Outputs:   the JSON array on stdout, followed by a newline
format_dir_activity() {
  local json="[" sep="" line dcount dname
  while IFS= read -r line; do
    # Drop the count's left padding; skip rows that are empty afterwards.
    line="${line#"${line%%[! ]*}"}"
    [[ -n "$line" ]] || continue
    # Split on the first space only so the directory name is preserved
    # verbatim (awk field rebuilding would collapse repeated whitespace).
    dcount="${line%% *}"
    if [[ "$line" == *" "* ]]; then
      dname="${line#* }"
    else
      dname=""
    fi
    json+="${sep}{\"directory\":\"$(json_escape "$dname")\",\"changes\":${dcount}}"
    sep=","
  done <<< "$1"
  json+="]"
  printf '%s\n' "$json"
}

RECENT_DIRS_JSON=$(format_dir_activity "$RECENT_DIRS")
OLDER_DIRS_JSON=$(format_dir_activity "$OLDER_DIRS")

# ── Assemble final JSON ──────────────────────────────────────────────

# Every *_JSON variable is already a serialized JSON value and is spliced
# in verbatim. Free-form strings (the root path, the commit date) go
# through json_escape so quotes or backslashes cannot break the document.
JSON=$(cat <<EOF
{
  "collected_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
  "root": "$(json_escape "$ROOT")",
  "analysis_window_months": $MONTHS,
  "vitals": {
    "total_commits": $TOTAL_COMMITS,
    "unique_authors": $UNIQUE_AUTHORS,
    "last_commit_date": "$(json_escape "$LAST_COMMIT_DATE")",
    "recent_commits": $RECENT_COMMITS
  },
  "file_churn": $CHURN_JSON,
  "co_changes": $COCHANGE_JSON,
  "reverts": $REVERTS_JSON,
  "fix_commits": $FIXES_JSON,
  "contributor_concentration": $CONTRIB_JSON,
  "large_commits": $LARGE_JSON,
  "commit_message_prefixes": $PREFIX_JSON,
  "pattern_shifts": {
    "recent_half_months": $SINCE_HALF,
    "recent": $RECENT_DIRS_JSON,
    "older": $OLDER_DIRS_JSON
  }
}
EOF
)

# Write to --out when given (status message on stderr), else to stdout.
# printf is used for the payload so its content is never read as echo
# options or escapes.
if [ -n "$OUT" ]; then
  printf '%s\n' "$JSON" > "$OUT"
  echo "Git data collected: $OUT" >&2
else
  printf '%s\n' "$JSON"
fi

skills

analyze-git-history

README.md

tile.json