Scan a repository to surface actionable findings about agent performance. Analyzes source code, git history, GitHub data, agent logs, and agent context, then synthesizes cross-referenced findings with targeted actions informed by Tessl product awareness. Supports incremental multi-developer contributions and produces a self-contained HTML report.
70
88%
Does it follow best practices?
Impact
—
No eval scenarios have been run
Advisory
Suggest reviewing before use
#!/usr/bin/env bash
# git-data-collector.sh — Collect git history metrics for the
# analyze-git-history skill. Gathers repository vitals, file churn,
# co-change pairs, reverts, contributor concentration, large commits,
# message conventions, and pattern shifts — all in a single pass.
#
# Usage:
# bash git-data-collector.sh [--root <dir>] [--months <n>] [--out <path>]
#
# Defaults:
# --root current working directory
# --months 6
#
# Output: JSON to stdout (or to --out <path> if given).
# Requires: git. JSON is assembled by plain shell string building; jq is
# detected (HAS_JQ) but is not needed to produce the output.
set -euo pipefail
# ── CLI parsing ──────────────────────────────────────────────────────
ROOT="$(pwd)"
MONTHS=6
OUT=""

# Parse command-line options into the globals ROOT, MONTHS and OUT.
# Arguments: the script's own arguments ("$@").
# Outputs:   usage text on stdout for -h/--help; diagnostics on stderr.
# Exits:     0 after printing help; 1 on an unknown option, an option
#            given without its value, or a non-numeric --months.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --root|--months|--out)
        # Without this check a trailing "--root" would die on an unbound
        # "$2" (set -u) with no hint about what the user got wrong.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for option: $1" >&2
          exit 1
        fi
        case "$1" in
          --root) ROOT="$2" ;;
          --months) MONTHS="$2" ;;
          --out) OUT="$2" ;;
        esac
        shift 2
        ;;
      -h|--help)
        echo "Usage: bash git-data-collector.sh [--root <dir>] [--months <n>] [--out <path>]"
        exit 0
        ;;
      *) echo "Unknown option: $1" >&2; exit 1 ;;
    esac
  done

  # MONTHS is later evaluated inside $(( )) and written unquoted into the
  # JSON document, so anything but plain digits must be rejected here.
  if [[ ! "$MONTHS" =~ ^[0-9]+$ ]]; then
    echo "Invalid --months value (expected a non-negative integer): $MONTHS" >&2
    exit 1
  fi
  # Force base 10 so inputs such as "08" are not parsed as (invalid) octal.
  MONTHS=$(( 10#$MONTHS ))
}
parse_args "$@"
# Run git against the target repository. "$ROOT" is passed as one quoted
# argument, so a root path containing spaces or glob characters reaches
# `git -C` intact.
git_in_root() {
  git -C "$ROOT" "$@"
}
# The rest of the script invokes git as an unquoted `$GIT <subcommand>`.
# A command string such as "git -C $ROOT" would be word-split at that
# point; a bare function name is a single word and survives the expansion.
GIT="git_in_root"
# Verify git repo
if ! $GIT rev-parse --git-dir >/dev/null 2>&1; then
  echo '{"error": "Not a git repository"}' >&2
  exit 1
fi
# Analysis window, as a git "approxidate" string (e.g. "6 months ago").
SINCE="${MONTHS} months ago"
# Integer half of the window (rounded down); used to split the window
# into a recent and an older part for the pattern-shift comparison.
SINCE_HALF=$(( MONTHS / 2 ))
# Record whether jq is installed.
HAS_JQ=false
if command -v jq >/dev/null 2>&1; then
  HAS_JQ=true
fi
# ── Helper: escape a string for JSON ─────────────────────────────────
# Usage:   json_escape <string>
# Outputs: <string> on stdout (no trailing newline), escaped so it can be
#          placed between double quotes in a JSON document.
# Backslash, double quote, \n, \r, \t, \b and \f get their short escapes;
# every other control character below U+0020 is written as \u00XX, since
# JSON does not allow those characters unescaped inside a string.
json_escape() {
  local s="$1"
  # Backslash must go first so the escapes added below are not doubled.
  s="${s//\\/\\\\}"
  s="${s//\"/\\\"}"
  s="${s//$'\n'/\\n}"
  s="${s//$'\r'/\\r}"
  s="${s//$'\t'/\\t}"
  # Only pay for the full sweep when a control character is still present.
  if [[ "$s" == *[[:cntrl:]]* ]]; then
    s="${s//$'\b'/\\b}"
    s="${s//$'\f'/\\f}"
    local code hex ch
    for code in {1..31}; do
      printf -v hex '%02x' "$code"
      # Build the raw control character from its hex code.
      printf -v ch "\\x${hex}"
      # Codes already handled above no longer occur, so this is a no-op
      # for them.
      s="${s//$ch/\\u00${hex}}"
    done
  fi
  printf '%s' "$s"
}
# ── Helper: build a JSON array of strings from lines ─────────────────
# Usage:   <producer> | lines_to_json_array
# Reads lines from stdin, skips empty ones, and prints a JSON array of
# the remaining lines as strings (no trailing newline). Each element is
# escaped with json_escape.
lines_to_json_array() {
  local line sep=""
  printf '['
  # `|| [[ -n "$line" ]]` keeps a final line that has no trailing newline;
  # read reports failure for it but still fills the variable.
  while IFS= read -r line || [[ -n "$line" ]]; do
    [[ -z "$line" ]] && continue
    printf '%s"%s"' "$sep" "$(json_escape "$line")"
    sep=","
  done
  printf ']'
}
# ── Step 1: Repository vitals ────────────────────────────────────────
# Every lookup degrades to a placeholder instead of failing. In a
# repository without commits `git log` exits non-zero, and with
# `set -euo pipefail` an unguarded failing pipeline inside $(...) would
# abort the whole script, so the git call in each pipeline is guarded.
TOTAL_COMMITS=$($GIT rev-list --count HEAD 2>/dev/null || echo 0)
UNIQUE_AUTHORS=$({ $GIT log --format='%ae' 2>/dev/null || true; } \
  | sort -u | wc -l | tr -d ' ')
LAST_COMMIT_DATE=$($GIT log -1 --format='%ci' 2>/dev/null || echo "unknown")
# tr strips the padding some wc implementations put around the number.
RECENT_COMMITS=$({ $GIT log --since="$SINCE" --oneline 2>/dev/null || true; } \
  | wc -l | tr -d ' ')
# ── Step 2: File churn (top 40) ──────────────────────────────────────
# Each CHURN_RAW line is `uniq -c` output: padding, the count, then the
# path. `|| true` absorbs the non-zero status of grep on empty input and
# of stages cut short by head.
CHURN_RAW=$($GIT log --since="$SINCE" --name-only --pretty=format: 2>/dev/null \
  | grep -v '^$' | sort | uniq -c | sort -rn | head -40 || true)
CHURN_JSON="["
churn_sep=""
# `read` splits off the count and leaves the rest of the line untouched,
# so runs of spaces inside a path are kept (rebuilding awk fields would
# collapse them), and no external process is spawned per line.
while read -r count file; do
  [[ -z "$count" ]] && continue
  CHURN_JSON+="${churn_sep}{\"file\":\"$(json_escape "$file")\",\"changes\":$count}"
  churn_sep=","
done <<< "$CHURN_RAW"
CHURN_JSON+="]"
# ── Step 3: Co-change pairs (top 10 high-churn files) ────────────────
# Get top 10 files from churn. The count column is removed with sed so
# the path keeps its exact spacing; it is reused as a pathspec below and
# must match the real file name.
TOP_FILES=$($GIT log --since="$SINCE" --name-only --pretty=format: 2>/dev/null \
  | grep -v '^$' | sort | uniq -c | sort -rn | head -10 \
  | sed -E 's/^[[:space:]]*[0-9]+ //' || true)
COCHANGE_JSON="["
cochange_sep=""
while IFS= read -r file; do
  [[ -z "$file" ]] && continue
  # List up to 20 commits that touched this file, then tally the other
  # files changed in those commits.
  #  - tformat: terminates every SHA with a newline; with format: the last
  #    SHA has none and `read` would skip it.
  #  - grep -vxF drops the file itself by exact, literal comparison; used
  #    as a regex, a name like "a.c" would also discard "abc".
  COCHANGED=$($GIT log --since="$SINCE" --max-count=20 --pretty=tformat:"%H" -- "$file" 2>/dev/null \
    | while IFS= read -r sha; do
        $GIT diff-tree --no-commit-id --name-only -r "$sha" 2>/dev/null
      done \
    | grep -vxF -- "$file" \
    | sort | uniq -c | sort -rn | head -10 || true)
  PAIRS_JSON="["
  pairs_sep=""
  while read -r pcount pfile; do
    [[ -z "$pcount" ]] && continue
    PAIRS_JSON+="${pairs_sep}{\"file\":\"$(json_escape "$pfile")\",\"co_changes\":$pcount}"
    pairs_sep=","
  done <<< "$COCHANGED"
  PAIRS_JSON+="]"
  COCHANGE_JSON+="${cochange_sep}{\"file\":\"$(json_escape "$file")\",\"co_changed_with\":$PAIRS_JSON}"
  cochange_sep=","
done <<< "$TOP_FILES"
COCHANGE_JSON+="]"
# ── Step 4: Reverts ──────────────────────────────────────────────────
# Commits in the window whose message matches "revert" (case-insensitive),
# one "<sha> <subject>" line each.
REVERTS_RAW=$($GIT log --since="$SINCE" --oneline --grep="revert" -i 2>/dev/null || true)
REVERTS_JSON="["
revert_sep=""
while IFS= read -r line; do
  if [[ -n "$line" ]]; then
    # Text before the first space is the sha; text after it is the subject.
    sha="${line%% *}"
    subject="${line#* }"
    REVERTS_JSON+="${revert_sep}{\"sha\":\"$sha\",\"subject\":\"$(json_escape "$subject")\"}"
    revert_sep=","
  fi
done <<< "$REVERTS_RAW"
REVERTS_JSON+="]"
# ── Step 5: Fix-up commits (capped at 30) ────────────────────────────
# First 30 commits in the window whose message matches "fix"
# (case-insensitive), one "<sha> <subject>" line each.
FIXES_RAW=$(
  $GIT log --since="$SINCE" --oneline --grep="fix" -i 2>/dev/null \
    | head -30 \
    || true
)
# Objects are accumulated comma-separated and wrapped in brackets at the end.
FIXES_JSON=""
while IFS= read -r line; do
  [[ -n "$line" ]] || continue
  sha=${line%% *}
  subject=${line#* }
  printf -v fix_entry '{"sha":"%s","subject":"%s"}' "$sha" "$(json_escape "$subject")"
  FIXES_JSON+="${FIXES_JSON:+,}${fix_entry}"
done <<< "$FIXES_RAW"
FIXES_JSON="[${FIXES_JSON}]"
# ── Step 6: Contributor concentration (top 20 directories) ───────────
# Reduce every changed path to its parent directory and rank directories
# by number of changes.
#  - A path without "/" is a root-level file; it is mapped to "." so file
#    names are not reported as directories.
#  - The count column is stripped with sed so the directory keeps its
#    exact spacing; it is reused as a pathspec below.
TOP_DIRS=$($GIT log --since="$SINCE" --name-only --pretty=format: 2>/dev/null \
  | grep -v '^$' \
  | sed -e '/\//!s|.*|.|' -e 's|/[^/]*$||' \
  | sort | uniq -c | sort -rn | head -20 \
  | sed -E 's/^[[:space:]]*[0-9]+ //' || true)
CONTRIB_JSON="["
contrib_sep=""
while IFS= read -r dir; do
  [[ -z "$dir" ]] && continue
  # Top 5 author emails by commit count for commits touching this directory.
  AUTHORS_RAW=$($GIT log --since="$SINCE" --format='%ae' -- "$dir" 2>/dev/null \
    | sort | uniq -c | sort -rn | head -5 || true)
  AUTHORS_JSON="["
  authors_sep=""
  # First field is the count, second the email; anything further is ignored.
  while read -r acount aemail _; do
    [[ -z "$acount" ]] && continue
    AUTHORS_JSON+="${authors_sep}{\"email\":\"$(json_escape "$aemail")\",\"commits\":$acount}"
    authors_sep=","
  done <<< "$AUTHORS_RAW"
  AUTHORS_JSON+="]"
  CONTRIB_JSON+="${contrib_sep}{\"directory\":\"$(json_escape "$dir")\",\"authors\":$AUTHORS_JSON}"
  contrib_sep=","
done <<< "$TOP_DIRS"
CONTRIB_JSON+="]"
# ── Step 7: Large commits (>20 files changed) ────────────────────────
# LARGE_RAW alternates "<full sha> <subject>" header lines with
# --shortstat summary lines (" N files changed, ..."). The loop remembers
# the most recent header and emits an entry when the summary that follows
# reports more than 20 files.
LARGE_RAW=$($GIT log --since="$SINCE" --pretty=format:"%H %s" --shortstat 2>/dev/null || true)
LARGE_JSON="["
large_sep=""
CURRENT_SHA=""
CURRENT_SUBJECT=""
# Patterns live in variables so spaces and parentheses need no quoting
# inside [[ =~ ]]. The header accepts 40-hex (SHA-1) and 64-hex (SHA-256)
# object ids.
large_header_re='^([0-9a-f]{64}|[0-9a-f]{40}) '
large_stat_re='^ ([0-9]+) file'
large_ins_re='([0-9]+) insertion'
large_del_re='([0-9]+) deletion'
while IFS= read -r line; do
  [[ -z "$line" ]] && continue
  if [[ "$line" =~ $large_header_re ]]; then
    CURRENT_SHA="${BASH_REMATCH[1]}"
    CURRENT_SUBJECT="${line#* }"
  elif [[ "$line" =~ $large_stat_re ]]; then
    files_changed="${BASH_REMATCH[1]}"
    if (( 10#$files_changed > 20 )); then
      # A summary omits the insertion or deletion part when that count is
      # zero, so both default to 0.
      insertions=0
      deletions=0
      if [[ "$line" =~ $large_ins_re ]]; then insertions="${BASH_REMATCH[1]}"; fi
      if [[ "$line" =~ $large_del_re ]]; then deletions="${BASH_REMATCH[1]}"; fi
      LARGE_JSON+="${large_sep}{\"sha\":\"${CURRENT_SHA:0:12}\",\"subject\":\"$(json_escape "$CURRENT_SUBJECT")\",\"files_changed\":$files_changed,\"insertions\":$insertions,\"deletions\":$deletions}"
      large_sep=","
    fi
  fi
done <<< "$LARGE_RAW"
LARGE_JSON+="]"
# ── Step 8: Commit message prefixes (top 20) ─────────────────────────
# A subject's prefix is what remains after cutting at the first "(",
# then at the first ":", then truncating to 30 characters. The three
# substitutions run in that order on every subject.
PREFIX_RAW=$($GIT log --since="$SINCE" --pretty=format:"%s" 2>/dev/null \
  | sed -E -e 's/\(.*//' -e 's/:.*//' -e 's/^(.{0,30}).*/\1/' \
  | sort | uniq -c | sort -rn | head -20 || true)
# Objects are accumulated comma-separated and wrapped in brackets at the end.
PREFIX_JSON=""
while IFS= read -r line; do
  if [[ -n "$line" ]]; then
    # Field 1 of the `uniq -c` line is the count; the remainder is the prefix.
    pcount=$(awk '{print $1}' <<< "$line")
    prefix=$(awk '{$1=""; print substr($0,2)}' <<< "$line")
    PREFIX_JSON+="${PREFIX_JSON:+,}{\"prefix\":\"$(json_escape "$prefix")\",\"count\":$pcount}"
  fi
done <<< "$PREFIX_RAW"
PREFIX_JSON="[${PREFIX_JSON}]"
# ── Step 9: Pattern shifts (recent half vs older half) ────────────────
# Print the 20 most-changed directories, as `uniq -c` lines, for the
# commits selected by the given `git log` range options.
# Arguments: git-log commit-limiting options (e.g. --since=..., --until=...)
# Outputs:   "<count> <directory>" lines on stdout, highest count first
# A path without "/" is a root-level file; it is counted under "." so
# file names are not reported as directories.
top_changed_dirs() {
  $GIT log "$@" --name-only --pretty=format: 2>/dev/null \
    | grep -v '^$' \
    | sed -e '/\//!s|.*|.|' -e 's|/[^/]*$||' \
    | sort | uniq -c | sort -rn | head -20 || true
}
RECENT_DIRS=$(top_changed_dirs --since="${SINCE_HALF} months ago")
OLDER_DIRS=$(top_changed_dirs --since="$SINCE" --until="${SINCE_HALF} months ago")
# Convert `uniq -c` style lines ("<count> <directory>") into a JSON array
# of {"directory": ..., "changes": ...} objects.
# Arguments: $1 - newline-separated input lines (may be empty)
# Outputs:   the JSON array on stdout, followed by a newline
format_dir_activity() {
  # All working variables are local so a call made in the current shell
  # cannot overwrite same-named globals.
  local json="[" sep="" dcount dname
  # `read` splits off the count and leaves the rest of the line as-is, so
  # runs of spaces inside a directory name are preserved.
  while read -r dcount dname; do
    [[ -z "$dcount" ]] && continue
    json+="${sep}{\"directory\":\"$(json_escape "$dname")\",\"changes\":$dcount}"
    sep=","
  done <<< "$1"
  json+="]"
  printf '%s\n' "$json"
}
RECENT_DIRS_JSON=$(format_dir_activity "$RECENT_DIRS")
OLDER_DIRS_JSON=$(format_dir_activity "$OLDER_DIRS")
# ── Assemble final JSON ──────────────────────────────────────────────
# The *_JSON variables already hold serialized JSON and are inserted
# verbatim. Free-text values (the root path and the commit date) go
# through json_escape; a path containing a quote or backslash would
# otherwise produce an invalid document.
JSON=$(cat <<EOF
{
"collected_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"root": "$(json_escape "$ROOT")",
"analysis_window_months": $MONTHS,
"vitals": {
"total_commits": $TOTAL_COMMITS,
"unique_authors": $UNIQUE_AUTHORS,
"last_commit_date": "$(json_escape "$LAST_COMMIT_DATE")",
"recent_commits": $RECENT_COMMITS
},
"file_churn": $CHURN_JSON,
"co_changes": $COCHANGE_JSON,
"reverts": $REVERTS_JSON,
"fix_commits": $FIXES_JSON,
"contributor_concentration": $CONTRIB_JSON,
"large_commits": $LARGE_JSON,
"commit_message_prefixes": $PREFIX_JSON,
"pattern_shifts": {
"recent_half_months": $SINCE_HALF,
"recent": $RECENT_DIRS_JSON,
"older": $OLDER_DIRS_JSON
}
}
EOF
)
# Write to --out when given (confirmation goes to stderr), otherwise
# stdout. printf is used for the payload so its content is never
# interpreted as echo options or escapes.
if [[ -n "$OUT" ]]; then
  printf '%s\n' "$JSON" > "$OUT"
  echo "Git data collected: $OUT" >&2
else
  printf '%s\n' "$JSON"
fi