# A curated collection of Agent Skills for working with dbt, to help AI agents
# understand and execute dbt workflows more effectively.
# Includes validation for skill structure and best-practice checks.
"""CLI entry point for skill-eval."""
from pathlib import Path
from typing import Optional
import typer
from skill_eval import __version__
from skill_eval.logging import logger
from skill_eval.selector import is_interactive, select_run, select_scenarios
# Single Typer application; subcommands below register themselves via @app.command().
app = typer.Typer(help="A/B test skill variations against recorded scenarios.")
def get_latest_run(runs_dir: Path, *, silent: bool = False) -> Path:
    """Return the most recently created run directory.

    Args:
        runs_dir: Directory containing runs
        silent: If True, don't print the run name

    Returns:
        Path to the most recent run directory

    Raises:
        typer.Exit: If no runs found
    """
    if not runs_dir.exists():
        typer.echo("Error: No runs directory found", err=True)
        raise typer.Exit(1)
    candidates = [
        entry
        for entry in runs_dir.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    if not candidates:
        typer.echo("Error: No runs found", err=True)
        raise typer.Exit(1)
    # Run names sort lexicographically by recency, so max-by-name is the latest.
    latest = max(candidates, key=lambda entry: entry.name)
    if not silent:
        typer.echo(f"Using latest run: {latest.name}")
    return latest
def find_run(runs_dir: Path, run_id: Optional[str], *, latest: bool = False) -> Path:
    """Resolve a run directory from an exact/partial ID, latest fallback, or prompt.

    Args:
        runs_dir: Directory containing runs
        run_id: Full or partial run ID, or None for latest/interactive
        latest: If True and no run_id, use latest without prompting

    Returns:
        Path to the matching run directory

    Raises:
        typer.Exit: If no match or multiple matches found
    """
    if not runs_dir.exists():
        typer.echo("Error: No runs directory found", err=True)
        raise typer.Exit(1)
    # Hidden (dot-prefixed) directories are never runs.
    available = [
        entry
        for entry in runs_dir.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    if not available:
        typer.echo("Error: No runs found", err=True)
        raise typer.Exit(1)

    if run_id is None:
        # No ID given: --latest (or a non-interactive session) means "use latest";
        # otherwise show the interactive selector.
        if latest or not is_interactive():
            return get_latest_run(runs_dir)
        chosen = select_run(available, "Select a run")
        if chosen is None:
            typer.echo("Selection cancelled", err=True)
            raise typer.Exit(1)
        typer.echo(f"Selected run: {chosen.name}")
        return chosen

    # An exact directory name always wins over substring matching.
    candidate = runs_dir / run_id
    if candidate.exists() and candidate.is_dir():
        return candidate

    # Fall back to substring matching against all run names.
    hits = [entry for entry in available if run_id in entry.name]
    if len(hits) == 1:
        typer.echo(f"Matched run: {hits[0].name}")
        return hits[0]
    if len(hits) > 1:
        if is_interactive():
            # Narrow the interactive selector to just the ambiguous matches.
            chosen = select_run(hits, f"Multiple runs match '{run_id}'")
            if chosen is None:
                typer.echo("Selection cancelled", err=True)
                raise typer.Exit(1)
            typer.echo(f"Selected run: {chosen.name}")
            return chosen
        typer.echo(f"Error: '{run_id}' matches multiple runs:", err=True)
        for hit in sorted(hits, key=lambda entry: entry.name, reverse=True)[:10]:
            typer.echo(f"  - {hit.name}", err=True)
        if len(hits) > 10:
            typer.echo(f"  ... and {len(hits) - 10} more", err=True)
        raise typer.Exit(1)
    # No match at all: show the most recent runs as a hint.
    typer.echo(f"Error: No run matching '{run_id}'", err=True)
    recent = sorted(available, key=lambda entry: entry.name, reverse=True)[:5]
    if recent:
        typer.echo("Recent runs:", err=True)
        for entry in recent:
            typer.echo(f"  - {entry.name}", err=True)
    raise typer.Exit(1)
def find_scenarios(
scenarios_dir: Path, names: list[str] | None, *, all_flag: bool = False
) -> list[Path]:
"""Find scenarios by name or prompt for selection.
Args:
scenarios_dir: Directory containing scenarios
names: List of scenario names (can be partial matches), or None
all_flag: If True, return all scenarios without prompting
Returns:
List of scenario directory paths
Raises:
typer.Exit: If no scenarios found or selection cancelled
"""
if not scenarios_dir.exists():
typer.echo("Error: No scenarios directory found", err=True)
raise typer.Exit(1)
# Get all scenario directories (exclude only hidden dirs starting with .)
all_scenarios = [
d
for d in scenarios_dir.iterdir()
if d.is_dir() and not d.name.startswith(".")
]
if not all_scenarios:
typer.echo("Error: No scenarios found", err=True)
raise typer.Exit(1)
if all_flag:
return sorted(all_scenarios, key=lambda d: d.name)
if names:
# Match provided names
matched: list[Path] = []
for name in names:
# Try exact match first
exact = scenarios_dir / name
if exact.exists() and exact.is_dir():
matched.append(exact)
continue
# Try partial match
partial_matches = [d for d in all_scenarios if name in d.name]
if len(partial_matches) == 1:
matched.append(partial_matches[0])
elif len(partial_matches) > 1:
typer.echo(f"Error: '{name}' matches multiple scenarios:", err=True)
for m in partial_matches:
typer.echo(f" - {m.name}", err=True)
raise typer.Exit(1)
else:
typer.echo(f"Error: No scenario matching '{name}'", err=True)
raise typer.Exit(1)
return matched
# No names provided - decide behavior based on interactivity
if is_interactive():
# Show interactive multi-selector
selected = select_scenarios(all_scenarios, "Select scenarios to run")
if not selected:
typer.echo("No scenarios selected", err=True)
raise typer.Exit(1)
typer.echo(f"Selected {len(selected)} scenario(s)")
return selected
else:
# Non-interactive without --all: error
typer.echo("Error: Specify scenario names or use --all", err=True)
raise typer.Exit(1)
def version_callback(value: bool) -> None:
    """Print the package version and exit when --version is passed."""
    if not value:
        return
    typer.echo(f"skill-eval {__version__}")
    raise typer.Exit()
@app.callback()
def main(
    version: bool = typer.Option(
        False, "--version", "-v", callback=version_callback, is_eager=True
    ),
) -> None:
    """Skill evaluation CLI."""
    # All behavior lives in the subcommands; --version is handled eagerly
    # by version_callback before any command runs.
@app.command()
def run(
    scenarios: Optional[list[str]] = typer.Argument(
        None, help="Scenario names to run (supports partial matches)"
    ),
    all_scenarios: bool = typer.Option(False, "--all", help="Run all scenarios"),
    parallel: bool = typer.Option(False, "--parallel", "-p", help="Run tasks in parallel"),
    workers: int = typer.Option(4, "--workers", "-w", help="Number of parallel workers"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show detailed progress (tool calls)"),
) -> None:
    """Run scenarios against skill variants."""
    from skill_eval.logging import set_level
    from skill_eval.models import load_scenario
    from skill_eval.runner import Runner, RunTask

    if verbose:
        set_level("DEBUG")

    evals_dir = Path.cwd()
    scenario_dirs = find_scenarios(
        evals_dir / "scenarios", scenarios, all_flag=all_scenarios
    )

    runner = Runner(evals_dir=evals_dir)
    run_dir = runner.create_run_dir()
    typer.echo(f"Run directory: {run_dir}")

    # Load every selected scenario up front, in a stable (sorted) order.
    loaded = [load_scenario(path) for path in sorted(scenario_dirs)]

    if not parallel:
        # Sequential execution: one scenario/skill-set pair at a time.
        for scenario in loaded:
            logger.info(f"Scenario: {scenario.name}")
            for variant in scenario.skill_sets:
                logger.info(f"  Starting: {variant.name}")
                outcome = runner.run_scenario(scenario, variant, run_dir)
                if outcome.success:
                    logger.success(f"  Completed: {variant.name}")
                else:
                    logger.error(f"  Failed: {variant.name} - {outcome.error}")
    else:
        # One task per (scenario, skill set) pair, fanned out across workers.
        tasks = [
            RunTask(scenario=scenario, skill_set=variant, run_dir=run_dir)
            for scenario in loaded
            for variant in scenario.skill_sets
        ]
        total = len(tasks)
        typer.echo(f"\nRunning {total} tasks with {workers} workers...\n")
        completed = 0
        passed = 0
        failed = 0

        def on_complete(task: RunTask, result) -> None:
            # Progress callback invoked by the runner as each task finishes.
            nonlocal completed, passed, failed
            completed += 1
            if result.success:
                passed += 1
                icon = "✓"
            else:
                failed += 1
                icon = "✗"
            logger.info(f"[{completed}/{total}] {task.scenario.name}/{task.skill_set.name} {icon}")

        runner.run_parallel(tasks, max_workers=workers, progress_callback=on_complete)
        logger.success(f"Run complete: {passed} passed, {failed} failed")

    logger.success(f"Run complete: {run_dir}")
    typer.echo(f"Next: uv run skill-eval grade {run_dir.name}")
@app.command()
def grade(
    run_id: Optional[str] = typer.Argument(None, help="Run ID (full or partial). Defaults to latest run."),
    auto: bool = typer.Option(False, "--auto", help="Auto-grade using Claude"),
    latest: bool = typer.Option(False, "--latest", "-l", help="Use latest run without prompting"),
) -> None:
    """Grade outputs from a run.

    With --auto, every scenario/skill-set output in the run is sent to the
    Claude grader and the parsed grades are saved to grades.yaml. Without
    --auto, an empty grades file is initialized for manual editing.
    """
    # Hoisted out of the inner loop where it was previously re-imported on
    # every iteration.
    from dataclasses import asdict

    import yaml

    from skill_eval.grader import (
        build_grading_prompt,
        call_claude_grader,
        compute_skill_usage,
        init_grades_file,
        parse_grade_response,
        save_grades,
    )

    evals_dir = Path.cwd()
    runs_dir = evals_dir / "runs"
    scenarios_dir = evals_dir / "scenarios"
    run_dir = find_run(runs_dir, run_id, latest=latest)
    if auto:
        # Report the resolved directory name: run_id may be None (defaulted
        # to the latest run) or only a partial match, so interpolating it
        # directly would print "None" or a fragment.
        typer.echo(f"Auto-grading run: {run_dir.name}")
        typer.echo()
        # Count scenario/skill-set pairs up front for progress reporting.
        total = sum(
            1
            for scenario_dir in run_dir.iterdir()
            if scenario_dir.is_dir() and not scenario_dir.name.startswith(".")
            for skill_set_dir in scenario_dir.iterdir()
            if skill_set_dir.is_dir()
        )
        current = 0
        results: dict[str, dict[str, dict]] = {}
        for scenario_dir in sorted(run_dir.iterdir()):
            if not scenario_dir.is_dir() or scenario_dir.name.startswith("."):
                continue
            scenario_name = scenario_dir.name
            results[scenario_name] = {}
            for skill_set_dir in sorted(scenario_dir.iterdir()):
                if not skill_set_dir.is_dir():
                    continue
                skill_set_name = skill_set_dir.name
                current += 1
                typer.echo(f"  [{current}/{total}] Grading {scenario_name}/{skill_set_name}...", nl=False)
                # Load metadata for skill usage computation
                metadata_file = skill_set_dir / "metadata.yaml"
                metadata = {}
                if metadata_file.exists():
                    with metadata_file.open() as f:
                        metadata = yaml.safe_load(f) or {}
                grading_prompt = build_grading_prompt(scenarios_dir / scenario_name, skill_set_dir)
                response = call_claude_grader(grading_prompt)
                grade = parse_grade_response(response)
                # Add skill usage data (computed from metadata, not from Claude)
                available, invoked, pct = compute_skill_usage(metadata)
                grade.skills_available = available
                grade.skills_invoked = invoked
                grade.skill_usage_pct = pct
                results[scenario_name][skill_set_name] = asdict(grade)
                # Show result
                success_icon = "✓" if grade.success else "✗" if grade.success is False else "?"
                score = grade.score if grade.score is not None else "?"
                typer.echo(f" {success_icon} (score: {score})")
        grades = {"graded_at": None, "grader": "claude-auto", "results": results}
        save_grades(run_dir, grades)
        grades_file = run_dir / "grades.yaml"
        typer.echo(f"\nGrades saved to: {grades_file}")
        # Suggest the follow-up command with the resolved run name so it
        # works even when run_id was omitted or partial.
        typer.echo(f"Run: uv run skill-eval report {run_dir.name}")
    else:
        grades_file = init_grades_file(run_dir)
        typer.echo(f"Grades file: {grades_file}")
        typer.echo("\nOutputs to review:")
        for scenario_dir in sorted(run_dir.iterdir()):
            if not scenario_dir.is_dir():
                continue
            typer.echo(f"\n  {scenario_dir.name}/")
            for skill_set_dir in sorted(scenario_dir.iterdir()):
                if not skill_set_dir.is_dir():
                    continue
                typer.echo(f"    {skill_set_dir.name}/output.md")
        typer.echo(f"\nEdit {grades_file} to record your grades.")
        typer.echo(f"Then run: uv run skill-eval report {run_dir.name}")
@app.command()
def report(
    run_id: Optional[str] = typer.Argument(None, help="Run ID (full or partial). Defaults to latest run."),
    latest: bool = typer.Option(False, "--latest", "-l", help="Use latest run without prompting"),
) -> None:
    """Generate comparison report for a run."""
    from skill_eval.reporter import print_rich_report, save_report

    evals_dir = Path.cwd()
    run_dir = find_run(evals_dir / "runs", run_id, latest=latest)
    # Reports accumulate in a sibling directory; create it on first use.
    reports_dir = evals_dir / "reports"
    reports_dir.mkdir(exist_ok=True)
    saved_path = save_report(run_dir, reports_dir)
    print_rich_report(run_dir)
    typer.echo(f"\nSaved to: {saved_path}")
@app.command()
def review(
    run_id: Optional[str] = typer.Argument(None, help="Run ID (full or partial). Defaults to latest run."),
    latest: bool = typer.Option(False, "--latest", "-l", help="Use latest run without prompting"),
) -> None:
    """Open HTML transcripts in browser for review."""
    import webbrowser

    evals_dir = Path.cwd()
    runs_dir = evals_dir / "runs"
    run_dir = find_run(runs_dir, run_id, latest=latest)
    # Each skill-set output directory is expected to contain a rendered
    # transcript/index.html.
    transcripts = list(run_dir.glob("**/transcript/index.html"))
    if not transcripts:
        typer.echo(f"Error: No transcripts found in {run_dir}", err=True)
        raise typer.Exit(1)
    typer.echo(f"Opening {len(transcripts)} transcript(s)...")
    for transcript in sorted(transcripts):
        # Show which transcript we're opening
        rel_path = transcript.relative_to(run_dir)
        typer.echo(f"  {rel_path}")
        # Path.as_uri() builds a properly percent-encoded file:// URL,
        # unlike f"file://{path}" which breaks on spaces, non-ASCII names,
        # and Windows drive letters.
        webbrowser.open(transcript.as_uri())
# Script entry point: non-code text scraped onto this line has been removed
# so the module parses and runs cleanly.
if __name__ == "__main__":
    app()
# Install with the Tessl CLI:
#   npx tessl i dbt-labs/dbt-agent-skills@1.1.0
#
# Repository layout:
#   evals/
#     scenarios/
#       dbt-docs-arguments
#       dbt-docs-unit-test-fixtures
#       dbt-job-failure
#       dbt-unit-test-format-choice
#       example-yaml-error
#       fusion-migration-triage-basic
#       fusion-migration-triage-blocked
#       fusion-triage-cat-a-static-analysis
#       fusion-triage-cat-b-dict-meta-get
#       fusion-triage-cat-b-unexpected-config
#       fusion-triage-cat-b-unused-schema
#       fusion-triage-cat-b-yaml-syntax
#       fusion-triage-cat-c-hardcoded-fqn
#     tests/
#     scripts/
#   skills/
#     dbt/
#       skills/
#         adding-dbt-unit-test (references/)
#         answering-natural-language-questions-with-dbt
#         building-dbt-semantic-layer
#         configuring-dbt-mcp-server
#         fetching-dbt-docs (scripts/)
#         running-dbt-commands
#         troubleshooting-dbt-job-errors (references/)
#         using-dbt-for-analytics-engineering
#     dbt-migration/
#       skills/
#         migrating-dbt-core-to-fusion
#         migrating-dbt-project-across-platforms