A curated collection of Agent Skills for working with dbt, to help AI agents understand and execute dbt workflows more effectively.
91
Does it follow best practices?
Validation for skill structure
"""Grading utilities for skill evaluation."""
import re
import subprocess
from dataclasses import asdict
from datetime import datetime
from pathlib import Path
import yaml
from skill_eval.models import Grade
def compute_skill_usage(metadata: dict) -> tuple[list[str], list[str], float | None]:
"""Compute skill usage statistics from run metadata.
Args:
metadata: Run metadata containing skills_available and skills_invoked
Returns:
Tuple of (skills_available, skills_invoked, usage_percentage)
usage_percentage is None if no skills were available
"""
available = metadata.get("skills_available", [])
invoked = metadata.get("skills_invoked", [])
if not available:
return available, invoked, None
# Calculate percentage of available skills that were invoked
invoked_set = set(invoked)
available_set = set(available)
used_count = len(invoked_set & available_set)
pct = (used_count / len(available_set)) * 100 if available_set else None
return available, invoked, pct
GRADING_PROMPT_TEMPLATE = """You are grading an AI assistant's response to a task.
## Task Given
{prompt}
## Expected Behavior & Grading Criteria
{scenario}
## Assistant's Response
{output}
## Tools & Resources Used
{tools_used}
## Modified Files
{modified_files}
---
Grade this response on the following dimensions:
### 1. Task Completion
Did the assistant accomplish the main task described in the prompt?
### 2. Tool Usage
Were the appropriate tools used for this task? Consider:
- Did it use the tools that were available and relevant?
- Did it avoid unnecessary tool calls?
- If MCP servers were available, did it leverage them appropriately?
- If skills were available, did it invoke them when relevant?
### 3. Solution Quality
How good is the solution? Consider correctness, completeness, and clarity.
---
Respond with ONLY a YAML block (no markdown fences), using this exact format:
success: true or false (did it accomplish the main task?)
score: 1-5 (1=failed, 2=poor, 3=acceptable, 4=good, 5=excellent)
tool_usage: appropriate, partial, or inappropriate (did it use the right tools?)
notes: brief explanation of your grade (one line)
"""
def build_grading_prompt(
scenario_dir: Path,
output_dir: Path,
) -> str:
"""Build the grading prompt for a scenario/skill-set combination."""
# Load scenario description and criteria
scenario_file = scenario_dir / "scenario.md"
scenario_content = scenario_file.read_text() if scenario_file.exists() else "No scenario description provided."
# Load the original prompt
prompt_file = scenario_dir / "prompt.txt"
prompt_content = prompt_file.read_text() if prompt_file.exists() else "No prompt provided."
# Load the assistant's output
output_file = output_dir / "output.md"
output_content = output_file.read_text() if output_file.exists() else "No output recorded."
# Load metadata for tools used
metadata_file = output_dir / "metadata.yaml"
tools_used = []
skills_invoked = []
skills_available = []
mcp_servers = []
if metadata_file.exists():
with metadata_file.open() as f:
metadata = yaml.safe_load(f)
tools_used = metadata.get("tools_used", [])
skills_invoked = metadata.get("skills_invoked", [])
skills_available = metadata.get("skills_available", [])
mcp_servers = metadata.get("mcp_servers", [])
# Build tools section with clear availability vs usage
tools_lines = ["Tools called:"]
tools_lines.extend(f" - {t}" for t in tools_used) if tools_used else tools_lines.append(" (none)")
if skills_available:
tools_lines.append("\nSkills available:")
tools_lines.extend(f" - {s}" for s in skills_available)
tools_lines.append("\nSkills actually invoked:")
tools_lines.extend(f" - {s}" for s in skills_invoked) if skills_invoked else tools_lines.append(" (none)")
if mcp_servers:
tools_lines.append("\nMCP servers configured:")
for server in mcp_servers:
if isinstance(server, dict):
name = server.get("name", "unknown")
status = server.get("status", "unknown")
tools_lines.append(f" - {name} (status: {status})")
else:
tools_lines.append(f" - {server}")
tools_str = "\n".join(tools_lines)
# List modified files in changes/
changes_dir = output_dir / "changes"
modified_files = []
if changes_dir.exists():
for f in changes_dir.rglob("*"):
if f.is_file():
rel_path = f.relative_to(changes_dir)
modified_files.append(str(rel_path))
modified_files_str = "\n".join(f"- {f}" for f in modified_files) if modified_files else "No files modified."
return GRADING_PROMPT_TEMPLATE.format(
prompt=prompt_content,
scenario=scenario_content,
output=output_content,
tools_used=tools_str,
modified_files=modified_files_str,
)
def parse_grade_response(response: str) -> Grade:
"""Parse the YAML grade response from Claude."""
# Try to extract YAML from the response (handle markdown fences if present)
yaml_match = re.search(r"```ya?ml?\s*(.*?)\s*```", response, re.DOTALL)
if yaml_match:
yaml_content = yaml_match.group(1)
else:
# Assume the whole response is YAML
yaml_content = response.strip()
try:
parsed = yaml.safe_load(yaml_content)
if not isinstance(parsed, dict):
return Grade(notes=f"Failed to parse: {response[:100]}")
return Grade(
success=parsed.get("success"),
score=parsed.get("score"),
tool_usage=parsed.get("tool_usage"),
criteria=parsed.get("criteria", {}),
notes=parsed.get("notes", ""),
observations=parsed.get("observations", ""),
)
except yaml.YAMLError as e:
return Grade(notes=f"YAML parse error: {e}")
def call_claude_grader(prompt: str) -> str:
"""Call Claude CLI to grade a response."""
cmd = [
"claude",
"--print",
"--no-session-persistence",
"-p",
prompt,
]
try:
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=120,
)
return result.stdout.strip()
except subprocess.TimeoutExpired:
return "error: timeout"
except Exception as e:
return f"error: {e}"
def auto_grade_run(run_dir: Path, scenarios_dir: Path) -> dict:
"""Auto-grade all outputs in a run using Claude."""
results: dict = {}
for scenario_output_dir in sorted(run_dir.iterdir()):
if not scenario_output_dir.is_dir() or scenario_output_dir.name.startswith("."):
continue
scenario_name = scenario_output_dir.name
scenario_dir = scenarios_dir / scenario_name
results[scenario_name] = {}
for skill_set_dir in sorted(scenario_output_dir.iterdir()):
if not skill_set_dir.is_dir():
continue
skill_set_name = skill_set_dir.name
# Build and send grading prompt
grading_prompt = build_grading_prompt(scenario_dir, skill_set_dir)
response = call_claude_grader(grading_prompt)
grade = parse_grade_response(response)
results[scenario_name][skill_set_name] = asdict(grade)
grades = {
"graded_at": datetime.now().isoformat(),
"grader": "claude-auto",
"results": results,
}
return grades
def init_grades_file(run_dir: Path) -> Path:
"""Create initial grades.yaml file for a run."""
grades_file = run_dir / "grades.yaml"
if grades_file.exists():
return grades_file
results: dict = {}
for scenario_dir in run_dir.iterdir():
if not scenario_dir.is_dir():
continue
scenario_name = scenario_dir.name
results[scenario_name] = {}
for skill_set_dir in scenario_dir.iterdir():
if not skill_set_dir.is_dir():
continue
skill_set_name = skill_set_dir.name
results[scenario_name][skill_set_name] = asdict(Grade())
grades = {
"graded_at": None,
"grader": "human",
"results": results,
}
with grades_file.open("w") as f:
yaml.dump(grades, f, default_flow_style=False, sort_keys=False)
return grades_file
def load_grades(run_dir: Path) -> dict:
"""Load grades from a run directory."""
grades_file = run_dir / "grades.yaml"
if not grades_file.exists():
return {}
with grades_file.open() as f:
return yaml.safe_load(f)
def save_grades(run_dir: Path, grades: dict) -> None:
"""Save grades to a run directory."""
grades["graded_at"] = datetime.now().isoformat()
grades_file = run_dir / "grades.yaml"
with grades_file.open("w") as f:
yaml.dump(grades, f, default_flow_style=False, sort_keys=False)Install with Tessl CLI
npx tessl i dbt-labs/dbt-agent-skillsevals
scenarios
dbt-docs-arguments
dbt-docs-unit-test-fixtures
dbt-job-failure
dbt-unit-test-format-choice
example-yaml-error
fusion-migration-triage-basic
fusion-migration-triage-blocked
fusion-triage-cat-a-static-analysis
fusion-triage-cat-b-dict-meta-get
fusion-triage-cat-b-unexpected-config
fusion-triage-cat-b-unused-schema
fusion-triage-cat-b-yaml-syntax
fusion-triage-cat-c-hardcoded-fqn
tests
scripts
skills
dbt
skills
adding-dbt-unit-test
references
answering-natural-language-questions-with-dbt
building-dbt-semantic-layer
configuring-dbt-mcp-server
fetching-dbt-docs
scripts
running-dbt-commands
troubleshooting-dbt-job-errors
references
using-dbt-for-analytics-engineering
dbt-migration
skills
migrating-dbt-core-to-fusion
migrating-dbt-project-across-platforms