A command-line tool for data transformation and analytics engineering workflows.
---
dbt generates structured artifacts containing metadata about projects, execution results, and data catalogs. These artifacts enable state comparison, documentation generation, and integration with external tools.
# dbt.artifacts.schemas.base
"""Base artifact schema classes providing common functionality."""
# dbt.artifacts.schemas.results
"""Result artifact schemas for execution outcomes."""
# dbt.artifacts.schemas.batch_results
"""Batch result schemas for grouped execution results."""# dbt.artifacts.schemas.catalog
"""Catalog artifact schemas containing database metadata and column information."""
# dbt.artifacts.schemas.freshness
"""Source freshness schemas for data freshness monitoring results."""
# dbt.artifacts.schemas.manifest
"""Manifest artifact schemas defining project structure and compiled resources."""
# dbt.artifacts.schemas.run
"""Run result schemas containing detailed execution information."""The manifest is dbt's compiled representation of a project, containing all nodes, relationships, and metadata.
# Generated by: dbt parse, dbt compile, dbt run, etc.
# Location: target/manifest.json
# Schema: dbt.artifacts.schemas.manifest
class Manifest:
    """
    Complete compiled representation of a dbt project.

    Contains all nodes, sources, metrics, and their relationships
    after compilation and dependency resolution.
    Written to target/manifest.json by `dbt parse`, `dbt compile`,
    `dbt run`, etc. (schema: dbt.artifacts.schemas.manifest).
    """

    nodes: Dict[str, Any]       # All project nodes (models, tests, etc.)
    sources: Dict[str, Any]     # Source definitions
    macros: Dict[str, Any]      # Macro definitions
    metrics: Dict[str, Any]     # Metric definitions
    exposures: Dict[str, Any]   # Exposure definitions
    parent_map: Dict[str, List[str]]  # Node dependencies (node -> parents)
    child_map: Dict[str, List[str]]  # Node dependents

Contains detailed information about execution results from dbt commands.
# Generated by: dbt run, dbt test, dbt build, dbt seed, etc.
# Location: target/run_results.json
# Schema: dbt.artifacts.schemas.run
class RunExecutionResult:
    """
    Results from executing dbt commands like run, test, build.

    Contains timing information, success/failure status, and
    detailed results for each executed node.
    Written to target/run_results.json (schema: dbt.artifacts.schemas.run).
    """

    results: List[RunResult]  # Individual node results (see RunResult)
    elapsed_time: float       # Total execution time
args: Dict[str, Any]  # Command arguments used (field of RunExecutionResult above)

class RunResult:
    """Result for a single node execution."""

    unique_id: str          # Node unique identifier
    status: str             # 'success', 'error', 'skipped', etc.
    execution_time: float   # Node execution time
    adapter_response: Dict  # Database adapter response
    message: Optional[str]  # Result message
    failures: Optional[int]  # Number of test failures (for tests)

### Catalog Artifact
Contains database metadata and column information discovered during documentation generation.
```python { .api }
# Generated by: dbt docs generate
# Location: target/catalog.json
# Schema: dbt.artifacts.schemas.catalog
class CatalogArtifact:
    """
    Database catalog information including table and column metadata.

    Generated during docs generation by introspecting the database
    to gather actual table structures and statistics.
    Written to target/catalog.json (schema: dbt.artifacts.schemas.catalog).
    """

    nodes: Dict[str, CatalogTable]  # Table metadata by node
sources: Dict[str, CatalogTable]  # Source table metadata (field of CatalogArtifact above)

class CatalogTable:
    """Metadata for a single table or view."""

    columns: Dict[str, CatalogColumn]  # Column information
    stats: Dict[str, CatalogStats]     # Table statistics
    metadata: Dict[str, Any]  # Additional metadata

### Source Freshness Results
Results from source freshness checks.
```python { .api }
# Generated by: dbt source freshness
# Location: target/sources.json
# Schema: dbt.artifacts.schemas.freshness
class FreshnessExecutionResult:
    """
    Results from source freshness checks.

    Contains freshness status and timing information
    for each checked source.
    Written to target/sources.json (schema: dbt.artifacts.schemas.freshness).
    """

    results: List[FreshnessResult]  # Individual source results
    elapsed_time: float  # Total check time

import json
from pathlib import Path
def load_manifest():
    """Load the compiled manifest from target/manifest.json.

    Returns:
        The parsed manifest as a dict, or None when the file does not
        exist (i.e. no manifest has been generated yet).
    """
    manifest_path = Path('target/manifest.json')
    if manifest_path.exists():
        with open(manifest_path) as f:
            return json.load(f)
    return None
# Example: reading the manifest artifact directly.
manifest = load_manifest()
if manifest:
    # Access nodes: filter the node map down to models only
    models = {k: v for k, v in manifest['nodes'].items()
              if v['resource_type'] == 'model'}
    print(f"Found {len(models)} models")

    # Access sources
    sources = manifest.get('sources', {})
print(f"Found {len(sources)} sources")def load_run_results():
"""Load the latest run results."""
results_path = Path('target/run_results.json')
if results_path.exists():
with open(results_path) as f:
return json.load(f)
return None
# Example: summarizing the run_results artifact.
run_results = load_run_results()
if run_results:
    # Analyze results: count executed nodes and how many succeeded
    total_nodes = len(run_results['results'])
    successful = sum(1 for r in run_results['results']
                     if r['status'] == 'success')
    print(f"Executed {total_nodes} nodes, {successful} successful")
print(f"Total time: {run_results['elapsed_time']:.2f}s")Access artifacts through the dbtRunner:
from dbt.cli.main import dbtRunner
runner = dbtRunner()
# Generate manifest
parse_result = runner.invoke(['parse'])
if parse_result.success:
manifest = parse_result.result
# Access manifest data
print(f"Nodes: {len(manifest.nodes)}")
print(f"Sources: {len(manifest.sources)}")
print(f"Macros: {len(manifest.macros)}")
# Generate run results
run_result = runner.invoke(['run'])
if run_result.success:
execution_result = run_result.result
# Access run data
print(f"Execution time: {execution_result.elapsed_time}")
for node_result in execution_result.results:
print(f"{node_result.unique_id}: {node_result.status}")from dbt.constants import (
MANIFEST_FILE_NAME, # "manifest.json"
RUN_RESULTS_FILE_NAME, # "run_results.json"
CATALOG_FILENAME, # "catalog.json"
SOURCE_RESULT_FILE_NAME, # "sources.json"
)
# Standard artifact paths (relative to project root)
target_dir = Path('target')
manifest_path, run_results_path, catalog_path = (
    target_dir / MANIFEST_FILE_NAME,
    target_dir / RUN_RESULTS_FILE_NAME,
    target_dir / CATALOG_FILENAME,
)
sources_path = target_dir / SOURCE_RESULT_FILE_NAME

# Artifacts can be written to custom locations
runner = dbtRunner()
# Custom target path
result = runner.invoke(['run'], target_path='custom_target')
# Custom output paths for specific commands
result = runner.invoke([
'source', 'freshness',
'--output-path', 'freshness_results.json'
])Use artifacts for state-based selection and comparison:
# Run only changed models
result = runner.invoke([
'run',
'--select', 'state:modified',
'--state', 'path/to/previous/artifacts'
])
# Test only new resources
result = runner.invoke([
'test',
'--select', 'state:new',
'--state', 'path/to/previous/artifacts'
])def ci_pipeline():
"""Example CI pipeline using artifacts."""
runner = dbtRunner()
# Parse project and check for issues
parse_result = runner.invoke(['parse'])
if not parse_result.success:
return False
manifest = parse_result.result
# Run only changed models in CI
run_result = runner.invoke([
'run',
'--select', 'state:modified+',
'--state', 'prod_artifacts/'
])
if run_result.success:
# Save artifacts for downstream use
execution_result = run_result.result
save_artifacts(execution_result)
return run_result.successdef analyze_project_health():
"""Analyze project health from artifacts."""
manifest = load_manifest()
run_results = load_run_results()
if not (manifest and run_results):
return
# Model analysis
models = [n for n in manifest['nodes'].values()
if n['resource_type'] == 'model']
# Execution analysis
execution_times = {}
for result in run_results['results']:
execution_times[result['unique_id']] = result.get('execution_time', 0)
# Find slow models
slow_models = sorted(execution_times.items(),
key=lambda x: x[1], reverse=True)[:10]
print("Slowest models:")
for node_id, time in slow_models:
print(f" {node_id}: {time:.2f}s")def validate_artifacts():
"""Validate artifact completeness and consistency."""
manifest = load_manifest()
run_results = load_run_results()
if not manifest:
print("Warning: No manifest found")
return False
if not run_results:
print("Warning: No run results found")
return False
# Check consistency
manifest_nodes = set(manifest['nodes'].keys())
result_nodes = set(r['unique_id'] for r in run_results['results'])
missing = manifest_nodes - result_nodes
if missing:
print(f"Warning: {len(missing)} nodes missing from results")
    return len(missing) == 0

Install with Tessl CLI
npx tessl i tessl/pypi-dbt-core