A command-line tool for data transformation and analytics engineering workflows.
---
dbt generates structured artifacts containing metadata about projects, execution results, and data catalogs. These artifacts enable state comparison, documentation generation, and integration with external tools.
# dbt.artifacts.schemas.base
"""Base artifact schema classes providing common functionality."""
# dbt.artifacts.schemas.results
"""Result artifact schemas for execution outcomes."""
# dbt.artifacts.schemas.batch_results
"""Batch result schemas for grouped execution results."""# dbt.artifacts.schemas.catalog
"""Catalog artifact schemas containing database metadata and column information."""
# dbt.artifacts.schemas.freshness
"""Source freshness schemas for data freshness monitoring results."""
# dbt.artifacts.schemas.manifest
"""Manifest artifact schemas defining project structure and compiled resources."""
# dbt.artifacts.schemas.run
"""Run result schemas containing detailed execution information."""The manifest is dbt's compiled representation of a project, containing all nodes, relationships, and metadata.
# Generated by: dbt parse, dbt compile, dbt run, etc.
# Location: target/manifest.json
# Schema: dbt.artifacts.schemas.manifest
class Manifest:
    """
    Complete compiled representation of a dbt project.

    Contains all nodes, sources, metrics, and their relationships
    after compilation and dependency resolution.
    Written to target/manifest.json by `dbt parse`, `dbt compile`,
    `dbt run`, etc. (schema: dbt.artifacts.schemas.manifest).
    """

    nodes: Dict[str, Any]       # All project nodes (models, tests, etc.)
    sources: Dict[str, Any]     # Source definitions
    macros: Dict[str, Any]      # Macro definitions
    metrics: Dict[str, Any]     # Metric definitions
    exposures: Dict[str, Any]   # Exposure definitions
    parent_map: Dict[str, List[str]]  # Node dependencies (node -> parents)
    child_map: Dict[str, List[str]]  # Node dependents

Contains detailed information about execution results from dbt commands.
# Generated by: dbt run, dbt test, dbt build, dbt seed, etc.
# Location: target/run_results.json
# Schema: dbt.artifacts.schemas.run
class RunExecutionResult:
    """
    Results from executing dbt commands like run, test, build.

    Contains timing information, success/failure status, and
    detailed results for each executed node.
    Written to target/run_results.json (schema: dbt.artifacts.schemas.run).
    """

    results: List[RunResult]  # Individual node results (see RunResult)
    elapsed_time: float       # Total execution time
args: Dict[str, Any]  # Command arguments used (field of RunExecutionResult above)

class RunResult:
    """Result for a single node execution."""

    unique_id: str          # Node unique identifier
    status: str             # 'success', 'error', 'skipped', etc.
    execution_time: float   # Node execution time
    adapter_response: Dict  # Database adapter response
    message: Optional[str]  # Result message
    failures: Optional[int]  # Number of test failures (for tests)

### Catalog Artifact
Contains database metadata and column information discovered during documentation generation.
```python { .api }
# Generated by: dbt docs generate
# Location: target/catalog.json
# Schema: dbt.artifacts.schemas.catalog
class CatalogArtifact:
    """
    Database catalog information including table and column metadata.

    Generated during docs generation by introspecting the database
    to gather actual table structures and statistics.
    Written to target/catalog.json (schema: dbt.artifacts.schemas.catalog).
    """

    nodes: Dict[str, CatalogTable]  # Table metadata by node
sources: Dict[str, CatalogTable]  # Source table metadata (field of CatalogArtifact above)

class CatalogTable:
    """Metadata for a single table or view."""

    columns: Dict[str, CatalogColumn]  # Column information
    stats: Dict[str, CatalogStats]     # Table statistics
    metadata: Dict[str, Any]  # Additional metadata

### Source Freshness Results
Results from source freshness checks.
```python { .api }
# Generated by: dbt source freshness
# Location: target/sources.json
# Schema: dbt.artifacts.schemas.freshness
class FreshnessExecutionResult:
    """
    Results from source freshness checks.

    Contains freshness status and timing information
    for each checked source.
    Written to target/sources.json (schema: dbt.artifacts.schemas.freshness).
    """

    results: List[FreshnessResult]  # Individual source results
    elapsed_time: float  # Total check time

import json
from pathlib import Path
def load_manifest():
    """Load the compiled manifest from target/manifest.json.

    Returns:
        The parsed manifest as a dict, or None when the file does not
        exist (i.e. no manifest has been generated yet).
    """
    manifest_path = Path('target/manifest.json')
    if manifest_path.exists():
        with open(manifest_path) as f:
            return json.load(f)
    return None
# Example: reading the manifest artifact directly.
manifest = load_manifest()
if manifest:
    # Access nodes: filter the node map down to models only
    models = {k: v for k, v in manifest['nodes'].items()
              if v['resource_type'] == 'model'}
    print(f"Found {len(models)} models")

    # Access sources
    sources = manifest.get('sources', {})
print(f"Found {len(sources)} sources")def load_run_results():
"""Load the latest run results."""
results_path = Path('target/run_results.json')
if results_path.exists():
with open(results_path) as f:
return json.load(f)
return None
# Example: summarizing the run_results artifact.
run_results = load_run_results()
if run_results:
    # Analyze results: count executed nodes and how many succeeded
    total_nodes = len(run_results['results'])
    successful = sum(1 for r in run_results['results']
                     if r['status'] == 'success')
    print(f"Executed {total_nodes} nodes, {successful} successful")
print(f"Total time: {run_results['elapsed_time']:.2f}s")Access artifacts through the dbtRunner:
from dbt.cli.main import dbtRunner
runner = dbtRunner()
# Generate manifest
parse_result = runner.invoke(['parse'])
if parse_result.success:
manifest = parse_result.result
# Access manifest data
print(f"Nodes: {len(manifest.nodes)}")
print(f"Sources: {len(manifest.sources)}")
print(f"Macros: {len(manifest.macros)}")
# Generate run results
run_result = runner.invoke(['run'])
if run_result.success:
execution_result = run_result.result
# Access run data
print(f"Execution time: {execution_result.elapsed_time}")
for node_result in execution_result.results:
print(f"{node_result.unique_id}: {node_result.status}")from dbt.constants import (
MANIFEST_FILE_NAME, # "manifest.json"
RUN_RESULTS_FILE_NAME, # "run_results.json"
CATALOG_FILENAME, # "catalog.json"
SOURCE_RESULT_FILE_NAME, # "sources.json"
)
# Standard artifact paths (relative to project root)
target_dir = Path('target')
manifest_path, run_results_path, catalog_path = (
    target_dir / MANIFEST_FILE_NAME,
    target_dir / RUN_RESULTS_FILE_NAME,
    target_dir / CATALOG_FILENAME,
)
sources_path = target_dir / SOURCE_RESULT_FILE_NAME

# Artifacts can be written to custom locations
runner = dbtRunner()
# Custom target path
result = runner.invoke(['run'], target_path='custom_target')
# Custom output paths for specific commands
result = runner.invoke([
'source', 'freshness',
'--output-path', 'freshness_results.json'
])Use artifacts for state-based selection and comparison:
# Run only changed models
result = runner.invoke([
'run',
'--select', 'state:modified',
'--state', 'path/to/previous/artifacts'
])
# Test only new resources
result = runner.invoke([
'test',
'--select', 'state:new',
'--state', 'path/to/previous/artifacts'
])def ci_pipeline():
"""Example CI pipeline using artifacts."""
runner = dbtRunner()
# Parse project and check for issues
parse_result = runner.invoke(['parse'])
if not parse_result.success:
return False
manifest = parse_result.result
# Run only changed models in CI
run_result = runner.invoke([
'run',
'--select', 'state:modified+',
'--state', 'prod_artifacts/'
])
if run_result.success:
# Save artifacts for downstream use
execution_result = run_result.result
save_artifacts(execution_result)
return run_result.successdef analyze_project_health():
"""Analyze project health from artifacts."""
manifest = load_manifest()
run_results = load_run_results()
if not (manifest and run_results):
return
# Model analysis
models = [n for n in manifest['nodes'].values()
if n['resource_type'] == 'model']
# Execution analysis
execution_times = {}
for result in run_results['results']:
execution_times[result['unique_id']] = result.get('execution_time', 0)
# Find slow models
slow_models = sorted(execution_times.items(),
key=lambda x: x[1], reverse=True)[:10]
print("Slowest models:")
for node_id, time in slow_models:
print(f" {node_id}: {time:.2f}s")def validate_artifacts():
"""Validate artifact completeness and consistency."""
manifest = load_manifest()
run_results = load_run_results()
if not manifest:
print("Warning: No manifest found")
return False
if not run_results:
print("Warning: No run results found")
return False
# Check consistency
manifest_nodes = set(manifest['nodes'].keys())
result_nodes = set(r['unique_id'] for r in run_results['results'])
missing = manifest_nodes - result_nodes
if missing:
print(f"Warning: {len(missing)} nodes missing from results")
    return len(missing) == 0

Install with Tessl CLI
npx tessl i tessl/pypi-dbt-core