SQL Lineage Analysis Tool powered by Python
The LineageRunner class provides the main programmatic interface for analyzing SQL statements and extracting lineage information. It supports multiple SQL dialects, metadata integration, and various output formats including visualization and data export.
The primary entry point for SQL lineage analysis, offering both table-level and column-level lineage extraction with configurable parsing and metadata options.
class LineageRunner:
def __init__(
self,
sql: str,
dialect: str = "ansi",
metadata_provider: MetaDataProvider = DummyMetaDataProvider(),
verbose: bool = False,
silent_mode: bool = False,
draw_options: Optional[Dict[str, Any]] = None,
file_path: str = "."
):
"""
The entry point of SQLLineage after command line options are parsed.
Parameters:
- sql: a string representation of SQL statements
- dialect: SQL dialect name (default: "ansi")
- metadata_provider: provider for table/column metadata
- verbose: enable verbose output for debugging
- silent_mode: skip unsupported SQL statements silently
- draw_options: options for visualization (optional)
- file_path: file path context for relative references
"""Methods for accessing the parsed SQL statements and their components.
def statements(self) -> List[str]:
"""Get list of individual SQL statements"""
@staticmethod
def supported_dialects() -> Dict[str, List[str]]:
"""Get mapping of parser names to supported SQL dialects"""Properties for accessing table-level lineage information including source, target, and intermediate tables identified in the SQL analysis.
@property
def source_tables(self) -> List[Table]:
"""Get list of source tables (tables being read from)"""
@property
def target_tables(self) -> List[Table]:
"""Get list of target tables (tables being written to)"""
@property
def intermediate_tables(self) -> List[Table]:
"""Get list of intermediate tables (CTEs, temp tables, etc.)"""Methods for extracting column-level dependencies and relationships between source and target columns.
def get_column_lineage(
self,
exclude_path_ending_in_subquery: bool = True,
exclude_subquery_columns: bool = False
) -> List[Tuple[Column, Column]]:
"""
Get column-level lineage as list of (source_column, target_column) tuples.
Parameters:
- exclude_path_ending_in_subquery: exclude paths ending in subqueries
- exclude_subquery_columns: exclude columns from subqueries
Returns:
List of tuples where each tuple contains:
- source_column: Column object representing the source
- target_column: Column object representing the target
"""Methods for generating various output formats and launching interactive visualizations.
def print_table_lineage(self) -> None:
"""Print table-level lineage to stdout in human-readable format"""
def print_column_lineage(self) -> None:
"""Print column-level lineage to stdout in human-readable format"""
def to_cytoscape(self, level: LineageLevel = LineageLevel.TABLE) -> List[Dict[str, Dict[str, str]]]:
"""
Export lineage graph to Cytoscape.js format for web visualization.
Parameters:
- level: lineage level (LineageLevel.TABLE or LineageLevel.COLUMN)
Returns:
List of dictionaries representing nodes and edges in Cytoscape format
"""
def draw(self) -> None:
"""
Launch interactive web visualization of the lineage graph.
Opens a web browser with an interactive graph interface.
"""from sqllineage.runner import LineageRunner
sql = """
CREATE TABLE staging.customer_summary AS
SELECT
c.customer_id,
c.customer_name,
COUNT(o.order_id) as order_count,
SUM(o.total_amount) as total_spent
FROM raw.customers c
LEFT JOIN raw.orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name
"""
runner = LineageRunner(sql)
print("Sources:", [str(t) for t in runner.source_tables])
print("Targets:", [str(t) for t in runner.target_tables])from sqllineage.runner import LineageRunner
from sqllineage.core.metadata.dummy import DummyMetaDataProvider
# Provide schema metadata for more accurate column lineage
metadata = {
"raw.customers": ["customer_id", "customer_name", "email", "created_date"],
"raw.orders": ["order_id", "customer_id", "total_amount", "order_date"]
}
metadata_provider = DummyMetaDataProvider(metadata)
runner = LineageRunner(sql, metadata_provider=metadata_provider)
# Get detailed column lineage
for src_col, tgt_col in runner.get_column_lineage():
print(f"{src_col} -> {tgt_col}")# Analyze Snowflake SQL
snowflake_sql = """
CREATE OR REPLACE TABLE analytics.daily_sales AS
SELECT
DATE_TRUNC('DAY', order_timestamp) as sale_date,
SUM(amount) as daily_revenue
FROM raw.transactions
WHERE order_timestamp >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY DATE_TRUNC('DAY', order_timestamp)
"""
runner = LineageRunner(snowflake_sql, dialect="snowflake")
print("Supported dialects:", runner.supported_dialects())# Launch interactive web visualization
runner.draw()
# Export to Cytoscape format for custom visualization
cyto_data = runner.to_cytoscape()
print("Graph data:", cyto_data)
# Print human-readable lineage reports
runner.print_table_lineage()
runner.print_column_lineage()complex_sql = """
-- Mix of supported and unsupported statements
CREATE VIEW sales_view AS SELECT * FROM sales;
GRANT SELECT ON sales_view TO analyst_role; -- Unsupported
DROP TABLE old_temp_table;
"""
# Silent mode skips unsupported statements
runner = LineageRunner(complex_sql, silent_mode=True)
print("Analysis completed, unsupported statements skipped")
# Verbose mode for debugging
runner_verbose = LineageRunner(complex_sql, verbose=True)Install with Tessl CLI
npx tessl i tessl/pypi-sqllineage