SQL Lineage Analysis Tool powered by Python
Command-line interface for analyzing SQL files and generating lineage reports, with options for different output formats and visualization modes. The CLI provides a convenient way to perform lineage analysis without writing Python code.
def main(args=None) -> None:
"""
The command line interface entry point.
Parameters:
- args: command line arguments for sqllineage command (optional)
"""The package installs a console script entry point:
sqllineage = sqllineage.cli:main# SQL from command line
sqllineage -e "SELECT * FROM customers"
# SQL from file
sqllineage -f queries.sql
# Multiple files
sqllineage -f query1.sql -f query2.sql

# Verbose mode - show statement level lineage
sqllineage -v -e "SELECT * FROM customers"
# Set lineage level (table or column)
sqllineage -l table -f queries.sql
sqllineage -l column -f queries.sql

# Launch graph visualization
sqllineage -g -f queries.sql
# Specify visualization server host and port
sqllineage -g -H 0.0.0.0 -p 8080 -f queries.sql

# Specify SQL dialect
sqllineage -d snowflake -f snowflake_queries.sql
sqllineage -d postgres -f postgres_queries.sql
# List available dialects
sqllineage --dialects

# Silent mode - skip unsupported statements
sqllineage --silent_mode -f mixed_queries.sql

# Use SQLAlchemy metadata provider
sqllineage --sqlalchemy_url "postgresql://user:pass@host/db" -f queries.sql

| Argument | Short | Type | Description |
|---|---|---|---|
| --version | | flag | Show version and exit |
| -e <sql> | -e | string | SQL from command line |
| -f <filename> | -f | string | SQL from files (can be repeated) |
| --verbose | -v | flag | Show statement level lineage result |
| --level <level> | -l | choice | Lineage level: table or column |
| --graph-visualization | -g | flag | Show graph visualization |
| --host <host> | -H | string | Visualization webserver host (default: localhost) |
| --port <port> | -p | integer | Visualization webserver port (default: 5000) |
| --dialect <dialect> | -d | string | SQL dialect (default: ansi) |
| --dialects | | flag | List available dialects |
| --silent_mode | | flag | Skip unsupported statements |
| --sqlalchemy_url <url> | | string | SQLAlchemy URL for metadata |
# Analyze a single SQL statement
sqllineage -e "INSERT INTO summary SELECT * FROM details"
# Output:
# <default>.details
# =================
# <default>.summary

# Create a SQL file
# Write a two-statement sample file: a CTAS that builds staging.customers
# from raw.customer_data, then an INSERT that joins staging.customers with
# raw.interactions. The quoted 'EOF' delimiter keeps the SQL literal
# (no shell expansion inside the heredoc).
cat > analysis.sql << 'EOF'
CREATE TABLE staging.customers AS
SELECT
customer_id,
customer_name,
email
FROM raw.customer_data
WHERE active = 'Y';
INSERT INTO analytics.customer_summary
SELECT
customer_id,
customer_name,
COUNT(*) as interaction_count
FROM staging.customers c
JOIN raw.interactions i ON c.customer_id = i.customer_id
GROUP BY customer_id, customer_name;
EOF
# Analyze the file
sqllineage -f analysis.sql

# Verbose mode - statement-level breakdown for the same file
sqllineage -v -f analysis.sql
# Output includes statement-level breakdown:
# ************** Statement #1 **************
# CREATE TABLE staging.customers AS SELECT customer_id, customer_name, email FROM raw.customer_data WHERE active = 'Y'
# raw.customer_data
# =================
# staging.customers
#
# ************** Statement #2 **************
# INSERT INTO analytics.customer_summary SELECT customer_id, customer_name, COUNT(*) as interaction_count FROM staging.customers c JOIN raw.interactions i ON c.customer_id = i.customer_id GROUP BY customer_id, customer_name
# staging.customers
# raw.interactions
# =================
# analytics.customer_summary

# Column-level lineage for the same file
sqllineage -l column -f analysis.sql
# Output shows column-level relationships:
# raw.customer_data.customer_id -> staging.customers.customer_id
# raw.customer_data.customer_name -> staging.customers.customer_name
# raw.customer_data.email -> staging.customers.email
# staging.customers.customer_id -> analytics.customer_summary.customer_id
# staging.customers.customer_name -> analytics.customer_summary.customer_name

# List available SQL dialects
sqllineage --dialects
# Output:
# {
# "sqlparse": ["non-validating"],
# "sqlfluff": ["ansi", "bigquery", "clickhouse", "databricks", "db2", "duckdb", "exasol", "hive", "materialize", "mysql", "oracle", "postgres", "redshift", "snowflake", "soql", "sparksql", "sqlite", "teradata", "trino", "tsql"]
# }
# Analyze Snowflake SQL
sqllineage -d snowflake -e "CREATE OR REPLACE TABLE analytics.daily_sales AS SELECT DATE_TRUNC('DAY', order_timestamp) as sale_date, SUM(amount) as daily_revenue FROM raw.transactions GROUP BY DATE_TRUNC('DAY', order_timestamp)"

# Launch web visualization (opens browser)
sqllineage -g -f complex_queries.sql
# Specify custom host/port for visualization
sqllineage -g -H 0.0.0.0 -p 8080 -f queries.sql

# PostgreSQL metadata
sqllineage --sqlalchemy_url "postgresql://analyst:password@db.company.com:5432/analytics" -l column -f postgres_queries.sql
# Snowflake metadata
sqllineage --sqlalchemy_url "snowflake://user:password@account/database/schema" -d snowflake -l column -f snowflake_queries.sql
# MySQL metadata
sqllineage --sqlalchemy_url "mysql://user:password@localhost:3306/analytics" -d mysql -f mysql_queries.sql

# Create mixed SQL file with supported and unsupported statements
# Write mixed.sql: two statements sqllineage can analyze (CREATE VIEW,
# INSERT ... SELECT) interleaved with two it cannot (GRANT, CREATE PROCEDURE),
# to demonstrate --silent_mode below. Quoted 'EOF' keeps the SQL literal.
cat > mixed.sql << 'EOF'
-- Supported
CREATE VIEW sales_view AS SELECT * FROM sales;
-- Unsupported (DDL permissions)
GRANT SELECT ON sales_view TO analyst_role;
-- Supported
INSERT INTO summary SELECT customer_id, SUM(amount) FROM sales_view GROUP BY customer_id;
-- Unsupported (procedural)
CREATE PROCEDURE update_summary() AS BEGIN UPDATE summary SET last_updated = NOW(); END;
EOF
# Normal mode (shows errors)
sqllineage -f mixed.sql
# Silent mode (skips unsupported statements)
sqllineage --silent_mode -f mixed.sql

# Multi-step analysis with different outputs
echo "CREATE TABLE mart.customer_360 AS
SELECT
c.customer_id,
c.customer_name,
p.total_purchases,
s.support_tickets,
m.email_engagement
FROM raw.customers c
LEFT JOIN analytics.purchase_summary p ON c.customer_id = p.customer_id
LEFT JOIN analytics.support_summary s ON c.customer_id = s.customer_id
LEFT JOIN analytics.marketing_summary m ON c.customer_id = m.customer_id" > customer_360.sql
# Table-level analysis
echo "=== TABLE LINEAGE ==="
sqllineage -f customer_360.sql
# Column-level analysis
echo -e "\n=== COLUMN LINEAGE ==="
sqllineage -l column -f customer_360.sql
# Verbose analysis
echo -e "\n=== VERBOSE ANALYSIS ==="
sqllineage -v -f customer_360.sql
# Launch visualization
sqllineage -g -f customer_360.sql

# Set default schema via environment
export SQLLINEAGE_DEFAULT_SCHEMA="analytics"
# Analyze unqualified table references
sqllineage -e "SELECT * FROM customers" # Interprets as analytics.customers
# Override with different dialect
sqllineage -d postgres -e "SELECT customer_id, email FROM users WHERE created_date >= CURRENT_DATE - INTERVAL '30 days'"

# Process multiple SQL files
# Analyze every SQL file under queries/, printing a header per file.
for file in queries/*.sql; do
  # Without nullglob, an empty queries/ leaves the literal pattern in $file;
  # skip it so sqllineage is never called on a non-existent path.
  [[ -e "$file" ]] || continue
  echo "=== Analyzing $file ==="
  sqllineage -f "$file"
  echo
done
# Generate lineage reports for all files, one report per input file.
mkdir -p reports
for file in queries/*.sql; do
  # Guard against an unmatched glob (would otherwise produce a bogus
  # "reports/*_lineage.txt" from the literal pattern).
  [[ -e "$file" ]] || continue
  basename=$(basename "$file" .sql)
  sqllineage -v -f "$file" > "reports/${basename}_lineage.txt"
done
#!/bin/bash
# lineage_check.sh - CI script to validate SQL lineage
set -e
echo "Validating SQL lineage in migration files..."
# sqllineage takes one file per -f flag (see "Multiple files" above);
# a single -f followed by a glob would attach only the first expansion
# to -f. Build the repeated flag list explicitly.
args=()
for f in migrations/*.sql; do
  args+=(-f "$f")
done
# Check for unsupported SQL patterns
if ! sqllineage --silent_mode "${args[@]}" > /dev/null 2>&1; then
  echo "ERROR: SQL lineage analysis failed"
  exit 1
fi
# Generate lineage documentation
sqllineage -v "${args[@]}" > docs/lineage_report.txt
echo "SQL lineage validation completed successfully"Install with Tessl CLI
npx tessl i tessl/pypi-sqllineage