SQL Lineage Analysis Tool powered by Python
npx @tessl/cli install tessl/pypi-sqllineage@1.5.0SQLLineage is a Python library that analyzes SQL statements to extract data lineage information, identifying source and target tables without requiring deep knowledge of SQL parsing. It leverages pluggable parser libraries (sqlfluff and sqlparse) to parse SQL commands, analyzes the AST, stores lineage information in a graph using networkx, and provides human-readable results.
pip install sqllineagefrom sqllineage.runner import LineageRunnerCommon imports for working with models:
from sqllineage.core.models import Table, Column, SchemaFor metadata providers:
from sqllineage.core.metadata.dummy import DummyMetaDataProvider
from sqllineage.core.metadata.sqlalchemy import SQLAlchemyMetaDataProviderfrom sqllineage.runner import LineageRunner
# Basic table-level lineage analysis
sql = """
INSERT INTO target_table
SELECT a.col1, b.col2
FROM source_table_a a
JOIN source_table_b b ON a.id = b.id
"""
# Create runner and analyze SQL
runner = LineageRunner(sql)
# Get lineage results
print("Source tables:", [str(table) for table in runner.source_tables])
print("Target tables:", [str(table) for table in runner.target_tables])
# Column-level lineage
for src_col, tgt_col in runner.get_column_lineage():
print(f"{src_col} -> {tgt_col}")
# Launch web visualization
runner.draw()SQLLineage uses a pluggable parser architecture with configurable metadata providers:
This design enables support for 20+ SQL dialects while providing both programmatic and interactive interfaces for data lineage analysis.
Core functionality for analyzing SQL statements and extracting lineage information. The LineageRunner class provides the main programmatic interface with support for multiple SQL dialects, metadata integration, and various output formats.
class LineageRunner:
def __init__(
self,
sql: str,
dialect: str = "ansi",
metadata_provider: MetaDataProvider = DummyMetaDataProvider(),
verbose: bool = False,
silent_mode: bool = False,
draw_options: Optional[Dict[str, Any]] = None,
file_path: str = "."
): ...
@property
def source_tables(self) -> List[Table]: ...
@property
def target_tables(self) -> List[Table]: ...
@property
def intermediate_tables(self) -> List[Table]: ...
def get_column_lineage(
self,
exclude_path_ending_in_subquery: bool = True,
exclude_subquery_columns: bool = False
) -> List[Tuple[Column, Column]]: ...
def print_column_lineage(self) -> None: ...
def print_table_lineage(self) -> None: ...
def statements(self) -> List[str]: ...
def to_cytoscape(self, level: LineageLevel = LineageLevel.TABLE) -> List[Dict[str, Dict[str, str]]]: ...
@staticmethod
def supported_dialects() -> Dict[str, List[str]]: ...
def draw(self) -> None: ...SQL Analysis and Lineage Runner
Core data classes representing SQL entities like tables, columns, schemas, and subqueries. These models provide the foundation for lineage analysis and include support for complex SQL constructs like CTEs, subqueries, and cross-schema references.
class Table:
def __init__(self, name: str, schema: Schema = Schema(), **kwargs): ...
class Column:
def __init__(self, name: str, **kwargs): ...
class Schema:
def __init__(self, name: Optional[str] = None): ...
class SubQuery:
def __init__(self, subquery: Any, subquery_raw: str, alias: Optional[str]): ...Pluggable interfaces for providing schema and table metadata to enhance lineage analysis. Supports both simple dictionary-based metadata and database introspection via SQLAlchemy.
class MetaDataProvider:
def get_table_columns(self, table: Table, **kwargs) -> List[Column]: ...
class DummyMetaDataProvider(MetaDataProvider):
def __init__(self, metadata: Optional[Dict[str, List[str]]] = None): ...
class SQLAlchemyMetaDataProvider(MetaDataProvider):
def __init__(self, url: str, engine_kwargs: Optional[Dict[str, Any]] = None): ...Thread-safe configuration system for customizing SQLLineage behavior including default schemas, dialect-specific parsing options, and integration settings.
class _SQLLineageConfigLoader:
def __call__(self, **kwargs) -> "_SQLLineageConfigLoader": ...
def __enter__(self) -> None: ...
def __exit__(self, exc_type, exc_val, exc_tb) -> None: ...
# Global configuration instance
SQLLineageConfig = _SQLLineageConfigLoader()Command-line interface for analyzing SQL files and generating lineage reports, with options for different output formats and visualization modes.
def main(args=None) -> None:
"""The command line interface entry point"""Console script entry point: sqllineage
Web-based visualization and export capabilities for lineage graphs, including support for Cytoscape.js format and interactive browser interface.
def draw_lineage_graph(**kwargs) -> None: ...
def to_cytoscape(graph: DiGraph, compound=False) -> List[Dict[str, Dict[str, Any]]]: ...class SQLLineageException(Exception):
"""Base Exception for SQLLineage"""
class UnsupportedStatementException(SQLLineageException):
"""Raised for SQL statement that SQLLineage doesn't support analyzing"""
class InvalidSyntaxException(SQLLineageException):
"""Raised for SQL statement that parser cannot parse"""
class MetaDataProviderException(SQLLineageException):
"""Raised for MetaDataProvider errors"""
class ConfigException(SQLLineageException):
"""Raised for configuration errors"""# Package constants
NAME: str = "sqllineage"
VERSION: str = "1.5.4"
DEFAULT_DIALECT: str = "ansi"
DEFAULT_HOST: str = "localhost"
DEFAULT_PORT: int = 5000
STATIC_FOLDER: str = "build"
# Lineage levels
class LineageLevel:
TABLE = "table"
COLUMN = "column"
# Node and edge types for graph analysis
class NodeTag:
READ = "read"
WRITE = "write"
CTE = "cte"
DROP = "drop"
SOURCE_ONLY = "source_only"
TARGET_ONLY = "target_only"
SELFLOOP = "selfloop"
class EdgeTag:
INDEX = "index"
class EdgeType:
LINEAGE = "lineage"
RENAME = "rename"
HAS_COLUMN = "has_column"
HAS_ALIAS = "has_alias"