SQL Lineage Analysis Tool powered by Python
Core data classes representing SQL entities like tables, columns, schemas, and subqueries. These models provide the foundation for lineage analysis and include support for complex SQL constructs like CTEs, subqueries, and cross-schema references.
Represents database schemas with support for default schema handling and cross-schema references.
class Schema:
unknown: str = "<default>" # Class attribute for unknown schema
def __init__(self, name: Optional[str] = None):
"""
Create a schema object.
Parameters:
- name: schema name (optional, uses default if not provided)
"""
# raw_name: str # Instance attribute set in __init__
def __str__(self) -> str:
"""String representation of the schema"""
def __bool__(self) -> bool:
"""Check if schema is known (not the default unknown schema)"""

Represents database tables with schema qualification, alias support, and flexible name parsing.
class Table:
def __init__(self, name: str, schema: Schema = Schema(), **kwargs):
"""
Create a table object.
Parameters:
- name: table name, optionally qualified (schema.table format)
- schema: Schema object (ignored if name is already qualified)
- alias: table alias (passed via kwargs)
"""
# schema: Schema # Instance attribute set in __init__
# raw_name: str # Instance attribute set in __init__
# alias: str # Instance attribute set in __init__
def __str__(self) -> str:
"""String representation as schema.table"""
@staticmethod
def of(table: Any) -> "Table":
"""Abstract factory method for creating Table from parser objects"""

Represents table columns with parent table relationships, source column tracking, and alias resolution.
class Column:
def __init__(self, name: str, **kwargs):
"""
Create a column object.
Parameters:
- name: column name
- Additional attributes passed via kwargs
"""
# raw_name: str # Instance attribute set in __init__
# source_columns: List[Tuple[str, Optional[str]]] # Instance attribute set in __init__
# from_alias: bool # Instance attribute set in __init__
@property
def parent(self) -> Optional[Union[Path, Table, SubQuery]]:
"""Get the parent table, subquery, or path"""
@property
def parent_candidates(self) -> List[Union[Path, Table, SubQuery]]:
"""Get list of possible parent tables/subqueries"""
def to_source_columns(self, alias_mapping: Dict[str, Union[Path, Table, SubQuery]]) -> Set[Column]:
"""
Resolve source columns using alias mapping.
Parameters:
- alias_mapping: mapping of aliases to table/subquery objects
Returns:
Set of resolved source Column objects
"""
@staticmethod
def of(column: Any, **kwargs) -> "Column":
"""Abstract factory method for creating Column from parser objects"""

Represents SQL subqueries with alias support and raw query preservation.
class SubQuery:
def __init__(self, subquery: Any, subquery_raw: str, alias: Optional[str]):
"""
Create a subquery object.
Parameters:
- subquery: parsed subquery object
- subquery_raw: raw SQL string of the subquery
- alias: subquery alias (optional)
"""
# query: Any # Instance attribute set in __init__
# query_raw: str # Instance attribute set in __init__
# alias: str # Instance attribute set in __init__
@staticmethod
def of(subquery: Any, alias: Optional[str]) -> "SubQuery":
"""Abstract factory method for creating SubQuery from parser objects"""

Represents file paths and URIs for external data sources.
class Path:
def __init__(self, uri: str):
"""
Create a path object.
Parameters:
- uri: file path or URI
"""
# uri: str # Instance attribute set in __init__

from sqllineage.core.models import Table, Column, Schema
# Create schema
analytics_schema = Schema("analytics")
# Create table with schema
customer_table = Table("customers", schema=analytics_schema)
print(customer_table) # analytics.customers
# Create table with qualified name
orders_table = Table("sales.orders")
print(orders_table.schema) # sales
print(orders_table.raw_name) # orders
# Create columns
customer_id = Column("customer_id")
order_total = Column("total_amount")

# Table with alias
customer_table = Table("customers", alias="c")
print(customer_table.alias) # c
# Check column alias sources
column = Column("customer_name")
if column.from_alias:
    print("Column comes from table alias")

# Default schema
default_schema = Schema()
print(bool(default_schema)) # False (unknown schema)
# Named schema
named_schema = Schema("production")
print(bool(named_schema)) # True
print(named_schema.raw_name) # production

# Subqueries are typically created by the parser
# but can be constructed manually for testing
subquery_sql = "(SELECT customer_id, COUNT(*) FROM orders GROUP BY customer_id)"
# subquery = SubQuery(parsed_query, subquery_sql, "order_counts")

# Multi-level schema qualification
# Some databases support database.schema.table format
try:
    table = Table("prod_db.analytics.customer_summary")
    print(f"Schema: {table.schema}, Table: {table.raw_name}")
except SQLLineageException as e:
    print(f"Invalid table format: {e}")

# Columns can track their source relationships
source_col = Column("customer_id")
target_col = Column("cust_id")
# Parent table assignment (typically done by parser)
source_col.parent = Table("raw.customers")
target_col.parent = Table("analytics.customer_summary")
print(f"Source: {source_col.parent}.{source_col.raw_name}")
print(f"Target: {target_col.parent}.{target_col.raw_name}")

# For SQL that references files (e.g., Spark, BigQuery)
data_path = Path("s3://data-lake/raw/customers.parquet")
print(data_path.uri) # s3://data-lake/raw/customers.parquet
# Local file paths
local_path = Path("/data/exports/customer_data.csv")
print(local_path.uri) # /data/exports/customer_data.csv

Install with Tessl CLI
npx tessl i tessl/pypi-sqllineage