CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-sqllineage

SQL Lineage Analysis Tool powered by Python

Overview
Eval results
Files

docs/data-models.md

Data Models

Core data classes representing SQL entities like tables, columns, schemas, and subqueries. These models provide the foundation for lineage analysis and include support for complex SQL constructs like CTEs, subqueries, and cross-schema references.

Capabilities

Schema

Represents database schemas with support for default schema handling and cross-schema references.

class Schema:
    """Database schema; may be the default (unknown) schema when no name is given."""

    unknown: str = "<default>"  # Class attribute: placeholder name for the unknown schema
    
    def __init__(self, name: Optional[str] = None):
        """
        Create a schema object.
        
        Parameters:
        - name: schema name (optional; the default schema is used if not provided)
        """
    
    # raw_name: str  # Instance attribute set in __init__, e.g. Schema("production").raw_name is "production"
    
    def __str__(self) -> str:
        """Return the string representation of the schema."""
    
    def __bool__(self) -> bool:
        """Return True for a known (named) schema, False for the default unknown schema."""

Table

Represents database tables with schema qualification, alias support, and flexible name parsing.

class Table:
    """Database table with a schema, a table name, and an optional alias."""

    def __init__(self, name: str, schema: Schema = Schema(), **kwargs):
        """
        Create a table object.
        
        Parameters:
        - name: table name, either bare ('customers') or qualified ('sales.orders')
        - schema: Schema object (ignored if name is already qualified)
        - alias: table alias (passed via kwargs), e.g. Table('customers', alias='c')
        """
    
    # schema: Schema        # Instance attribute set in __init__
    # raw_name: str         # Instance attribute set in __init__; table name without the schema part
    # alias: str            # Instance attribute set in __init__
    
    def __str__(self) -> str:
        """Return the string representation as schema.table, e.g. 'analytics.customers'."""
    
    @staticmethod
    def of(table: Any) -> "Table":
        """Abstract factory method for creating a Table from parser objects."""

Column

Represents table columns with parent table relationships, source column tracking, and alias resolution.

class Column:
    """Table column with parent (table/subquery/path) and source-column tracking."""

    def __init__(self, name: str, **kwargs):
        """
        Create a column object.
        
        Parameters:
        - name: column name
        - Additional attributes passed via kwargs
        """
    
    # raw_name: str                     # Instance attribute set in __init__
    # source_columns: List[Tuple[str, Optional[str]]]  # Instance attribute set in __init__
    # from_alias: bool                  # Instance attribute set in __init__
    
    @property
    def parent(self) -> Optional[Union[Path, Table, SubQuery]]:
        """Get the parent table, subquery, or path (None is a possible result)."""
    
    @parent.setter
    def parent(self, value: Union[Path, Table, SubQuery]) -> None:
        """
        Assign a parent table, subquery, or path to this column.
        
        Typically done by the parser; can be used manually, e.g.
        column.parent = Table('raw.customers').
        """
    
    @property
    def parent_candidates(self) -> List[Union[Path, Table, SubQuery]]:
        """Get the list of possible parent tables/subqueries/paths."""
    
    # The return annotation quotes "Column" because the class name is not yet
    # bound while its own body is being evaluated (same convention as `of`).
    def to_source_columns(self, alias_mapping: Dict[str, Union[Path, Table, SubQuery]]) -> Set["Column"]:
        """
        Resolve source columns using alias mapping.
        
        Parameters:
        - alias_mapping: mapping of aliases to table/subquery objects
        
        Returns:
        Set of resolved source Column objects
        """
    
    @staticmethod
    def of(column: Any, **kwargs) -> "Column":
        """Abstract factory method for creating a Column from parser objects."""

SubQuery

Represents SQL subqueries with alias support and raw query preservation.

class SubQuery:
    """SQL subquery that keeps the parsed query, its raw SQL text, and an alias."""

    def __init__(self, subquery: Any, subquery_raw: str, alias: Optional[str]):
        """
        Create a subquery object.
        
        Parameters:
        - subquery: parsed subquery object (produced by the parser)
        - subquery_raw: raw SQL string of the subquery
        - alias: subquery alias (optional, may be None)
        """
    
    # query: Any            # Instance attribute set in __init__
    # query_raw: str        # Instance attribute set in __init__
    # alias: str            # Instance attribute set in __init__
    
    @staticmethod
    def of(subquery: Any, alias: Optional[str]) -> "SubQuery":
        """Abstract factory method for creating a SubQuery from parser objects."""

Path

Represents file paths and URIs for external data sources.

class Path:
    """File path or URI that identifies an external data source."""

    def __init__(self, uri: str):
        """
        Create a path object.
        
        Parameters:
        - uri: file path or URI, e.g. 's3://data-lake/raw/customers.parquet'
          or '/data/exports/customer_data.csv'
        """
    
    # uri: str              # Instance attribute set in __init__

Usage Examples

Basic Table and Column Creation

from sqllineage.core.models import Column, Schema, Table

# Build the schema first so it can be handed to a table
analytics_schema = Schema("analytics")

# Bare table name combined with an explicit schema object
customer_table = Table("customers", schema=analytics_schema)
print(customer_table)  # analytics.customers

# Qualified "schema.table" name: the schema part comes from the name itself
orders_table = Table("sales.orders")
print(orders_table.schema)  # sales
print(orders_table.raw_name)  # orders

# Columns only need a name
customer_id, order_total = Column("customer_id"), Column("total_amount")

Working with Aliases

# Give the table a short alias through the alias keyword argument
customer_table = Table("customers", alias="c")
print(customer_table.alias)  # c

# Inspect the from_alias flag on a column
column = Column("customer_name")
came_from_alias = column.from_alias
if came_from_alias:
    print("Column comes from table alias")

Schema Handling

# A schema built without a name is the default (unknown) schema;
# one built with a name is a known schema
default_schema = Schema()
named_schema = Schema("production")

# Truthiness tells the two apart
print(bool(default_schema))  # False (unknown schema)
print(bool(named_schema))  # True

# The name is kept on raw_name
print(named_schema.raw_name)  # production

Subquery Representation

# Subqueries are typically created by the parser
# but can be constructed manually for testing
# Raw SQL text of the subquery, including the surrounding parentheses
subquery_sql = "(SELECT customer_id, COUNT(*) FROM orders GROUP BY customer_id)"
# Argument order follows SubQuery.__init__: parsed subquery object, raw SQL string, alias.
# parsed_query stands for a parser-produced object, so the call is left commented out.
# subquery = SubQuery(parsed_query, subquery_sql, "order_counts")

Complex Table Relationships

from sqllineage.core.models import Table
from sqllineage.exceptions import SQLLineageException

# Multi-level schema qualification
# Some databases support database.schema.table format
try:
    # Only the construction can raise, so keep the try body to this one line
    table = Table("prod_db.analytics.customer_summary")
except SQLLineageException as e:
    print(f"Invalid table format: {e}")
else:
    print(f"Schema: {table.schema}, Table: {table.raw_name}")

Column Lineage Tracking

# Two columns that will be attached to different tables
source_col = Column("customer_id")
target_col = Column("cust_id")

# Attach each column to its table (normally the parser does this)
source_col.parent = Table("raw.customers")
target_col.parent = Table("analytics.customer_summary")

# Report each column as <parent>.<column name>
for label, col in (("Source", source_col), ("Target", target_col)):
    print(f"{label}: {col.parent}.{col.raw_name}")

Working with File Paths

# NOTE: this is sqllineage's own Path model, not pathlib.Path
from sqllineage.core.models import Path

# For SQL that references files (e.g., Spark, BigQuery)
data_path = Path("s3://data-lake/raw/customers.parquet")
print(data_path.uri)  # s3://data-lake/raw/customers.parquet

# Local file paths
local_path = Path("/data/exports/customer_data.csv")
print(local_path.uri)  # /data/exports/customer_data.csv

Install with Tessl CLI

npx tessl i tessl/pypi-sqllineage

docs

cli-interface.md

configuration.md

data-models.md

index.md

lineage-runner.md

metadata-providers.md

visualization-export.md

tile.json