CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-sqllineage

SQL Lineage Analysis Tool powered by Python

Overview
Eval results
Files

visualization-export.mddocs/

Visualization and Export

Web-based visualization and export capabilities for lineage graphs, including support for Cytoscape.js format and interactive browser interface. These features enable interactive exploration of data lineage and integration with external visualization tools.

Capabilities

Web Visualization

Interactive web interface for exploring lineage graphs with zooming, panning, and filtering capabilities.

def draw_lineage_graph(**kwargs) -> None:
    """
    Launch interactive web visualization server for lineage graphs.
    
    Parameters:
    - **kwargs: configuration options for the web server
      - host: server host (default: "localhost")
      - port: server port (default: 5000)
      - debug: enable debug mode (default: False)
    """

class SQLLineageApp:
    """WSGI application for web visualization interface"""
    
    def __init__(self) -> None:
        """Initialize the web application"""
    
    def route(self, path: str) -> Callable:
        """
        Register a route handler.
        
        Parameters:
        - path: URL path to handle
        
        Returns:
        Decorator function for route handlers
        """
    
    def __call__(self, environ, start_response) -> List[bytes]:
        """
        WSGI application interface.
        
        Parameters:
        - environ: WSGI environment dictionary
        - start_response: WSGI start_response callable
        
        Returns:
        Response body as list of bytes
        """

Data Export

Functions for converting lineage graphs to various formats for external tools and custom visualizations.

def to_cytoscape(graph: DiGraph, compound=False) -> List[Dict[str, Dict[str, Any]]]:
    """
    Convert NetworkX graph to Cytoscape.js format for web visualization.
    
    Parameters:
    - graph: NetworkX directed graph containing lineage information
    - compound: whether to use compound nodes for hierarchical grouping
    
    Returns:
    List of dictionaries representing nodes and edges in Cytoscape.js format.
    Each dictionary has a 'data' key containing node/edge properties.
    """

LineageRunner Visualization Methods

Visualization and export methods integrated into the LineageRunner class.

def draw(self) -> None:
    """
    Launch interactive web visualization of the lineage graph.
    Opens a web browser with an interactive graph interface.
    """

def to_cytoscape(self, level: LineageLevel = LineageLevel.TABLE) -> List[Dict[str, Dict[str, str]]]:
    """
    Export lineage graph to Cytoscape.js format.
    
    Parameters:
    - level: lineage level (LineageLevel.TABLE or LineageLevel.COLUMN)
    
    Returns:
    List of dictionaries representing nodes and edges for Cytoscape.js
    """

def print_table_lineage(self) -> None:
    """Print table-level lineage to stdout in human-readable format"""

def print_column_lineage(self) -> None:
    """Print column-level lineage to stdout in human-readable format"""

Usage Examples

Basic Web Visualization

from sqllineage.runner import LineageRunner

sql = """
INSERT INTO analytics.customer_summary
SELECT 
    c.customer_id,
    c.customer_name,
    COUNT(o.order_id) as order_count,
    SUM(o.total_amount) as total_spent
FROM raw.customers c
LEFT JOIN raw.orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name
"""

runner = LineageRunner(sql)

# Launch web visualization (opens browser at http://localhost:5000)
runner.draw()

Cytoscape.js Export

from sqllineage.runner import LineageRunner
import json

runner = LineageRunner(sql)

# Export to Cytoscape.js format
cyto_data = runner.to_cytoscape()

# Save as JSON for web applications
with open('lineage_graph.json', 'w') as f:
    json.dump(cyto_data, f, indent=2)

print("Graph data structure:")
for item in cyto_data[:3]:  # Show first 3 items
    print(json.dumps(item, indent=2))

Custom Web Server Configuration

from sqllineage.drawing import draw_lineage_graph

# Launch with custom host and port
draw_lineage_graph(
    host="0.0.0.0",  # Accept connections from any IP
    port=8080,       # Custom port
    debug=True       # Enable debug mode
)

Production WSGI Deployment

from sqllineage.drawing import app

# The app object is WSGI-compatible and can be deployed with:
# - gunicorn: gunicorn sqllineage.drawing:app
# - uwsgi: uwsgi --http :8000 --wsgi-file sqllineage/drawing.py --callable app
# - mod_wsgi: WSGIScriptAlias / /path/to/sqllineage/drawing.py

# Custom WSGI configuration
def create_app():
    from sqllineage.drawing import SQLLineageApp
    return SQLLineageApp()

application = create_app()

Complex Graph Visualization

from sqllineage.runner import LineageRunner
from sqllineage.core.metadata.dummy import DummyMetaDataProvider

# Complex SQL with multiple CTEs and joins
complex_sql = """
WITH regional_sales AS (
    SELECT 
        region,
        SUM(sales_amount) as total_sales
    FROM raw.sales_data
    WHERE sale_date >= '2023-01-01'
    GROUP BY region
),
top_regions AS (
    SELECT region, total_sales
    FROM regional_sales 
    WHERE total_sales > 1000000
),
customer_metrics AS (
    SELECT 
        customer_id,
        region,
        COUNT(*) as order_count,
        AVG(order_value) as avg_order_value
    FROM raw.orders o
    JOIN raw.customers c ON o.customer_id = c.customer_id
    WHERE c.region IN (SELECT region FROM top_regions)
    GROUP BY customer_id, region
)
INSERT INTO analytics.high_value_customers
SELECT 
    cm.customer_id,
    cm.region,
    cm.order_count,
    cm.avg_order_value,
    tr.total_sales as region_total_sales
FROM customer_metrics cm
JOIN top_regions tr ON cm.region = tr.region
WHERE cm.avg_order_value > 500
"""

# Provide metadata for better visualization
metadata = {
    "raw.sales_data": ["region", "sales_amount", "sale_date"],
    "raw.orders": ["customer_id", "order_value", "order_date"],
    "raw.customers": ["customer_id", "region", "customer_name"],
    "analytics.high_value_customers": ["customer_id", "region", "order_count", "avg_order_value", "region_total_sales"]
}

provider = DummyMetaDataProvider(metadata)
runner = LineageRunner(complex_sql, metadata_provider=provider)

# Launch visualization for complex lineage
runner.draw()

Compound Node Visualization

from sqllineage.io import to_cytoscape
from sqllineage.runner import LineageRunner
import networkx as nx

runner = LineageRunner(sql)

# Access the internal graph for advanced manipulation
graph = runner._sql_holder.table_lineage_graph

# Export with compound nodes for hierarchical visualization
compound_data = to_cytoscape(graph, compound=True)

# Compound format includes parent-child relationships
print("Compound visualization data:")
for item in compound_data:
    if 'parent' in item.get('data', {}):
        print(f"Child node: {item['data']['id']} -> Parent: {item['data']['parent']}")

Custom Visualization Integration

import json
from sqllineage.runner import LineageRunner

class CustomLineageVisualizer:
    def __init__(self, runner):
        self.runner = runner
        self.cyto_data = runner.to_cytoscape()
    
    def to_d3_format(self):
        """Convert to D3.js force-directed graph format"""
        nodes = []
        links = []
        node_ids = set()
        
        for item in self.cyto_data:
            data = item['data']
            if 'source' in data and 'target' in data:
                # This is an edge
                links.append({
                    'source': data['source'],
                    'target': data['target']
                })
                node_ids.add(data['source'])
                node_ids.add(data['target'])
            else:
                # This is a node
                nodes.append({
                    'id': data['id'],
                    'group': 1  # Could be enhanced with table type info
                })
        
        return {'nodes': nodes, 'links': links}
    
    def to_graphviz_dot(self):
        """Convert to Graphviz DOT format"""
        dot_lines = ['digraph lineage {']
        
        for item in self.cyto_data:
            data = item['data']
            if 'source' in data and 'target' in data:
                dot_lines.append(f'  "{data["source"]}" -> "{data["target"]}";')
        
        dot_lines.append('}')
        return '\n'.join(dot_lines)

# Usage
runner = LineageRunner(sql)
visualizer = CustomLineageVisualizer(runner)

# Generate different format exports
d3_data = visualizer.to_d3_format()
with open('lineage_d3.json', 'w') as f:
    json.dump(d3_data, f, indent=2)

dot_format = visualizer.to_graphviz_dot()
with open('lineage.dot', 'w') as f:
    f.write(dot_format)

Text-Based Output

from sqllineage.runner import LineageRunner
import sys

runner = LineageRunner(sql)

# Print to console
print("=== TABLE LINEAGE ===")
runner.print_table_lineage()

print("\n=== COLUMN LINEAGE ===")
runner.print_column_lineage()

# Redirect to file
with open('lineage_report.txt', 'w') as f:
    # Temporarily redirect stdout
    original_stdout = sys.stdout
    sys.stdout = f
    
    print("SQL Lineage Analysis Report")
    print("=" * 50)
    print("\nTable-Level Lineage:")
    runner.print_table_lineage()
    
    print("\nColumn-Level Lineage:")
    runner.print_column_lineage()
    
    # Restore stdout
    sys.stdout = original_stdout

print("Report saved to lineage_report.txt")

Integration with Jupyter Notebooks

# In Jupyter notebook
from sqllineage.runner import LineageRunner
from IPython.display import display, HTML, JSON
import json

runner = LineageRunner(sql)

# Display Cytoscape data in notebook
cyto_data = runner.to_cytoscape()
display(JSON(cyto_data))

# Embed web visualization in notebook (requires iframe)
html_content = f"""
<iframe src="http://localhost:5000" width="100%" height="600px" 
        style="border: 1px solid #ccc;">
</iframe>
"""

# Start visualization server in background
import threading
viz_thread = threading.Thread(target=runner.draw)
viz_thread.daemon = True
viz_thread.start()

# Display iframe
display(HTML(html_content))

API Endpoint for Visualization

from sqllineage.drawing import SQLLineageApp
from sqllineage.runner import LineageRunner
import json

app = SQLLineageApp()

@app.route('/api/analyze')
def analyze_sql_endpoint(request_data):
    """Custom API endpoint for SQL analysis"""
    sql = request_data.get('sql', '')
    dialect = request_data.get('dialect', 'ansi')
    
    try:
        runner = LineageRunner(sql, dialect=dialect)
        return {
            'status': 'success',
            'source_tables': [str(t) for t in runner.source_tables],
            'target_tables': [str(t) for t in runner.target_tables],
            'cytoscape_data': runner.to_cytoscape()
        }
    except Exception as e:
        return {
            'status': 'error',
            'message': str(e)
        }

# The app can be extended with custom routes for specialized visualization needs

Install with Tessl CLI

npx tessl i tessl/pypi-sqllineage

docs

cli-interface.md

configuration.md

data-models.md

index.md

lineage-runner.md

metadata-providers.md

visualization-export.md

tile.json