SQL Lineage Analysis Tool powered by Python
Web-based visualization and export capabilities for lineage graphs, including support for Cytoscape.js format and interactive browser interface. These features enable interactive exploration of data lineage and integration with external visualization tools.
Interactive web interface for exploring lineage graphs with zooming, panning, and filtering capabilities.
def draw_lineage_graph(**kwargs) -> None:
"""
Launch interactive web visualization server for lineage graphs.
Parameters:
- **kwargs: configuration options for the web server
- host: server host (default: "localhost")
- port: server port (default: 5000)
- debug: enable debug mode (default: False)
"""
class SQLLineageApp:
"""WSGI application for web visualization interface"""
def __init__(self) -> None:
"""Initialize the web application"""
def route(self, path: str) -> Callable:
"""
Register a route handler.
Parameters:
- path: URL path to handle
Returns:
Decorator function for route handlers
"""
def __call__(self, environ, start_response) -> List[bytes]:
"""
WSGI application interface.
Parameters:
- environ: WSGI environment dictionary
- start_response: WSGI start_response callable
Returns:
Response body as list of bytes
"""Functions for converting lineage graphs to various formats for external tools and custom visualizations.
def to_cytoscape(graph: DiGraph, compound=False) -> List[Dict[str, Dict[str, Any]]]:
"""
Convert NetworkX graph to Cytoscape.js format for web visualization.
Parameters:
- graph: NetworkX directed graph containing lineage information
- compound: whether to use compound nodes for hierarchical grouping
Returns:
List of dictionaries representing nodes and edges in Cytoscape.js format.
Each dictionary has a 'data' key containing node/edge properties.
"""Visualization and export methods integrated into the LineageRunner class.
def draw(self) -> None:
"""
Launch interactive web visualization of the lineage graph.
Opens a web browser with an interactive graph interface.
"""
def to_cytoscape(self, level: LineageLevel = LineageLevel.TABLE) -> List[Dict[str, Dict[str, str]]]:
"""
Export lineage graph to Cytoscape.js format.
Parameters:
- level: lineage level (LineageLevel.TABLE or LineageLevel.COLUMN)
Returns:
List of dictionaries representing nodes and edges for Cytoscape.js
"""
def print_table_lineage(self) -> None:
"""Print table-level lineage to stdout in human-readable format"""
def print_column_lineage(self) -> None:
"""Print column-level lineage to stdout in human-readable format"""from sqllineage.runner import LineageRunner
sql = """
INSERT INTO analytics.customer_summary
SELECT
c.customer_id,
c.customer_name,
COUNT(o.order_id) as order_count,
SUM(o.total_amount) as total_spent
FROM raw.customers c
LEFT JOIN raw.orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.customer_name
"""
runner = LineageRunner(sql)
# Launch web visualization (opens browser at http://localhost:5000)
runner.draw()from sqllineage.runner import LineageRunner
import json
runner = LineageRunner(sql)
# Export to Cytoscape.js format
cyto_data = runner.to_cytoscape()
# Save as JSON for web applications
with open('lineage_graph.json', 'w') as f:
json.dump(cyto_data, f, indent=2)
print("Graph data structure:")
for item in cyto_data[:3]: # Show first 3 items
print(json.dumps(item, indent=2))from sqllineage.drawing import draw_lineage_graph
# Launch with custom host and port
draw_lineage_graph(
host="0.0.0.0", # Accept connections from any IP
port=8080, # Custom port
debug=True # Enable debug mode
)from sqllineage.drawing import app
# The app object is WSGI-compatible and can be deployed with:
# - gunicorn: gunicorn sqllineage.drawing:app
# - uwsgi: uwsgi --http :8000 --wsgi-file sqllineage/drawing.py --callable app
# - mod_wsgi: WSGIScriptAlias / /path/to/sqllineage/drawing.py
# Custom WSGI configuration
def create_app():
from sqllineage.drawing import SQLLineageApp
return SQLLineageApp()
application = create_app()from sqllineage.runner import LineageRunner
from sqllineage.core.metadata.dummy import DummyMetaDataProvider
# Complex SQL with multiple CTEs and joins
complex_sql = """
WITH regional_sales AS (
SELECT
region,
SUM(sales_amount) as total_sales
FROM raw.sales_data
WHERE sale_date >= '2023-01-01'
GROUP BY region
),
top_regions AS (
SELECT region, total_sales
FROM regional_sales
WHERE total_sales > 1000000
),
customer_metrics AS (
SELECT
customer_id,
region,
COUNT(*) as order_count,
AVG(order_value) as avg_order_value
FROM raw.orders o
JOIN raw.customers c ON o.customer_id = c.customer_id
WHERE c.region IN (SELECT region FROM top_regions)
GROUP BY customer_id, region
)
INSERT INTO analytics.high_value_customers
SELECT
cm.customer_id,
cm.region,
cm.order_count,
cm.avg_order_value,
tr.total_sales as region_total_sales
FROM customer_metrics cm
JOIN top_regions tr ON cm.region = tr.region
WHERE cm.avg_order_value > 500
"""
# Provide metadata for better visualization
metadata = {
"raw.sales_data": ["region", "sales_amount", "sale_date"],
"raw.orders": ["customer_id", "order_value", "order_date"],
"raw.customers": ["customer_id", "region", "customer_name"],
"analytics.high_value_customers": ["customer_id", "region", "order_count", "avg_order_value", "region_total_sales"]
}
provider = DummyMetaDataProvider(metadata)
runner = LineageRunner(complex_sql, metadata_provider=provider)
# Launch visualization for complex lineage
runner.draw()from sqllineage.io import to_cytoscape
from sqllineage.runner import LineageRunner
import networkx as nx
runner = LineageRunner(sql)
# Access the internal graph for advanced manipulation
graph = runner._sql_holder.table_lineage_graph
# Export with compound nodes for hierarchical visualization
compound_data = to_cytoscape(graph, compound=True)
# Compound format includes parent-child relationships
print("Compound visualization data:")
for item in compound_data:
if 'parent' in item.get('data', {}):
print(f"Child node: {item['data']['id']} -> Parent: {item['data']['parent']}")import json
from sqllineage.runner import LineageRunner
class CustomLineageVisualizer:
def __init__(self, runner):
self.runner = runner
self.cyto_data = runner.to_cytoscape()
def to_d3_format(self):
"""Convert to D3.js force-directed graph format"""
nodes = []
links = []
node_ids = set()
for item in self.cyto_data:
data = item['data']
if 'source' in data and 'target' in data:
# This is an edge
links.append({
'source': data['source'],
'target': data['target']
})
node_ids.add(data['source'])
node_ids.add(data['target'])
else:
# This is a node
nodes.append({
'id': data['id'],
'group': 1 # Could be enhanced with table type info
})
return {'nodes': nodes, 'links': links}
def to_graphviz_dot(self):
"""Convert to Graphviz DOT format"""
dot_lines = ['digraph lineage {']
for item in self.cyto_data:
data = item['data']
if 'source' in data and 'target' in data:
dot_lines.append(f' "{data["source"]}" -> "{data["target"]}";')
dot_lines.append('}')
return '\n'.join(dot_lines)
# Usage
runner = LineageRunner(sql)
visualizer = CustomLineageVisualizer(runner)
# Generate different format exports
d3_data = visualizer.to_d3_format()
with open('lineage_d3.json', 'w') as f:
json.dump(d3_data, f, indent=2)
dot_format = visualizer.to_graphviz_dot()
with open('lineage.dot', 'w') as f:
f.write(dot_format)from sqllineage.runner import LineageRunner
import sys
runner = LineageRunner(sql)
# Print to console
print("=== TABLE LINEAGE ===")
runner.print_table_lineage()
print("\n=== COLUMN LINEAGE ===")
runner.print_column_lineage()
# Redirect to file
with open('lineage_report.txt', 'w') as f:
# Temporarily redirect stdout
original_stdout = sys.stdout
sys.stdout = f
print("SQL Lineage Analysis Report")
print("=" * 50)
print("\nTable-Level Lineage:")
runner.print_table_lineage()
print("\nColumn-Level Lineage:")
runner.print_column_lineage()
# Restore stdout
sys.stdout = original_stdout
print("Report saved to lineage_report.txt")# In Jupyter notebook
from sqllineage.runner import LineageRunner
from IPython.display import display, HTML, JSON
import json
runner = LineageRunner(sql)
# Display Cytoscape data in notebook
cyto_data = runner.to_cytoscape()
display(JSON(cyto_data))
# Embed web visualization in notebook (requires iframe)
html_content = f"""
<iframe src="http://localhost:5000" width="100%" height="600px"
style="border: 1px solid #ccc;">
</iframe>
"""
# Start visualization server in background
import threading
viz_thread = threading.Thread(target=runner.draw)
viz_thread.daemon = True
viz_thread.start()
# Display iframe
display(HTML(html_content))from sqllineage.drawing import SQLLineageApp
from sqllineage.runner import LineageRunner
import json
app = SQLLineageApp()
@app.route('/api/analyze')
def analyze_sql_endpoint(request_data):
"""Custom API endpoint for SQL analysis"""
sql = request_data.get('sql', '')
dialect = request_data.get('dialect', 'ansi')
try:
runner = LineageRunner(sql, dialect=dialect)
return {
'status': 'success',
'source_tables': [str(t) for t in runner.source_tables],
'target_tables': [str(t) for t in runner.target_tables],
'cytoscape_data': runner.to_cytoscape()
}
except Exception as e:
return {
'status': 'error',
'message': str(e)
}
# The app can be extended with custom routes for specialized visualization needsInstall with Tessl CLI
npx tessl i tessl/pypi-sqllineage