0
# Visualization and Export
1
2
Web-based visualization and export capabilities for lineage graphs, including support for Cytoscape.js format and interactive browser interface. These features enable interactive exploration of data lineage and integration with external visualization tools.
3
4
## Capabilities
5
6
### Web Visualization
7
8
Interactive web interface for exploring lineage graphs with zooming, panning, and filtering capabilities.
9
10
```python { .api }
11
def draw_lineage_graph(**kwargs) -> None:
12
"""
13
Launch interactive web visualization server for lineage graphs.
14
15
Parameters:
16
- **kwargs: configuration options for the web server
17
- host: server host (default: "localhost")
18
- port: server port (default: 5000)
19
- debug: enable debug mode (default: False)
20
"""
21
22
class SQLLineageApp:
23
"""WSGI application for web visualization interface"""
24
25
def __init__(self) -> None:
26
"""Initialize the web application"""
27
28
def route(self, path: str) -> Callable:
29
"""
30
Register a route handler.
31
32
Parameters:
33
- path: URL path to handle
34
35
Returns:
36
Decorator function for route handlers
37
"""
38
39
def __call__(self, environ, start_response) -> List[bytes]:
40
"""
41
WSGI application interface.
42
43
Parameters:
44
- environ: WSGI environment dictionary
45
- start_response: WSGI start_response callable
46
47
Returns:
48
Response body as list of bytes
49
"""
50
```
51
52
### Data Export
53
54
Functions for converting lineage graphs to various formats for external tools and custom visualizations.
55
56
```python { .api }
57
def to_cytoscape(graph: DiGraph, compound=False) -> List[Dict[str, Dict[str, Any]]]:
58
"""
59
Convert NetworkX graph to Cytoscape.js format for web visualization.
60
61
Parameters:
62
- graph: NetworkX directed graph containing lineage information
63
- compound: whether to use compound nodes for hierarchical grouping
64
65
Returns:
66
List of dictionaries representing nodes and edges in Cytoscape.js format.
67
Each dictionary has a 'data' key containing node/edge properties.
68
"""
69
```
70
71
### LineageRunner Visualization Methods
72
73
Visualization and export methods integrated into the LineageRunner class.
74
75
```python { .api }
76
def draw(self) -> None:
77
"""
78
Launch interactive web visualization of the lineage graph.
79
Opens a web browser with an interactive graph interface.
80
"""
81
82
def to_cytoscape(self, level: LineageLevel = LineageLevel.TABLE) -> List[Dict[str, Dict[str, str]]]:
83
"""
84
Export lineage graph to Cytoscape.js format.
85
86
Parameters:
87
- level: lineage level (LineageLevel.TABLE or LineageLevel.COLUMN)
88
89
Returns:
90
List of dictionaries representing nodes and edges for Cytoscape.js
91
"""
92
93
def print_table_lineage(self) -> None:
94
"""Print table-level lineage to stdout in human-readable format"""
95
96
def print_column_lineage(self) -> None:
97
"""Print column-level lineage to stdout in human-readable format"""
98
```
99
100
## Usage Examples
101
102
### Basic Web Visualization
103
104
```python
105
from sqllineage.runner import LineageRunner
106
107
sql = """
108
INSERT INTO analytics.customer_summary
109
SELECT
110
c.customer_id,
111
c.customer_name,
112
COUNT(o.order_id) as order_count,
113
SUM(o.total_amount) as total_spent
114
FROM raw.customers c
115
LEFT JOIN raw.orders o ON c.customer_id = o.customer_id
116
GROUP BY c.customer_id, c.customer_name
117
"""
118
119
runner = LineageRunner(sql)
120
121
# Launch web visualization (opens browser at http://localhost:5000)
122
runner.draw()
123
```
124
125
### Cytoscape.js Export
126
127
```python
128
from sqllineage.runner import LineageRunner
129
import json
130
131
runner = LineageRunner(sql)
132
133
# Export to Cytoscape.js format
134
cyto_data = runner.to_cytoscape()
135
136
# Save as JSON for web applications
137
with open('lineage_graph.json', 'w') as f:
138
json.dump(cyto_data, f, indent=2)
139
140
print("Graph data structure:")
141
for item in cyto_data[:3]: # Show first 3 items
142
print(json.dumps(item, indent=2))
143
```
144
145
### Custom Web Server Configuration
146
147
```python
148
from sqllineage.drawing import draw_lineage_graph
149
150
# Launch with custom host and port
151
draw_lineage_graph(
152
host="0.0.0.0", # Accept connections from any IP
153
port=8080, # Custom port
154
debug=True # Enable debug mode
155
)
156
```
157
158
### Production WSGI Deployment
159
160
```python
161
from sqllineage.drawing import app
162
163
# The app object is WSGI-compatible and can be deployed with:
164
# - gunicorn: gunicorn sqllineage.drawing:app
165
# - uwsgi: uwsgi --http :8000 --wsgi-file sqllineage/drawing.py --callable app
166
# - mod_wsgi: WSGIScriptAlias / /path/to/sqllineage/drawing.py
167
168
# Custom WSGI configuration
169
def create_app():
170
from sqllineage.drawing import SQLLineageApp
171
return SQLLineageApp()
172
173
application = create_app()
174
```
175
176
### Complex Graph Visualization
177
178
```python
179
from sqllineage.runner import LineageRunner
180
from sqllineage.core.metadata.dummy import DummyMetaDataProvider
181
182
# Complex SQL with multiple CTEs and joins
183
complex_sql = """
184
WITH regional_sales AS (
185
SELECT
186
region,
187
SUM(sales_amount) as total_sales
188
FROM raw.sales_data
189
WHERE sale_date >= '2023-01-01'
190
GROUP BY region
191
),
192
top_regions AS (
193
SELECT region, total_sales
194
FROM regional_sales
195
WHERE total_sales > 1000000
196
),
197
customer_metrics AS (
198
SELECT
199
customer_id,
200
region,
201
COUNT(*) as order_count,
202
AVG(order_value) as avg_order_value
203
FROM raw.orders o
204
JOIN raw.customers c ON o.customer_id = c.customer_id
205
WHERE c.region IN (SELECT region FROM top_regions)
206
GROUP BY customer_id, region
207
)
208
INSERT INTO analytics.high_value_customers
209
SELECT
210
cm.customer_id,
211
cm.region,
212
cm.order_count,
213
cm.avg_order_value,
214
tr.total_sales as region_total_sales
215
FROM customer_metrics cm
216
JOIN top_regions tr ON cm.region = tr.region
217
WHERE cm.avg_order_value > 500
218
"""
219
220
# Provide metadata for better visualization
221
metadata = {
222
"raw.sales_data": ["region", "sales_amount", "sale_date"],
223
"raw.orders": ["customer_id", "order_value", "order_date"],
224
"raw.customers": ["customer_id", "region", "customer_name"],
225
"analytics.high_value_customers": ["customer_id", "region", "order_count", "avg_order_value", "region_total_sales"]
226
}
227
228
provider = DummyMetaDataProvider(metadata)
229
runner = LineageRunner(complex_sql, metadata_provider=provider)
230
231
# Launch visualization for complex lineage
232
runner.draw()
233
```
234
235
### Compound Node Visualization
236
237
```python
238
from sqllineage.io import to_cytoscape
239
from sqllineage.runner import LineageRunner
240
import networkx as nx
241
242
runner = LineageRunner(sql)
243
244
# Access the internal graph for advanced manipulation
245
graph = runner._sql_holder.table_lineage_graph
246
247
# Export with compound nodes for hierarchical visualization
248
compound_data = to_cytoscape(graph, compound=True)
249
250
# Compound format includes parent-child relationships
251
print("Compound visualization data:")
252
for item in compound_data:
253
if 'parent' in item.get('data', {}):
254
print(f"Child node: {item['data']['id']} -> Parent: {item['data']['parent']}")
255
```
256
257
### Custom Visualization Integration
258
259
```python
260
import json
261
from sqllineage.runner import LineageRunner
262
263
class CustomLineageVisualizer:
264
def __init__(self, runner):
265
self.runner = runner
266
self.cyto_data = runner.to_cytoscape()
267
268
def to_d3_format(self):
269
"""Convert to D3.js force-directed graph format"""
270
nodes = []
271
links = []
272
node_ids = set()
273
274
for item in self.cyto_data:
275
data = item['data']
276
if 'source' in data and 'target' in data:
277
# This is an edge
278
links.append({
279
'source': data['source'],
280
'target': data['target']
281
})
282
node_ids.add(data['source'])
283
node_ids.add(data['target'])
284
else:
285
# This is a node
286
nodes.append({
287
'id': data['id'],
288
'group': 1 # Could be enhanced with table type info
289
})
290
291
return {'nodes': nodes, 'links': links}
292
293
def to_graphviz_dot(self):
294
"""Convert to Graphviz DOT format"""
295
dot_lines = ['digraph lineage {']
296
297
for item in self.cyto_data:
298
data = item['data']
299
if 'source' in data and 'target' in data:
300
dot_lines.append(f' "{data["source"]}" -> "{data["target"]}";')
301
302
dot_lines.append('}')
303
return '\n'.join(dot_lines)
304
305
# Usage
306
runner = LineageRunner(sql)
307
visualizer = CustomLineageVisualizer(runner)
308
309
# Generate different format exports
310
d3_data = visualizer.to_d3_format()
311
with open('lineage_d3.json', 'w') as f:
312
json.dump(d3_data, f, indent=2)
313
314
dot_format = visualizer.to_graphviz_dot()
315
with open('lineage.dot', 'w') as f:
316
f.write(dot_format)
317
```
318
319
### Text-Based Output
320
321
```python
322
from sqllineage.runner import LineageRunner
323
import sys
324
325
runner = LineageRunner(sql)
326
327
# Print to console
328
print("=== TABLE LINEAGE ===")
329
runner.print_table_lineage()
330
331
print("\n=== COLUMN LINEAGE ===")
332
runner.print_column_lineage()
333
334
# Redirect to file
335
with open('lineage_report.txt', 'w') as f:
336
# Temporarily redirect stdout
337
original_stdout = sys.stdout
338
sys.stdout = f
339
340
print("SQL Lineage Analysis Report")
341
print("=" * 50)
342
print("\nTable-Level Lineage:")
343
runner.print_table_lineage()
344
345
print("\nColumn-Level Lineage:")
346
runner.print_column_lineage()
347
348
# Restore stdout
349
sys.stdout = original_stdout
350
351
print("Report saved to lineage_report.txt")
352
```
353
354
### Integration with Jupyter Notebooks
355
356
```python
357
# In Jupyter notebook
358
from sqllineage.runner import LineageRunner
359
from IPython.display import display, HTML, JSON
360
import json
361
362
runner = LineageRunner(sql)
363
364
# Display Cytoscape data in notebook
365
cyto_data = runner.to_cytoscape()
366
display(JSON(cyto_data))
367
368
# Embed web visualization in notebook (requires iframe)
369
html_content = f"""
370
<iframe src="http://localhost:5000" width="100%" height="600px"
371
style="border: 1px solid #ccc;">
372
</iframe>
373
"""
374
375
# Start visualization server in background
376
import threading
377
viz_thread = threading.Thread(target=runner.draw)
378
viz_thread.daemon = True
379
viz_thread.start()
380
381
# Display iframe
382
display(HTML(html_content))
383
```
384
385
### API Endpoint for Visualization
386
387
```python
388
from sqllineage.drawing import SQLLineageApp
389
from sqllineage.runner import LineageRunner
390
import json
391
392
app = SQLLineageApp()
393
394
@app.route('/api/analyze')
395
def analyze_sql_endpoint(request_data):
396
"""Custom API endpoint for SQL analysis"""
397
sql = request_data.get('sql', '')
398
dialect = request_data.get('dialect', 'ansi')
399
400
try:
401
runner = LineageRunner(sql, dialect=dialect)
402
return {
403
'status': 'success',
404
'source_tables': [str(t) for t in runner.source_tables],
405
'target_tables': [str(t) for t in runner.target_tables],
406
'cytoscape_data': runner.to_cytoscape()
407
}
408
except Exception as e:
409
return {
410
'status': 'error',
411
'message': str(e)
412
}
413
414
# The app can be extended with custom routes for specialized visualization needs
415
```