0
# Graph Analysis and Visualization
1
2
Integration with NetworkX for graph analysis and with pydot/Graphviz for visualization: convert PROV documents to and from NetworkX graphs, and export them to DOT for graphical rendering. Together these enable provenance graph analysis and visual representation of PROV documents.
3
4
## Capabilities
5
6
### Graph Conversion Functions
7
8
Convert between PROV documents and NetworkX graph representations.
9
10
```python { .api }
11
def prov_to_graph(prov_document):
12
"""
13
Convert a PROV document to a NetworkX MultiDiGraph.
14
15
Args:
16
prov_document (ProvDocument): PROV document to convert
17
18
Returns:
19
networkx.MultiDiGraph: Graph representation of the provenance
20
21
Notes:
22
- Nodes represent PROV elements (entities, activities, agents)
23
- Edges represent PROV relationships
24
- Node and edge attributes preserve PROV metadata
25
- Multiple edges between same nodes are supported (MultiDiGraph)
26
"""
27
28
def graph_to_prov(g):
29
"""
30
Convert a NetworkX MultiDiGraph back to a PROV document.
31
32
Args:
33
g (networkx.MultiDiGraph): Graph to convert
34
35
Returns:
36
ProvDocument: PROV document reconstructed from graph
37
38
Notes:
39
- Requires properly formatted node and edge attributes
40
- Node types determine PROV element types
41
- Edge types determine PROV relationship types
42
"""
43
```
44
45
### DOT Visualization
46
47
Generate a DOT representation of a PROV bundle, returned as a `pydot.Dot` object, for graphical rendering with Graphviz.
48
49
```python { .api }
50
def prov_to_dot(bundle, show_nary=True, use_labels=False, direction="BT",
51
show_element_attributes=True, show_relation_attributes=True):
52
"""
53
Convert a PROV bundle to DOT graph format for visualization.
54
55
Args:
56
bundle (ProvBundle): PROV bundle to visualize
57
show_nary (bool): Show n-ary relations as nodes (default: True)
58
use_labels (bool): Use labels instead of identifiers (default: False)
59
direction (str): Graph direction - "BT", "TB", "LR", "RL" (default: "BT")
60
show_element_attributes (bool): Show element attributes (default: True)
61
show_relation_attributes (bool): Show relation attributes (default: True)
62
63
Returns:
64
pydot.Dot: DOT graph object that can be rendered to various formats
65
66
Notes:
67
- Requires pydot and graphviz for rendering
68
- Supports various output formats: PNG, SVG, PDF, etc.
69
- Direction: BT=bottom-to-top, TB=top-to-bottom, LR=left-to-right, RL=right-to-left
70
"""
71
```
72
73
### Bundle Plotting Method
74
75
Convenience plotting method available on `ProvBundle` objects; the usage examples below also call it on a `ProvDocument`.
76
77
```python { .api }
78
class ProvBundle:
79
def plot(self, filename=None, show_nary=True, use_labels=False, direction="BT"):
80
"""
81
Create a visualization of this bundle.
82
83
Args:
84
filename (str, optional): Output filename (format inferred from extension)
85
show_nary (bool): Show n-ary relations as nodes
86
use_labels (bool): Use labels instead of identifiers
87
direction (str): Graph layout direction
88
89
Returns:
90
Graph object that can be further customized
91
92
Notes:
93
- If filename provided, saves to file
94
- If no filename, returns graph object for interactive use
95
- Supports formats: PNG, SVG, PDF, DOT, etc.
96
"""
97
```
98
99
### Visualization Style Constants
100
101
Predefined styling for DOT graph elements.
102
103
```python { .api }
104
# Generic node styling
105
GENERIC_NODE_STYLE: dict
106
"""Base styling for all nodes."""
107
108
# PROV-specific DOT styling
109
DOT_PROV_STYLE: dict
110
"""PROV element type specific styling including colors and shapes."""
111
112
# Annotation styling
113
ANNOTATION_STYLE: dict
114
"""Styling for annotation elements."""
115
```
116
117
## Usage Examples
118
119
### Basic Graph Conversion
120
121
```python
122
from prov.model import ProvDocument
123
from prov.graph import prov_to_graph, graph_to_prov
124
import networkx as nx
125
126
# Create a PROV document
127
doc = ProvDocument()
128
doc.add_namespace('ex', 'http://example.org/')
129
130
entity1 = doc.entity('ex:entity1')
131
activity1 = doc.activity('ex:activity1')
132
agent1 = doc.agent('ex:agent1')
133
134
doc.generation(entity1, activity1)
135
doc.association(activity1, agent1)
136
137
# Convert to NetworkX graph
138
graph = prov_to_graph(doc)
139
140
# Analyze graph properties
141
print(f"Nodes: {graph.number_of_nodes()}")
142
print(f"Edges: {graph.number_of_edges()}")
143
print(f"Node types: {[graph.nodes[n].get('prov:type') for n in graph.nodes()]}")
144
145
# Graph analysis with NetworkX
146
print(f"Is directed acyclic graph: {nx.is_directed_acyclic_graph(graph)}")
147
print(f"Weakly connected components: {nx.number_weakly_connected_components(graph)}")
148
```
149
150
### Advanced Graph Analysis
151
152
```python
153
import networkx as nx
154
155
# Convert document to graph for analysis
156
graph = prov_to_graph(doc)
157
158
# Find paths between elements
159
# nx.all_simple_paths() yields nothing when the two nodes are not connected;
# it does not raise NetworkXNoPath. Materialize the generator and test the
# resulting list so the "no path" case is actually reported.
entity_nodes = [n for n in graph.nodes() if graph.nodes[n].get('prov:type') == 'prov:Entity']
if len(entity_nodes) >= 2:
    paths = list(nx.all_simple_paths(graph, entity_nodes[0], entity_nodes[1]))
    if paths:
        print(f"Paths between entities: {len(paths)}")
    else:
        print("No path found between entities")
166
167
# Analyze centrality
168
centrality = nx.degree_centrality(graph)
169
most_central = max(centrality, key=centrality.get)
170
print(f"Most central node: {most_central} (centrality: {centrality[most_central]:.3f})")
171
172
# Find strongly connected components
173
scc = list(nx.strongly_connected_components(graph))
174
print(f"Strongly connected components: {len(scc)}")
175
```
176
177
### DOT Visualization
178
179
```python
180
from prov.dot import prov_to_dot
181
182
# Create DOT graph for visualization
183
dot_graph = prov_to_dot(doc)
184
185
# Save to various formats
186
dot_graph.write_png('provenance.png')
187
dot_graph.write_svg('provenance.svg')
188
dot_graph.write_pdf('provenance.pdf')
189
dot_graph.write_dot('provenance.dot')
190
191
# Custom visualization options
192
custom_dot = prov_to_dot(doc,
193
show_nary=False, # Hide n-ary relations
194
use_labels=True, # Use labels instead of IDs
195
direction="LR", # Left-to-right layout
196
show_element_attributes=False, # Hide element attrs
197
show_relation_attributes=False) # Hide relation attrs
198
199
custom_dot.write_png('provenance_simple.png')
200
```
201
202
### Bundle Plotting Method
203
204
```python
205
# Direct plotting from bundle
206
doc.plot('visualization.png') # Save to PNG
207
doc.plot('visualization.svg', direction="TB") # Top-to-bottom layout
208
doc.plot('visualization.pdf', use_labels=True) # Use labels
209
210
# Interactive plotting (returns graph object)
211
graph_obj = doc.plot()
212
# Customize the returned graph object further
213
graph_obj.set_bgcolor('lightgray')
214
graph_obj.write_png('custom_viz.png')
215
```
216
217
### Working with Large Documents
218
219
```python
220
# For large documents, visualize specific bundles
221
large_doc = ProvDocument()
222
# ... populate with many records ...
223
224
# Create bundle with subset of data
225
analysis_bundle = large_doc.bundle('ex:analysis_subset')
226
227
# Add only relevant records to bundle
228
entities_of_interest = ['ex:dataset1', 'ex:result1', 'ex:report1']
229
for entity_id in entities_of_interest:
230
records = large_doc.get_record(entity_id)
231
for record in records:
232
analysis_bundle.add_record(record)
233
234
# Visualize the subset
235
analysis_bundle.plot('analysis_subset.png')
236
```
237
238
### Custom Graph Styling
239
240
```python
241
from prov.dot import prov_to_dot, DOT_PROV_STYLE
242
243
# Examine default styling
244
print("Default PROV styling:")
245
for prov_type, style in DOT_PROV_STYLE.items():
246
print(f" {prov_type}: {style}")
247
248
# Create custom visualization with modified styling
249
dot_graph = prov_to_dot(doc)
250
251
# Customize graph attributes
252
dot_graph.set_bgcolor('white')
253
dot_graph.set_fontsize('12')
254
dot_graph.set_rankdir('TB') # Top-to-bottom
255
256
# Save customized version
257
dot_graph.write_svg('custom_styled.svg')
258
```
259
260
### Graph Metrics and Analysis
261
262
```python
263
# Convert to graph for detailed analysis
264
graph = prov_to_graph(doc)
265
266
# Calculate various graph metrics
267
metrics = {
268
'nodes': graph.number_of_nodes(),
269
'edges': graph.number_of_edges(),
270
'density': nx.density(graph),
271
'is_dag': nx.is_directed_acyclic_graph(graph),
272
'weak_components': nx.number_weakly_connected_components(graph),
273
'strong_components': nx.number_strongly_connected_components(graph)
274
}
275
276
print("Graph Metrics:")
277
for metric, value in metrics.items():
278
print(f" {metric}: {value}")
279
280
# Analyze node types
281
node_types = {}
282
for node in graph.nodes():
283
prov_type = graph.nodes[node].get('prov:type', 'unknown')
284
node_types[prov_type] = node_types.get(prov_type, 0) + 1
285
286
print("\nNode Type Distribution:")
287
for node_type, count in node_types.items():
288
print(f" {node_type}: {count}")
289
```
290
291
### Round-trip Conversion
292
293
```python
294
# Test round-trip conversion (PROV -> Graph -> PROV)
295
original_doc = ProvDocument()
296
# ... create some PROV content ...
297
298
# Convert to graph and back
299
graph = prov_to_graph(original_doc)
300
reconstructed_doc = graph_to_prov(graph)
301
302
# Compare documents
303
print(f"Original records: {len(original_doc.records)}")
304
print(f"Reconstructed records: {len(reconstructed_doc.records)}")
305
306
# Check if documents are equivalent
307
print(f"Documents equal: {original_doc == reconstructed_doc}")
308
```
309
310
### Integration with Jupyter Notebooks
311
312
```python
313
from IPython.display import Image, SVG
314
import tempfile
315
import os
316
317
def display_prov_graph(bundle, format='svg'):
    """Display a PROV graph inline in a Jupyter notebook.

    The bundle is rendered to a temporary file, the rendered bytes are read
    back into memory, and the temporary file is always removed before
    returning.

    Args:
        bundle: Object exposing ``plot(filename, use_labels=...)``.
        format (str): Output format; ``'svg'`` and ``'png'`` produce a
            displayable object.

    Returns:
        ``SVG`` for ``'svg'``, ``Image`` for ``'png'``, otherwise ``None``
        (nothing is available to display inline for other formats).
    """
    # Only reserve a unique path here. The handle is closed before plot()
    # writes to it, because an open NamedTemporaryFile cannot be reopened
    # by name on Windows.
    with tempfile.NamedTemporaryFile(suffix=f'.{format}', delete=False) as tmp:
        path = tmp.name

    try:
        bundle.plot(path, use_labels=True)
        with open(path, 'rb') as rendered:
            data = rendered.read()
    finally:
        # Clean up on every path, including when plot() raises.
        os.unlink(path)

    # Build the display object from in-memory data so it does not depend on
    # the (now deleted) temporary file.
    if format == 'svg':
        return SVG(data=data.decode('utf-8'))
    if format == 'png':
        return Image(data=data, format='png')
    return None
329
330
# In Jupyter notebook cell:
331
# display_prov_graph(doc)
332
```
333
334
### Filtering and Subgraph Analysis
335
336
```python
337
# Create subgraphs based on element types
338
graph = prov_to_graph(doc)
339
340
# Extract entity-only subgraph
341
entity_nodes = [n for n in graph.nodes()
342
if graph.nodes[n].get('prov:type') == 'prov:Entity']
343
entity_subgraph = graph.subgraph(entity_nodes)
344
345
# Extract activity workflow
346
activity_nodes = [n for n in graph.nodes()
347
if graph.nodes[n].get('prov:type') == 'prov:Activity']
348
activity_subgraph = graph.subgraph(activity_nodes)
349
350
# Analyze workflows
351
if activity_subgraph.number_of_nodes() > 0:
352
workflow_length = nx.dag_longest_path_length(activity_subgraph)
353
print(f"Longest workflow path: {workflow_length}")
354
```