or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

document-management.md · identifiers.md · index.md · prov-elements.md · relationships.md · serialization.md · visualization.md

docs/visualization.md

0

# Graph Analysis and Visualization

1

2

This module integrates PROV with NetworkX for graph analysis and visualization. It covers conversion of PROV documents to and from NetworkX graphs, and DOT export for graphical rendering. Together these enable provenance graph analysis and visual representation of PROV documents.

3

4

## Capabilities

5

6

### Graph Conversion Functions

7

8

Convert between PROV documents and NetworkX graph representations.

9

10

```python { .api }

11

def prov_to_graph(prov_document):

12

"""

13

Convert a PROV document to a NetworkX MultiDiGraph.

14

15

Args:

16

prov_document (ProvDocument): PROV document to convert

17

18

Returns:

19

networkx.MultiDiGraph: Graph representation of the provenance

20

21

Notes:

22

- Nodes represent PROV elements (entities, activities, agents)

23

- Edges represent PROV relationships

24

- Node and edge attributes preserve PROV metadata

25

- Multiple edges between same nodes are supported (MultiDiGraph)

26

"""

27

28

def graph_to_prov(g):

29

"""

30

Convert a NetworkX MultiDiGraph back to a PROV document.

31

32

Args:

33

g (networkx.MultiDiGraph): Graph to convert

34

35

Returns:

36

ProvDocument: PROV document reconstructed from graph

37

38

Notes:

39

- Requires properly formatted node and edge attributes

40

- Node types determine PROV element types

41

- Edge types determine PROV relationship types

42

"""

43

```

44

45

### DOT Visualization

46

47

Generate DOT-format output from a PROV bundle for graphical rendering with Graphviz.

48

49

```python { .api }

50

def prov_to_dot(bundle, show_nary=True, use_labels=False, direction="BT",

51

show_element_attributes=True, show_relation_attributes=True):

52

"""

53

Convert a PROV bundle to DOT graph format for visualization.

54

55

Args:

56

bundle (ProvBundle): PROV bundle to visualize

57

show_nary (bool): Show n-ary relations as nodes (default: True)

58

use_labels (bool): Use labels instead of identifiers (default: False)

59

direction (str): Graph direction - "BT", "TB", "LR", "RL" (default: "BT")

60

show_element_attributes (bool): Show element attributes (default: True)

61

show_relation_attributes (bool): Show relation attributes (default: True)

62

63

Returns:

64

pydot.Dot: DOT graph object that can be rendered to various formats

65

66

Notes:

67

- Requires pydot and graphviz for rendering

68

- Supports various output formats: PNG, SVG, PDF, etc.

69

- Direction: BT=bottom-to-top, TB=top-to-bottom, LR=left-to-right, RL=right-to-left

70

"""

71

```

72

73

### Bundle Plotting Method

74

75

Convenience plotting method available on `ProvBundle` objects.

76

77

```python { .api }

78

class ProvBundle:

79

def plot(self, filename=None, show_nary=True, use_labels=False, direction="BT"):

80

"""

81

Create a visualization of this bundle.

82

83

Args:

84

filename (str, optional): Output filename (format inferred from extension)

85

show_nary (bool): Show n-ary relations as nodes

86

use_labels (bool): Use labels instead of identifiers

87

direction (str): Graph layout direction

88

89

Returns:

90

Graph object that can be further customized

91

92

Notes:

93

- If filename provided, saves to file

94

- If no filename, returns graph object for interactive use

95

- Supports formats: PNG, SVG, PDF, DOT, etc.

96

"""

97

```

98

99

### Visualization Style Constants

100

101

Predefined styling for DOT graph elements.

102

103

```python { .api }

104

# Generic node styling

105

GENERIC_NODE_STYLE: dict

106

"""Base styling for all nodes."""

107

108

# PROV-specific DOT styling

109

DOT_PROV_STYLE: dict

110

"""PROV element type specific styling including colors and shapes."""

111

112

# Annotation styling

113

ANNOTATION_STYLE: dict

114

"""Styling for annotation elements."""

115

```

116

117

## Usage Examples

118

119

### Basic Graph Conversion

120

121

```python

122

from prov.model import ProvDocument

123

from prov.graph import prov_to_graph, graph_to_prov

124

import networkx as nx

125

126

# Create a PROV document

127

doc = ProvDocument()

128

doc.add_namespace('ex', 'http://example.org/')

129

130

entity1 = doc.entity('ex:entity1')

131

activity1 = doc.activity('ex:activity1')

132

agent1 = doc.agent('ex:agent1')

133

134

doc.generation(entity1, activity1)

135

doc.association(activity1, agent1)

136

137

# Convert to NetworkX graph

138

graph = prov_to_graph(doc)

139

140

# Analyze graph properties

141

print(f"Nodes: {graph.number_of_nodes()}")

142

print(f"Edges: {graph.number_of_edges()}")

143

print(f"Node types: {[graph.nodes[n].get('prov:type') for n in graph.nodes()]}")

144

145

# Graph analysis with NetworkX

146

print(f"Is directed acyclic graph: {nx.is_directed_acyclic_graph(graph)}")

147

print(f"Weakly connected components: {nx.number_weakly_connected_components(graph)}")

148

```

149

150

### Advanced Graph Analysis

151

152

```python

153

import networkx as nx

154

155

# Convert document to graph for analysis

156

graph = prov_to_graph(doc)

157

158

# Find paths between elements

159

try:

160

entity_nodes = [n for n in graph.nodes() if graph.nodes[n].get('prov:type') == 'prov:Entity']

161

if len(entity_nodes) >= 2:

162

paths = list(nx.all_simple_paths(graph, entity_nodes[0], entity_nodes[1]))

163

print(f"Paths between entities: {len(paths)}")

164

except nx.NetworkXNoPath:

165

print("No path found between entities")

166

167

# Analyze centrality

168

centrality = nx.degree_centrality(graph)

169

most_central = max(centrality, key=centrality.get)

170

print(f"Most central node: {most_central} (centrality: {centrality[most_central]:.3f})")

171

172

# Find strongly connected components

173

scc = list(nx.strongly_connected_components(graph))

174

print(f"Strongly connected components: {len(scc)}")

175

```

176

177

### DOT Visualization

178

179

```python

180

from prov.dot import prov_to_dot

181

182

# Create DOT graph for visualization

183

dot_graph = prov_to_dot(doc)

184

185

# Save to various formats

186

dot_graph.write_png('provenance.png')

187

dot_graph.write_svg('provenance.svg')

188

dot_graph.write_pdf('provenance.pdf')

189

dot_graph.write_dot('provenance.dot')

190

191

# Custom visualization options

192

custom_dot = prov_to_dot(doc,

193

show_nary=False, # Hide n-ary relations

194

use_labels=True, # Use labels instead of IDs

195

direction="LR", # Left-to-right layout

196

show_element_attributes=False, # Hide element attrs

197

show_relation_attributes=False) # Hide relation attrs

198

199

custom_dot.write_png('provenance_simple.png')

200

```

201

202

### Bundle Plotting Method

203

204

```python

205

# Direct plotting from bundle

206

doc.plot('visualization.png') # Save to PNG

207

doc.plot('visualization.svg', direction="TB") # Top-to-bottom layout

208

doc.plot('visualization.pdf', use_labels=True) # Use labels

209

210

# Interactive plotting (returns graph object)

211

graph_obj = doc.plot()

212

# Customize the returned graph object further

213

graph_obj.set_bgcolor('lightgray')

214

graph_obj.write_png('custom_viz.png')

215

```

216

217

### Working with Large Documents

218

219

```python

220

# For large documents, visualize specific bundles

221

large_doc = ProvDocument()

222

# ... populate with many records ...

223

224

# Create bundle with subset of data

225

analysis_bundle = large_doc.bundle('ex:analysis_subset')

226

227

# Add only relevant records to bundle

228

entities_of_interest = ['ex:dataset1', 'ex:result1', 'ex:report1']

229

for entity_id in entities_of_interest:

230

records = large_doc.get_record(entity_id)

231

for record in records:

232

analysis_bundle.add_record(record)

233

234

# Visualize the subset

235

analysis_bundle.plot('analysis_subset.png')

236

```

237

238

### Custom Graph Styling

239

240

```python

241

from prov.dot import prov_to_dot, DOT_PROV_STYLE

242

243

# Examine default styling

244

print("Default PROV styling:")

245

for prov_type, style in DOT_PROV_STYLE.items():

246

print(f" {prov_type}: {style}")

247

248

# Create custom visualization with modified styling

249

dot_graph = prov_to_dot(doc)

250

251

# Customize graph attributes

252

dot_graph.set_bgcolor('white')

253

dot_graph.set_fontsize('12')

254

dot_graph.set_rankdir('TB') # Top-to-bottom

255

256

# Save customized version

257

dot_graph.write_svg('custom_styled.svg')

258

```

259

260

### Graph Metrics and Analysis

261

262

```python

263

# Convert to graph for detailed analysis

264

graph = prov_to_graph(doc)

265

266

# Calculate various graph metrics

267

metrics = {

268

'nodes': graph.number_of_nodes(),

269

'edges': graph.number_of_edges(),

270

'density': nx.density(graph),

271

'is_dag': nx.is_directed_acyclic_graph(graph),

272

'weak_components': nx.number_weakly_connected_components(graph),

273

'strong_components': nx.number_strongly_connected_components(graph)

274

}

275

276

print("Graph Metrics:")

277

for metric, value in metrics.items():

278

print(f" {metric}: {value}")

279

280

# Analyze node types

281

node_types = {}

282

for node in graph.nodes():

283

prov_type = graph.nodes[node].get('prov:type', 'unknown')

284

node_types[prov_type] = node_types.get(prov_type, 0) + 1

285

286

print("\nNode Type Distribution:")

287

for node_type, count in node_types.items():

288

print(f" {node_type}: {count}")

289

```

290

291

### Round-trip Conversion

292

293

```python

294

# Test round-trip conversion (PROV -> Graph -> PROV)

295

original_doc = ProvDocument()

296

# ... create some PROV content ...

297

298

# Convert to graph and back

299

graph = prov_to_graph(original_doc)

300

reconstructed_doc = graph_to_prov(graph)

301

302

# Compare documents

303

print(f"Original records: {len(original_doc.records)}")

304

print(f"Reconstructed records: {len(reconstructed_doc.records)}")

305

306

# Check if documents are equivalent

307

print(f"Documents equal: {original_doc == reconstructed_doc}")

308

```

309

310

### Integration with Jupyter Notebooks

311

312

```python

313

from IPython.display import Image, SVG

314

import tempfile

315

import os

316

317

def display_prov_graph(bundle, format='svg'):

318

"""Display PROV graph inline in Jupyter notebook."""

319

with tempfile.NamedTemporaryFile(suffix=f'.{format}', delete=False) as tmp:

320

bundle.plot(tmp.name, use_labels=True)

321

322

if format == 'svg':

323

return SVG(tmp.name)

324

elif format == 'png':

325

return Image(tmp.name)

326

327

# Clean up

328

os.unlink(tmp.name)

329

330

# In Jupyter notebook cell:

331

# display_prov_graph(doc)

332

```

333

334

### Filtering and Subgraph Analysis

335

336

```python

337

# Create subgraphs based on element types

338

graph = prov_to_graph(doc)

339

340

# Extract entity-only subgraph

341

entity_nodes = [n for n in graph.nodes()

342

if graph.nodes[n].get('prov:type') == 'prov:Entity']

343

entity_subgraph = graph.subgraph(entity_nodes)

344

345

# Extract activity workflow

346

activity_nodes = [n for n in graph.nodes()

347

if graph.nodes[n].get('prov:type') == 'prov:Activity']

348

activity_subgraph = graph.subgraph(activity_nodes)

349

350

# Analyze workflows

351

if activity_subgraph.number_of_nodes() > 0:

352

workflow_length = nx.dag_longest_path_length(activity_subgraph)

353

print(f"Longest workflow path: {workflow_length}")

354

```