# Document and Bundle Management

Core functionality for creating, managing, and organizing PROV documents and bundles. `ProvDocument` serves as the root container, while `ProvBundle` provides logical grouping with namespace management and record organization.

## Capabilities

### ProvDocument

The main container for all provenance information, extending `ProvBundle` with serialization capabilities and bundle management.

```python { .api }
class ProvDocument(ProvBundle):
    def __init__(self, records=None, namespaces=None):
        """
        Create a new PROV document.

        Args:
            records (iterable, optional): Initial records to add
            namespaces (dict or iterable, optional): Initial namespaces
        """

    def serialize(self, destination=None, format="json", **args):
        """
        Serialize the document to various formats.

        Args:
            destination (str or file-like, optional): Output destination; if omitted, the serialization is returned as a string
            format (str): Output format ('json', 'xml', 'rdf', 'provn')
            **args: Format-specific arguments

        Returns:
            str or None: The serialized document if no destination was given, otherwise None
        """

    @staticmethod
    def deserialize(source=None, content=None, format="json", **args):
        """
        Deserialize a ProvDocument from source (stream/file) or string content.

        Args:
            source (IOBase or PathLike, optional): Stream or file path to deserialize from
            content (str or bytes, optional): String content to deserialize from
            format (str): Serialization format ('json', 'xml', 'rdf', 'provn')
            **args: Format-specific arguments

        Returns:
            ProvDocument: The deserialized document

        Note:
            Either source or content must be provided, not both
        """

    def flattened(self):
        """
        Return a flattened version with all bundles merged.

        Returns:
            ProvDocument: Flattened document
        """

    def unified(self):
        """
        Return unified document with equivalent records merged.

        Returns:
            ProvDocument: Unified document
        """

    def update(self, other):
        """
        Append all records from another document/bundle into this document.

        Args:
            other (ProvBundle): Source document or bundle to merge from
        """

    def add_bundle(self, bundle, identifier=None):
        """
        Add a bundle to this document.

        Args:
            bundle (ProvBundle): Bundle to add
            identifier (QualifiedName, optional): Bundle identifier

        Returns:
            ProvBundle: The added bundle
        """

    def bundle(self, identifier):
        """
        Create a new, empty bundle with the given identifier and add it to this document.

        Args:
            identifier (QualifiedName): Bundle identifier; must not already be used by another bundle in this document

        Returns:
            ProvBundle: The newly created bundle
        """

    @property
    def bundles(self):
        """
        Iterable of all bundles in this document.

        Returns:
            Iterable[ProvBundle]: Document bundles
        """
```

### ProvBundle

Container for PROV records with namespace management and element creation methods.

```python { .api }
class ProvBundle:
    def __init__(self, records=None, identifier=None, namespaces=None, document=None):
        """
        Create a new PROV bundle.

        Args:
            records (iterable, optional): Initial records
            identifier (QualifiedName, optional): Bundle identifier
            namespaces (dict or iterable, optional): Initial namespaces
            document (ProvDocument, optional): Parent document
        """

    # Namespace Management
    def add_namespace(self, namespace_or_prefix, uri=None):
        """
        Add a namespace to this bundle.

        Args:
            namespace_or_prefix (Namespace or str): Namespace object or prefix
            uri (str, optional): URI if prefix provided

        Returns:
            Namespace: The added namespace
        """

    def set_default_namespace(self, uri):
        """
        Set the default namespace URI.

        Args:
            uri (str): Default namespace URI
        """

    def get_default_namespace(self):
        """
        Get the default namespace.

        Returns:
            Namespace: Default namespace or None
        """

    def get_registered_namespaces(self):
        """
        Get all registered namespaces.

        Returns:
            Iterable[Namespace]: Registered namespaces
        """

    # Element Creation
    def entity(self, identifier, other_attributes=None):
        """
        Create and add an entity.

        Args:
            identifier (QualifiedName or str): Entity identifier
            other_attributes (dict, optional): Additional attributes

        Returns:
            ProvEntity: Created entity
        """

    def activity(self, identifier, startTime=None, endTime=None, other_attributes=None):
        """
        Create and add an activity.

        Args:
            identifier (QualifiedName or str): Activity identifier
            startTime (datetime or str, optional): Start time
            endTime (datetime or str, optional): End time
            other_attributes (dict, optional): Additional attributes

        Returns:
            ProvActivity: Created activity
        """

    def agent(self, identifier, other_attributes=None):
        """
        Create and add an agent.

        Args:
            identifier (QualifiedName or str): Agent identifier
            other_attributes (dict, optional): Additional attributes

        Returns:
            ProvAgent: Created agent
        """

    def collection(self, identifier, other_attributes=None):
        """
        Create and add a collection entity.

        Args:
            identifier (QualifiedName or str): Collection identifier
            other_attributes (dict, optional): Additional attributes

        Returns:
            ProvEntity: Created collection entity
        """

    # Record Management
    def get_records(self, class_or_type_or_tuple=None):
        """
        Get records filtered by type.

        Args:
            class_or_type_or_tuple (type or tuple, optional): Filter criteria

        Returns:
            list[ProvRecord]: Matching records
        """

    def get_record(self, identifier):
        """
        Get the records with a specific identifier.

        Args:
            identifier (QualifiedName): Record identifier

        Returns:
            list[ProvRecord]: Records with identifier
        """

    def add_record(self, record):
        """
        Add a record to this bundle.

        Args:
            record (ProvRecord): Record to add
        """

    # Utilities
    def unified(self):
        """
        Return unified bundle with equivalent records merged.

        Returns:
            ProvBundle: Unified bundle
        """

    def update(self, other):
        """
        Update this bundle with records from another.

        Args:
            other (ProvBundle): Source bundle
        """

    def is_document(self):
        """
        Check if this is a document.

        Returns:
            bool: False for ProvBundle, True for ProvDocument
        """

    def is_bundle(self):
        """
        Check if this is a bundle.

        Returns:
            bool: True for ProvBundle, False for ProvDocument
        """

    def has_bundles(self):
        """
        Check if this bundle contains sub-bundles.

        Returns:
            bool: True if it contains bundles
        """

    def plot(self, filename=None, show_nary=True, use_labels=False):
        """
        Create a visualization of this bundle.

        Args:
            filename (str, optional): Output filename
            show_nary (bool): Show n-ary relations
            use_labels (bool): Use labels instead of identifiers

        Returns:
            Graph object
        """

    def get_provn(self, _indent_level=0):
        """
        Get PROV-N representation of this bundle.

        Args:
            _indent_level (int): Indentation level for formatting

        Returns:
            str: PROV-N string representation
        """

    # Properties
    @property
    def identifier(self):
        """Bundle identifier."""

    @property
    def records(self):
        """List of all records in bundle."""

    @property
    def namespaces(self):
        """Set of namespaces in bundle."""

    @property
    def default_ns_uri(self):
        """Default namespace URI."""
```

### Convenience Functions

```python { .api }
def read(source, format=None):
    """
    Convenience function for reading PROV documents with automatic format detection.

    Args:
        source (str, PathLike, or file-like): Source file path or file-like object
        format (str, optional): Format hint ('json', 'xml', 'rdf', 'provn')

    Returns:
        ProvDocument: Loaded document

    Raises:
        TypeError: If format cannot be detected
    """
```

## Usage Examples

### Creating and Managing Documents

```python
import prov
from prov.model import ProvDocument, Namespace

# Create a new document
doc = ProvDocument()

# Add namespaces
ex = Namespace('ex', 'http://example.org/')
doc.add_namespace(ex)
doc.set_default_namespace('http://example.org/')

# Create elements
entity1 = doc.entity('ex:entity1')
activity1 = doc.activity('ex:activity1')

# Serialize to file
doc.serialize('provenance.json', format='json')
doc.serialize('provenance.xml', format='xml')

# Load document
loaded_doc = prov.read('provenance.json')
```

### Working with Bundles

```python
# Create document with bundles
doc = ProvDocument()
# Register the 'ex' prefix first; identifiers such as 'ex:bundle1' are only valid once it is known
doc.add_namespace('ex', 'http://example.org/')
bundle1 = doc.bundle('ex:bundle1')
bundle2 = doc.bundle('ex:bundle2')

# Add elements to specific bundles
bundle1.entity('ex:entity1')
bundle2.entity('ex:entity2')

# Access all bundles
for bundle in doc.bundles:
    print(f"Bundle: {bundle.identifier}")
    for record in bundle.records:
        print(f" Record: {record.identifier}")
```
390
391
### Namespace Management
392
393
```python
# Multiple ways to add namespaces
ex = doc.add_namespace('ex', 'http://example.org/')  # returns the registered Namespace
doc.add_namespace(Namespace('foaf', 'http://xmlns.com/foaf/0.1/'))

# Use namespaces
entity = doc.entity(ex['myentity'])  # Creates ex:myentity

# Set default namespace
doc.set_default_namespace('http://example.org/')
entity2 = doc.entity('entity2')  # Uses default namespace
```