A library for W3C Provenance Data Model supporting PROV-JSON, PROV-XML and PROV-O (RDF)
—
Core functionality for creating, managing, and organizing PROV documents and bundles. ProvDocument serves as the root container while ProvBundle provides logical grouping with namespace management and record organization.
The main container for all provenance information, extending ProvBundle with serialization capabilities and bundle management.
class ProvDocument(ProvBundle):
def __init__(self, records=None, namespaces=None):
"""
Create a new PROV document.
Args:
records (iterable, optional): Initial records to add
namespaces (dict or iterable, optional): Initial namespaces
"""
def serialize(self, destination, format, **args):
"""
Serialize the document to various formats.
Args:
destination (str or file-like): Output destination
format (str): Output format ('json', 'xml', 'rdf', 'provn')
**args: Format-specific arguments
"""
@staticmethod
def deserialize(source=None, content=None, format="json", **args):
"""
Deserialize a ProvDocument from source (stream/file) or string content.
Args:
source (IOBase or PathLike, optional): Stream or file path to deserialize from
content (str or bytes, optional): String content to deserialize from
format (str): Serialization format ('json', 'xml', 'rdf', 'provn')
**args: Format-specific arguments
Returns:
ProvDocument: The deserialized document
Note:
Either source or content must be provided, not both
"""
def flattened(self):
"""
Return a flattened version with all bundles merged.
Returns:
ProvDocument: Flattened document
"""
def unified(self):
"""
Return unified document with equivalent records merged.
Returns:
ProvDocument: Unified document
"""
def update(self, other):
"""
Append all records from another document/bundle into this document.
Args:
other (ProvBundle): Source document or bundle to merge from
"""
def add_bundle(self, bundle, identifier=None):
"""
Add a bundle to this document.
Args:
bundle (ProvBundle): Bundle to add
identifier (QualifiedName, optional): Bundle identifier
Returns:
ProvBundle: The added bundle
"""
def bundle(self, identifier):
"""
Create or retrieve a bundle with the given identifier.
Args:
identifier (QualifiedName): Bundle identifier
Returns:
ProvBundle: New or existing bundle
"""
@property
def bundles(self):
"""
Iterable of all bundles in this document.
Returns:
Iterable[ProvBundle]: Document bundles
"""
Container for PROV records with namespace management and element creation methods.
class ProvBundle:
def __init__(self, records=None, identifier=None, namespaces=None, document=None):
"""
Create a new PROV bundle.
Args:
records (iterable, optional): Initial records
identifier (QualifiedName, optional): Bundle identifier
namespaces (dict or iterable, optional): Initial namespaces
document (ProvDocument, optional): Parent document
"""
# Namespace Management
def add_namespace(self, namespace_or_prefix, uri=None):
"""
Add a namespace to this bundle.
Args:
namespace_or_prefix (Namespace or str): Namespace object or prefix
uri (str, optional): URI if prefix provided
Returns:
Namespace: The added namespace
"""
def set_default_namespace(self, uri):
"""
Set the default namespace URI.
Args:
uri (str): Default namespace URI
"""
def get_default_namespace(self):
"""
Get the default namespace.
Returns:
Namespace: Default namespace or None
"""
def get_registered_namespaces(self):
"""
Get all registered namespaces.
Returns:
Iterable[Namespace]: Registered namespaces
"""
# Element Creation
def entity(self, identifier, other_attributes=None):
"""
Create and add an entity.
Args:
identifier (QualifiedName or str): Entity identifier
other_attributes (dict, optional): Additional attributes
Returns:
ProvEntity: Created entity
"""
def activity(self, identifier, startTime=None, endTime=None, other_attributes=None):
"""
Create and add an activity.
Args:
identifier (QualifiedName or str): Activity identifier
startTime (datetime or str, optional): Start time
endTime (datetime or str, optional): End time
other_attributes (dict, optional): Additional attributes
Returns:
ProvActivity: Created activity
"""
def agent(self, identifier, other_attributes=None):
"""
Create and add an agent.
Args:
identifier (QualifiedName or str): Agent identifier
other_attributes (dict, optional): Additional attributes
Returns:
ProvAgent: Created agent
"""
def collection(self, identifier, other_attributes=None):
"""
Create and add a collection entity.
Args:
identifier (QualifiedName or str): Collection identifier
other_attributes (dict, optional): Additional attributes
Returns:
ProvEntity: Created collection entity
"""
# Record Management
def get_records(self, class_or_type_or_tuple=None):
"""
Get records filtered by type.
Args:
class_or_type_or_tuple (type or tuple, optional): Filter criteria
Returns:
list[ProvRecord]: Matching records
"""
def get_record(self, identifier):
"""
Get records with specific identifier.
Args:
identifier (QualifiedName): Record identifier
Returns:
list[ProvRecord]: Records with identifier
"""
def add_record(self, record):
"""
Add a record to this bundle.
Args:
record (ProvRecord): Record to add
"""
# Utilities
def unified(self):
"""
Return unified bundle with equivalent records merged.
Returns:
ProvBundle: Unified bundle
"""
def update(self, other):
"""
Update this bundle with records from another.
Args:
other (ProvBundle): Source bundle
"""
def is_document(self):
"""
Check if this is a document.
Returns:
bool: False for ProvBundle, True for ProvDocument
"""
def is_bundle(self):
"""
Check if this is a bundle.
Returns:
bool: True for ProvBundle, False for ProvDocument
"""
def has_bundles(self):
"""
Check if this bundle contains sub-bundles.
Returns:
bool: True if contains bundles
"""
def plot(self, filename=None, show_nary=True, use_labels=False):
"""
Create a visualization of this bundle.
Args:
filename (str, optional): Output filename
show_nary (bool): Show n-ary relations
use_labels (bool): Use labels instead of identifiers
Returns:
Graph object
"""
def get_provn(self, _indent_level=0):
"""
Get PROV-N representation of this bundle.
Args:
_indent_level (int): Indentation level for formatting
Returns:
str: PROV-N string representation
"""
# Properties
@property
def identifier(self):
"""Bundle identifier."""
@property
def records(self):
"""List of all records in bundle."""
@property
def namespaces(self):
"""Set of namespaces in bundle."""
@property
def default_ns_uri(self):
"""Default namespace URI."""
def read(source, format=None):
"""
Convenience function for reading PROV documents with automatic format detection.
Args:
source (str, PathLike, or file-like object): Source file path or an open file-like object
format (str, optional): Format hint ('json', 'xml', 'rdf', 'provn')
Returns:
ProvDocument: Loaded document
Raises:
TypeError: If format cannot be detected
"""
import prov
from prov.model import ProvDocument, Namespace
# Create a new document
doc = ProvDocument()
# Add namespaces
ex = Namespace('ex', 'http://example.org/')
doc.add_namespace(ex)
doc.set_default_namespace('http://example.org/')
# Create elements
entity1 = doc.entity('ex:entity1')
activity1 = doc.activity('ex:activity1')
# Serialize to file
doc.serialize('provenance.json', format='json')
doc.serialize('provenance.xml', format='xml')
# Load document
loaded_doc = prov.read('provenance.json')
# Create document with bundles
doc = ProvDocument()
bundle1 = doc.bundle('ex:bundle1')
bundle2 = doc.bundle('ex:bundle2')
# Add elements to specific bundles
bundle1.entity('ex:entity1')
bundle2.entity('ex:entity2')
# Access all bundles
for bundle in doc.bundles:
print(f"Bundle: {bundle.identifier}")
for record in bundle.records:
print(f" Record: {record.identifier}")
# Multiple ways to add namespaces
doc.add_namespace('ex', 'http://example.org/')
doc.add_namespace(Namespace('foaf', 'http://xmlns.com/foaf/0.1/'))
# Use namespaces
ex = doc.get_namespace('http://example.org/')
entity = doc.entity(ex['myentity']) # Creates ex:myentity
# Set default namespace
doc.set_default_namespace('http://example.org/')
entity2 = doc.entity('entity2') # Uses default namespace
Install with Tessl CLI
npx tessl i tessl/pypi-prov