Process mining library for discovering, analyzing and visualizing business processes from event data
—
Specialized operations for Object-Centric Event Logs (OCEL) that handle multi-dimensional process data with objects, relationships, and complex event-object interactions. OCEL extends traditional process mining to scenarios with multiple interacting business objects.
Basic operations for extracting information from Object-Centric Event Logs.
def ocel_get_object_types(ocel):
    """
    Return the names of the object types that occur in the OCEL.

    Parameters:
    - ocel (OCEL): Object-centric event log to inspect

    Returns:
    List[str]: Object type names found in the log
    """
def ocel_get_attribute_names(ocel):
    """
    Collect the attribute names used by the events and objects of the OCEL.

    Parameters:
    - ocel (OCEL): Object-centric event log to inspect

    Returns:
    List[str]: Attribute names found in the log
    """
def ocel_flattening(ocel, object_type):
    """
    Flatten the OCEL into a traditional event log for one object type.

    The result is a case-activity-timestamp log built from the lifecycle of
    the objects of the chosen type.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - object_type (str): Object type that drives the flattening

    Returns:
    pd.DataFrame: Event log in the traditional format
    """
def ocel_object_type_activities(ocel):
    """
    Return, for every object type, the activities performed on it.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    Dict[str, Collection[str]]: Object type mapped to its activities
    """
def ocel_objects_ot_count(ocel):
    """
    Return the number of objects for each object type.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    Dict[str, int]: Object type mapped to its object count

    NOTE(review): the upstream pm4py API reference appears to describe this
    function as counting, for each event, the related objects per type
    (Dict[str, Dict[str, int]]) - confirm which shape applies.
    """
# Analytical operations for understanding OCEL characteristics and patterns.
def ocel_objects_interactions_summary(ocel):
    """
    Summarize the interactions between objects in the OCEL.

    Shows how objects are related to one another through shared events.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    pd.DataFrame: Table of object interactions. The upstream pm4py API
    reference types this return value as a pandas DataFrame, not a dict.

    NOTE(review): the column layout is not documented on this page - inspect
    `.columns` on the result before indexing it.
    """
def ocel_temporal_summary(ocel):
    """
    Summarize the time-based behaviour recorded in the OCEL.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    pd.DataFrame: Temporal summary table. The upstream pm4py API reference
    types this return value as a pandas DataFrame, not a dict.

    NOTE(review): the column layout is not documented on this page - inspect
    `.columns` on the result before indexing it.
    """
def ocel_objects_summary(ocel):
    """
    Summarize the objects contained in the OCEL.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    pd.DataFrame: Object summary table including lifecycle information. The
    upstream pm4py API reference types this return value as a pandas
    DataFrame, not a dict.

    NOTE(review): the column layout is not documented on this page - inspect
    `.columns` on the result before indexing it.
    """
# Discover process models from Object-Centric Event Logs using specialized algorithms.
def discover_ocdfg(ocel, **kwargs):
    """
    Discover an Object-Centric Directly-Follows Graph (OC-DFG).

    The graph relates activities to each other while taking the interactions
    between objects into account.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - **kwargs: Extra options forwarded to the discovery algorithm

    Returns:
    Dict[str, Any]: OC-DFG structure carrying object type information
    """
def discover_oc_petri_net(ocel, **kwargs):
    """
    Discover an Object-Centric Petri Net from the OCEL.

    The model describes object lifecycles and the interactions between them.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - **kwargs: Extra options forwarded to the discovery algorithm

    Returns:
    Dict[str, Any]: The object-centric Petri net as a single object. It is
    not a (PetriNet, Marking, Marking) tuple: the usage examples on this
    page assign the result to one variable and hand it unchanged to
    pm4py.view_ocpn / pm4py.save_vis_ocpn, and the upstream pm4py API
    reference types the return value as a dictionary.
    """
def discover_objects_graph(ocel, **kwargs):
    """
    Discover a graph that captures the relationships between objects.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - **kwargs: Extra options for the graph construction

    Returns:
    Set[Tuple[str, str]]: Edges of the object graph as pairs of object
    identifiers. The upstream pm4py API reference types this return value
    as a set of tuples, not a dict.
    """
# Transform and modify OCEL data for specific analysis needs.
def sample_ocel_objects(ocel, num_objects, object_type=None):
    """
    Build a smaller OCEL by sampling a given number of objects.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - num_objects (int): How many objects to keep in the sample
    - object_type (Optional[str]): Object type to draw the sample from

    Returns:
    OCEL: Sampled object-centric event log

    NOTE(review): the upstream pm4py signature appears to accept only
    (ocel, num_objects) - confirm that `object_type` is supported by the
    installed version.
    """
def sample_ocel_connected_components(ocel, num_cc):
    """
    Build a smaller OCEL by sampling connected components.

    Relationships between objects inside a sampled component are kept.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - num_cc (int): How many connected components to keep in the sample

    Returns:
    OCEL: Sampled object-centric event log
    """
def ocel_drop_duplicates(ocel):
    """
    Drop duplicate events from the OCEL, judged by their event attributes.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    OCEL: Object-centric event log without the duplicates
    """
def ocel_merge_duplicates(ocel):
    """
    Merge duplicate events of the OCEL, combining their attributes.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    OCEL: Object-centric event log with duplicates merged
    """
def ocel_sort_by_additional_column(ocel, column):
    """
    Sort the OCEL events by an extra column while keeping temporal order.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - column (str): Name of the additional column to sort by

    Returns:
    OCEL: Sorted object-centric event log
    """
def ocel_add_index_based_timedelta(ocel):
    """
    Add an index-based time delta to the OCEL.

    The delta provides a synthetic temporal ordering of the events.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    OCEL: Object-centric event log enhanced with time deltas
    """
# Enrich OCEL with additional information about object relationships and lifecycles.
def ocel_o2o_enrichment(ocel, **kwargs):
    """
    Enrich the OCEL with object-to-object relationships.

    Relationships are discovered from the co-occurrence of objects in events
    and added to the log.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - **kwargs: Options controlling the relationship discovery

    Returns:
    OCEL: Object-centric event log enriched with object relationships
    """
def ocel_e2o_lifecycle_enrichment(ocel, **kwargs):
    """
    Enrich the OCEL with event-to-object lifecycle information.

    Events are annotated with the lifecycle context of their objects.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - **kwargs: Options controlling the lifecycle enrichment

    Returns:
    OCEL: Object-centric event log enriched with lifecycle information
    """
def cluster_equivalent_ocel(ocel, **kwargs):
    """
    Cluster equivalent objects of the OCEL by their behaviour.

    Objects with similar lifecycles and interaction patterns end up in the
    same group.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - **kwargs: Options for the clustering algorithm

    Returns:
    OCEL: Object-centric event log carrying the clustering information

    NOTE(review): the upstream pm4py function appears to require an explicit
    object type argument and to return a dictionary of OCEL collections -
    confirm against the installed version.
    """


import pm4py
# Read the object-centric event log (events file plus a separate objects table)
ocel = pm4py.read_ocel('ocel_data.csv', objects_path='objects.csv')

# Report the object types and the attribute names found in the log
object_types = pm4py.ocel_get_object_types(ocel)
print(f"Object types: {object_types}")
attributes = pm4py.ocel_get_attribute_names(ocel)
print(f"Attributes: {attributes}")

# Report the object counts per type
object_counts = pm4py.ocel_objects_ot_count(ocel)
print(f"Object counts: {object_counts}")

# List the activities observed for every object type
activities_per_type = pm4py.ocel_object_type_activities(ocel)
for obj_type, activities in activities_per_type.items():
    activity_names = list(activities)
    print(f"{obj_type}: {activity_names}")

import pm4py
# Get comprehensive summaries.
# The summary functions return pandas DataFrames (see the pm4py API
# reference), so the results are inspected with DataFrame operations instead
# of dictionary keys such as 'total_objects', which would raise KeyError.
# NOTE(review): the column names used below come from the pm4py
# implementation, not from its API reference - confirm them against the
# installed version (print `.columns` when in doubt).
objects_summary = pm4py.ocel_objects_summary(ocel)
print("Objects Summary:")
# presumably one row per object - verify
print(f" Total objects: {len(objects_summary)}")
# 'activities_lifecycle' is assumed to hold the list of activities per object
avg_lifecycle_length = objects_summary['activities_lifecycle'].str.len().mean()
print(f" Average lifecycle length: {avg_lifecycle_length}")

interactions_summary = pm4py.ocel_objects_interactions_summary(ocel)
print("Interactions Summary:")
# Count interaction rows per pair of object types; the second object's type
# column is assumed to carry a "_2" suffix.
type_columns = [ocel.object_type_column, ocel.object_type_column + "_2"]
interaction_counts = interactions_summary.groupby(type_columns).size()
for interaction, count in interaction_counts.items():
    print(f" {interaction}: {count}")

temporal_summary = pm4py.ocel_temporal_summary(ocel)
print("Temporal Summary:")
timestamps = temporal_summary[ocel.event_timestamp]
print(f" Time span: {timestamps.max() - timestamps.min()}")
# Peak = the timestamp whose row lists the most activities
peak_row = temporal_summary[ocel.event_activity].str.len().idxmax()
print(f" Peak activity period: {timestamps[peak_row]}")

import pm4py
# Produce one traditional (case/activity/timestamp) log per object type
object_types = pm4py.ocel_get_object_types(ocel)
flattened_logs = {}
for obj_type in object_types:
    flattened_log = pm4py.ocel_flattening(ocel, obj_type)
    flattened_logs[obj_type] = flattened_log

    # Size of the flattened view: distinct cases, rows, distinct activities
    print(f"Flattened log for {obj_type}:")
    num_cases = flattened_log['case:concept:name'].nunique()
    print(f" Cases: {num_cases}")
    num_events = len(flattened_log)
    print(f" Events: {num_events}")
    num_activities = flattened_log['concept:name'].nunique()
    print(f" Activities: {num_activities}")

# Run classic discovery and conformance checking on every flattened log
for obj_type, log in flattened_logs.items():
    print(f"\nProcess discovery for {obj_type}:")
    net, im, fm = pm4py.discover_petri_net_inductive(log)
    fitness = pm4py.fitness_alignments(log, net, im, fm)
    log_fitness = fitness['log_fitness']
    print(f" Fitness: {log_fitness:.3f}")

import pm4py
# Discover Object-Centric DFG
ocdfg = pm4py.discover_ocdfg(ocel)
print("OC-DFG discovered")

# Visualize OC-DFG
pm4py.view_ocdfg(ocdfg)
pm4py.save_vis_ocdfg(ocdfg, 'ocdfg.png')

# Discover Object-Centric Petri Net
ocpn = pm4py.discover_oc_petri_net(ocel)
print("OC Petri Net discovered")

# Visualize OC Petri Net
pm4py.view_ocpn(ocpn)
pm4py.save_vis_ocpn(ocpn, 'ocpn.png')

# Discover object interaction graph
obj_graph = pm4py.discover_objects_graph(ocel)
# The object-graph visualizers take the discovered graph as their second
# argument (after the log); without it the graph above is never used.
pm4py.view_object_graph(ocel, obj_graph)
pm4py.save_vis_object_graph(ocel, obj_graph, 'object_graph.png')

import pm4py
# Draw a sample of 100 objects from the whole log
sampled_ocel = pm4py.sample_ocel_objects(ocel, 100)
sampled_event_count = len(sampled_ocel.events)
print(f"Sampled OCEL: {sampled_event_count} events")

# Restrict the sample to a single object type
# NOTE(review): confirm that the installed pm4py accepts `object_type` here
order_sample = pm4py.sample_ocel_objects(ocel, 50, object_type='Order')
order_event_count = len(order_sample.events)
print(f"Order sample: {order_event_count} events")

# Sample whole connected components instead of single objects
cc_sample = pm4py.sample_ocel_connected_components(ocel, 10)
cc_event_count = len(cc_sample.events)
print(f"Connected component sample: {cc_event_count} events")

# Clean the log: first by dropping duplicates, then by merging them
clean_ocel = pm4py.ocel_drop_duplicates(ocel)
clean_event_count = len(clean_ocel.events)
print(f"After removing duplicates: {clean_event_count} events")
merged_ocel = pm4py.ocel_merge_duplicates(ocel)
merged_event_count = len(merged_ocel.events)
print(f"After merging duplicates: {merged_event_count} events")

import pm4py
# NOTE(review): the keyword options used in this example are passed through
# **kwargs; confirm that the installed pm4py version accepts them.

# Enrich the log with object-to-object relationships
o2o_options = {
    'min_support': 0.1,  # Minimum support for relationship
    'window_size': 5,  # Event window for relationship detection
}
enriched_ocel = pm4py.ocel_o2o_enrichment(ocel, **o2o_options)
print("Added object-to-object relationships")

# Enrich the log with lifecycle information
lifecycle_options = {'lifecycle_stages': ['start', 'active', 'complete']}
lifecycle_ocel = pm4py.ocel_e2o_lifecycle_enrichment(ocel, **lifecycle_options)
print("Added lifecycle information")

# Group objects that behave equivalently
clustering_options = {
    'similarity_threshold': 0.8,
    'clustering_method': 'kmeans',
}
clustered_ocel = pm4py.cluster_equivalent_ocel(ocel, **clustering_options)
print("Clustered equivalent objects")

import pm4py
def analyze_ocel_object_type(ocel, object_type):
    """Run the complete analysis pipeline for one object type.

    Parameters:
    - ocel (OCEL): Object-centric event log
    - object_type (str): Object type to analyze

    Returns:
    Dict[str, Any]: Keys 'object_count', 'activities', 'fitness',
    'precision', 'flattened_log' and 'petri_net' (the (net, im, fm) tuple)

    Prints progress lines and saves '<object_type>_petri_net.png' and
    '<object_type>_ocdfg.png' through the pm4py save_vis_* helpers.
    """
    print(f"Analyzing object type: {object_type}")

    # Narrow the log down to the requested object type
    filtered_ocel = pm4py.filter_ocel_object_types(ocel, [object_type])

    # Basic statistics for that type
    # NOTE(review): assumes both lookups are keyed by object type - confirm
    counts_by_type = pm4py.ocel_objects_ot_count(filtered_ocel)
    obj_count = counts_by_type[object_type]
    activities_by_type = pm4py.ocel_object_type_activities(filtered_ocel)
    activities = activities_by_type[object_type]
    print(f" Objects: {obj_count}")
    print(f" Activities: {len(activities)} - {list(activities)}")

    # Traditional process mining on the flattened view
    flattened = pm4py.ocel_flattening(filtered_ocel, object_type)
    net, im, fm = pm4py.discover_petri_net_inductive(flattened)

    # Model quality: alignment-based fitness and precision
    fitness = pm4py.fitness_alignments(flattened, net, im, fm)
    precision = pm4py.precision_alignments(flattened, net, im, fm)
    print(f" Model Quality - Fitness: {fitness['log_fitness']:.3f}, Precision: {precision:.3f}")

    # Persist the Petri net picture
    pm4py.save_vis_petri_net(net, im, fm, f'{object_type}_petri_net.png')

    # OC-DFG of the filtered subset, also saved as a picture
    ocdfg = pm4py.discover_ocdfg(filtered_ocel)
    pm4py.save_vis_ocdfg(ocdfg, f'{object_type}_ocdfg.png')

    analysis = {}
    analysis['object_count'] = obj_count
    analysis['activities'] = list(activities)
    analysis['fitness'] = fitness['log_fitness']
    analysis['precision'] = precision
    analysis['flattened_log'] = flattened
    analysis['petri_net'] = (net, im, fm)
    return analysis
# Run the per-type pipeline for every object type in the log
object_types = pm4py.ocel_get_object_types(ocel)
results = {
    obj_type: analyze_ocel_object_type(ocel, obj_type)
    for obj_type in object_types
}

# Side-by-side comparison of the collected metrics
print("\nComparison across object types:")
for obj_type, result in results.items():
    comparison_line = (
        f"{obj_type}: {result['object_count']} objects, "
        f"fitness={result['fitness']:.3f}, "
        f"precision={result['precision']:.3f}"
    )
    print(comparison_line)

import pm4py
def analyze_object_interactions(ocel):
    """Analyze interactions between different object types.

    Prints the interaction metrics per pair of object types, saves the
    object graph to 'full_object_graph.png' and prints a connected-component
    summary for every object type.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    None
    """
    interactions = pm4py.ocel_objects_interactions_summary(ocel)
    print("Object Type Interactions:")
    # NOTE(review): the pm4py API reference types the summary functions as
    # returning pandas DataFrames; the 'interaction_matrix' key here and the
    # 'num_components' / 'avg_component_size' keys below are unverified.
    for interaction, metrics in interactions['interaction_matrix'].items():
        obj_type1, obj_type2 = interaction
        frequency = metrics['frequency']
        strength = metrics['strength']
        print(f" {obj_type1} <-> {obj_type2}: {frequency} interactions (strength: {strength:.3f})")

    # Discover object graph and hand it to the visualizer: the graph is the
    # second argument of save_vis_object_graph, and without it the discovered
    # graph would go unused.
    obj_graph = pm4py.discover_objects_graph(ocel)
    pm4py.save_vis_object_graph(ocel, obj_graph, 'full_object_graph.png')

    # Create connected component analysis
    object_types = pm4py.ocel_get_object_types(ocel)
    for obj_type in object_types:
        # Filter by connected components containing this object type
        cc_filtered = pm4py.filter_ocel_cc_otype(ocel, obj_type)
        # Analyze component characteristics
        summary = pm4py.ocel_objects_summary(cc_filtered)
        print(f"Connected components with {obj_type}:")
        print(f" Components: {summary['num_components']}")
        print(f" Avg component size: {summary['avg_component_size']:.1f}")
# Kick off the interaction analysis on the loaded log
analyze_object_interactions(ocel)

import pm4py
def assess_ocel_quality(ocel):
    """Comprehensive OCEL data quality assessment.

    Prints a report covering object types, temporal span, object
    interactions, duplicate events and attribute count.

    Parameters:
    - ocel (OCEL): Object-centric event log

    Returns:
    Dict[str, Any]: Keys 'object_types', 'total_objects', 'total_events',
    'duplicate_rate' and 'interaction_density'. 'duplicate_rate' is 0.0 for
    a log without events.
    """
    print("OCEL Data Quality Assessment")
    print("="*40)

    # Basic statistics
    # NOTE(review): assumes ocel_objects_ot_count is keyed by object type and
    # that the summary functions return dict-like results with the keys used
    # below; the pm4py API reference suggests otherwise - confirm.
    object_types = pm4py.ocel_get_object_types(ocel)
    object_counts = pm4py.ocel_objects_ot_count(ocel)
    # Looked up once: the mapping does not change between loop iterations
    activities_per_type = pm4py.ocel_object_type_activities(ocel)
    print(f"Object Types: {len(object_types)}")
    for obj_type in object_types:
        count = object_counts[obj_type]
        activities = activities_per_type[obj_type]
        print(f" {obj_type}: {count} objects, {len(activities)} activities")

    # Temporal quality
    temporal_summary = pm4py.ocel_temporal_summary(ocel)
    print(f"\nTemporal Span: {temporal_summary['time_span']}")
    print(f"Event Rate: {temporal_summary['avg_events_per_day']:.1f} events/day")

    # Object interactions
    interactions = pm4py.ocel_objects_interactions_summary(ocel)
    print(f"\nObject Interactions: {interactions['total_interactions']}")
    print(f"Interaction Density: {interactions['interaction_density']:.3f}")

    # Check for duplicates
    original_events = len(ocel.events)
    deduplicated = pm4py.ocel_drop_duplicates(ocel)
    duplicate_events = original_events - len(deduplicated.events)
    # An empty log has no duplicates; avoid dividing by zero events
    if original_events:
        duplicate_rate = duplicate_events/original_events
        duplicate_pct = 100*duplicate_events/original_events
    else:
        duplicate_rate = 0.0
        duplicate_pct = 0.0
    print(f"\nData Quality:")
    print(f" Total Events: {original_events}")
    print(f" Duplicate Events: {duplicate_events} ({duplicate_pct:.1f}%)")

    # Completeness check
    attributes = pm4py.ocel_get_attribute_names(ocel)
    print(f" Attributes: {len(attributes)}")

    return {
        'object_types': len(object_types),
        'total_objects': sum(object_counts.values()),
        'total_events': original_events,
        'duplicate_rate': duplicate_rate,
        'interaction_density': interactions['interaction_density'],
    }
# Run the quality assessment on the loaded log
quality_metrics = assess_ocel_quality(ocel)

# Install with Tessl CLI
npx tessl i tessl/pypi-pm4py