Process mining library for discovering, analyzing and visualizing business processes from event data
npx @tessl/cli install tessl/pypi-pm4py@2.7.0
A comprehensive Python library for process mining providing extensive functionality for reading, writing, discovering, analyzing, and visualizing process models and event logs. PM4PY supports traditional event logs and Object-Centric Event Logs (OCEL), offering 280+ API functions across multiple process mining paradigms.
pip install pm4py
import pm4py
Common pattern for accessing functionality:
# Read event logs
from pm4py import read_xes, read_ocel
# Process discovery
from pm4py import discover_petri_net_inductive, discover_dfg
# Conformance checking
from pm4py import fitness_alignments, conformance_diagnostics_alignments
# Visualization
from pm4py import view_petri_net, view_dfg
# Filtering
from pm4py import filter_variants_top_k, filter_start_activities
import pm4py
import pandas as pd
# Read event log from XES file
log = pm4py.read_xes('event_log.xes')
# Alternative: Work with DataFrame
df = pd.read_csv('event_data.csv')
log = pm4py.format_dataframe(df, case_id='case_id',
activity_key='activity',
timestamp_key='timestamp')
# Process discovery - discover process model
net, initial_marking, final_marking = pm4py.discover_petri_net_inductive(log)
# Conformance checking - measure fitness
fitness = pm4py.fitness_alignments(log, net, initial_marking, final_marking)
print(f"Fitness: {fitness['log_fitness']}")
# Visualization
pm4py.view_petri_net(net, initial_marking, final_marking)
# Filtering - keep top 10 most frequent variants
filtered_log = pm4py.filter_variants_top_k(log, 10)
# Statistics
start_activities = pm4py.get_start_activities(log)
variants = pm4py.get_variants_as_tuples(log)
PM4PY is structured around several key components:
Comprehensive support for reading and writing process mining data in various formats including XES, PNML, BPMN, and Object-Centric Event Log formats.
def read_xes(file_path, variant=None, return_legacy_log_object=False, encoding='utf-8', **kwargs): ...
def write_xes(log, file_path, case_id_key='case:concept:name', extensions=None, encoding='utf-8', **kwargs): ...
def read_ocel(file_path, objects_path=None, encoding='utf-8'): ...
def write_ocel(ocel, file_path, objects_path=None, encoding='utf-8'): ...
Reading and Writing Operations
Algorithms for discovering process models from event logs, including classical miners (Alpha, Heuristics) and modern techniques (Inductive Miner, POWL).
def discover_petri_net_inductive(log, noise_threshold=0.0, multi_processing=True, activity_key='concept:name', **kwargs): ...
def discover_process_tree_inductive(log, noise_threshold=0.0, multi_processing=True, **kwargs): ...
def discover_dfg(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'): ...
def discover_heuristics_net(log, dependency_threshold=0.5, and_threshold=0.65, **kwargs): ...
Methods for measuring how well process models align with event logs, including fitness, precision, and diagnostic capabilities.
def fitness_alignments(log, petri_net, initial_marking, final_marking, multi_processing=True, **kwargs): ...
def conformance_diagnostics_alignments(log, petri_net, initial_marking, final_marking, **kwargs): ...
def fitness_token_based_replay(log, petri_net, initial_marking, final_marking, **kwargs): ...
def precision_alignments(log, petri_net, initial_marking, final_marking, **kwargs): ...
Conformance Checking and Fitness
Comprehensive filtering capabilities for event logs and OCEL including behavioral, temporal, organizational, and structural filters.
def filter_variants_top_k(log, k, activity_key='concept:name', **kwargs): ...
def filter_start_activities(log, activities, retain=True, **kwargs): ...
def filter_time_range(log, dt1, dt2, **kwargs): ...
def filter_case_performance(log, min_performance, max_performance, **kwargs): ...
Extensive visualization capabilities for process models, statistics, and analysis results with both viewing and saving options.
def view_petri_net(petri_net, initial_marking=None, final_marking=None, format='png', **kwargs): ...
def view_dfg(dfg, start_activities=None, end_activities=None, format='png', **kwargs): ...
def save_vis_process_tree(tree, file_path, **kwargs): ...
def view_dotted_chart(log, **kwargs): ...
Specialized operations for Object-Centric Event Logs (OCEL) including discovery, analysis, and manipulation of multi-dimensional process data.
def ocel_flattening(ocel, object_type): ...
def discover_ocdfg(ocel, **kwargs): ...
def discover_oc_petri_net(ocel, **kwargs): ...
def ocel_objects_interactions_summary(ocel): ...
Statistical analysis functions for process behavior, performance metrics, and advanced analytical operations.
def get_variants_as_tuples(log, activity_key='concept:name', **kwargs): ...
def get_case_duration(log, timestamp_key='time:timestamp', case_id_key='case:concept:name'): ...
def get_start_activities(log, **kwargs): ...
def check_soundness(petri_net, initial_marking, final_marking): ...
Utility functions for data manipulation, format conversion, and model transformation between different representations.
def format_dataframe(df, case_id='case:concept:name', activity_key='concept:name', **kwargs): ...
def convert_to_petri_net(*args, **kwargs): ...
def convert_to_process_tree(*args, **kwargs): ...
def serialize(obj, file_path): ...
Machine learning features for predictive process analytics and organizational mining for resource and social network analysis.
def extract_features_dataframe(log, **kwargs): ...
def split_train_test(log, train_percentage=0.8, **kwargs): ...
def discover_handover_of_work_network(log, beta=0, **kwargs): ...
def discover_organizational_roles(log, **kwargs): ...
Machine Learning and Organizational Mining
Complete type definitions for PM4PY objects referenced in the API.
# Core Data Types
from typing import Dict, List, Tuple, Optional, Union, Any
import pandas as pd
# Event Log Types
EventLog = List[Dict[str, Any]] # Collection of events with attributes
EventStream = List[Dict[str, Any]] # Ordered sequence of events
# Process Model Types
class PetriNet:
"""Petri net with places, transitions, and arcs."""
places: List[Any]
transitions: List[Any]
arcs: List[Any]
class ProcessTree:
"""Hierarchical process tree representation."""
operator: str
children: List['ProcessTree']
label: Optional[str]
class BPMN:
"""Business Process Model and Notation object."""
nodes: List[Any]
flows: List[Any]
class HeuristicsNet:
"""Heuristics net representation."""
activities: List[str]
dependencies: Dict[Tuple[str, str], float]
# Discovery Types
DFG = Dict[Tuple[str, str], int] # Directly-Follows Graph
PerformanceDFG = Dict[Tuple[str, str], float] # Performance-annotated DFG
# OCEL Types
class OCEL:
"""Object-Centric Event Log."""
events: pd.DataFrame
objects: pd.DataFrame
relations: pd.DataFrame
class OCDFG:
"""Object-Centric Directly-Follows Graph."""
activities: List[str]
objects: List[str]
edges: Dict[Tuple[str, str], int]
# Conformance Types
AlignmentResult = Dict[str, Any] # Alignment computation results
FitnessResult = Dict[str, float] # Fitness measurement results
ReplayResult = Dict[str, Any] # Token-based replay results
# Marking Types
Marking = Dict[Any, int] # Petri net marking (place -> tokens)
# Analysis Types
VariantDict = Dict[Tuple[str, ...], int] # Process variants with frequencies
CaseDuration = Dict[str, float] # Case durations by case ID
ActivityStats = Dict[str, Any] # Activity statistics