Process mining library for discovering, analyzing and visualizing business processes from event data.

Comprehensive conformance checking methods for measuring how well process models align with event logs. PM4PY provides token-based replay, alignment-based methods, and specialized techniques for different model types and conformance dimensions.

Token-based replay simulates the execution of event log traces on process models to measure fitness and detect deviations.
def conformance_diagnostics_token_based_replay(log, petri_net, initial_marking, final_marking, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name', return_diagnostics_dataframe=False, opt_parameters=None):
    """
    Run full token-based replay diagnostics with detailed per-trace analysis.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - petri_net (PetriNet): Petri net model
    - initial_marking (Marking): Initial marking of the net
    - final_marking (Marking): Final (target) marking of the net
    - activity_key (str): Attribute holding the activity name
    - timestamp_key (str): Attribute holding the event timestamp
    - case_id_key (str): Attribute holding the case identifier
    - return_diagnostics_dataframe (bool): If True, return the results as a DataFrame
    - opt_parameters (Optional[Dict]): Additional algorithm parameters

    Returns:
    List[Dict[str, Any]]: Detailed replay diagnostics for each trace
    """
def fitness_token_based_replay(log, petri_net, initial_marking, final_marking, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):
    """
    Calculate fitness using token-based replay.

    Fast method for measuring model-log fitness.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - petri_net (PetriNet): Petri net model
    - initial_marking (Marking): Initial marking
    - final_marking (Marking): Final marking
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name

    Returns:
    Dict[str, float]: Fitness metrics including log_fitness and average_trace_fitness
    """
def precision_token_based_replay(log, petri_net, initial_marking, final_marking, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):
    """
    Calculate precision using token-based replay.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - petri_net (PetriNet): Petri net model
    - initial_marking (Marking): Initial marking
    - final_marking (Marking): Final marking
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name

    Returns:
    float: Precision value between 0 and 1
"""Alignments provide optimal mappings between log traces and model executions, offering detailed diagnostics and accurate conformance measurements.
def conformance_diagnostics_alignments(log, *args, multi_processing=True, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name', variant_str=None, return_diagnostics_dataframe=False, **kwargs):
    """
    Run full alignment diagnostics with optimal trace-model mappings.

    Most accurate method for conformance analysis.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - *args: Model components (petri_net, initial_marking, final_marking)
    - multi_processing (bool): Enable parallel processing
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name
    - variant_str (Optional[str]): Algorithm variant
    - return_diagnostics_dataframe (bool): Return results as DataFrame
    - **kwargs: Additional parameters

    Returns:
    List[Dict[str, Any]]: Detailed alignment diagnostics per trace
    """
def fitness_alignments(log, petri_net, initial_marking, final_marking, multi_processing=True, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name', variant_str=None):
    """
    Calculate fitness using optimal alignments.

    Gold standard for fitness measurement.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - petri_net (PetriNet): Petri net model
    - initial_marking (Marking): Initial marking
    - final_marking (Marking): Final marking
    - multi_processing (bool): Enable parallel processing
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name
    - variant_str (Optional[str]): Algorithm variant

    Returns:
    Dict[str, float]: Fitness metrics with detailed breakdown
    """
def precision_alignments(log, petri_net, initial_marking, final_marking, multi_processing=True, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):
    """
    Calculate precision using alignments.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - petri_net (PetriNet): Petri net model
    - initial_marking (Marking): Initial marking
    - final_marking (Marking): Final marking
    - multi_processing (bool): Enable parallel processing
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name

    Returns:
    float: Precision value between 0 and 1
"""Legacy footprints-based conformance checking methods (deprecated in version 2.3.0).
def conformance_diagnostics_footprints(*args):
    """
    Footprints-based conformance diagnostics (deprecated in 2.3.0).

    Parameters:
    - *args: Log and model arguments

    Returns:
    Union[List[Dict[str, Any]], Dict[str, Any]]: Conformance results
    """
def fitness_footprints(*args):
    """
    Footprints-based fitness calculation (deprecated in 2.3.0).

    Parameters:
    - *args: Log and model arguments

    Returns:
    Dict[str, float]: Fitness metrics
    """
def precision_footprints(*args):
    """
    Footprints-based precision calculation (deprecated in 2.3.0).

    Parameters:
    - *args: Log and model arguments

    Returns:
    float: Precision value
"""Specialized conformance checking methods for different model types and quality dimensions.
def generalization_tbr(log, petri_net, initial_marking, final_marking, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):
    """
    Calculate generalization using token-based replay.

    Measures the model's ability to handle unseen behavior.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - petri_net (PetriNet): Petri net model
    - initial_marking (Marking): Initial marking
    - final_marking (Marking): Final marking
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name

    Returns:
    float: Generalization value between 0 and 1
    """
def replay_prefix_tbr(prefix, net, im, fm, activity_key='concept:name'):
    """
    Replay an activity prefix using token-based replay.

    Useful for predictive process monitoring.

    Parameters:
    - prefix (List[str]): Activity sequence prefix
    - net (PetriNet): Petri net model
    - im (Marking): Initial marking
    - fm (Marking): Final marking
    - activity_key (str): Activity attribute name

    Returns:
    Marking: Marking reached after replaying the prefix
"""Conformance checking for temporal constraints and time-aware models.
def conformance_temporal_profile(log, temporal_profile, zeta=1.0, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name', return_diagnostics_dataframe=False):
    """
    Temporal profile conformance checking for time constraints.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - temporal_profile (Dict): Temporal constraints between activities
    - zeta (float): Tolerated deviation factor for the time constraints
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name
    - return_diagnostics_dataframe (bool): Return results as DataFrame

    Returns:
    List[List[Tuple[float, float, float, float]]]: Temporal conformance results per trace
"""Conformance checking for declarative models with temporal logic constraints.
def conformance_declare(log, declare_model, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name', return_diagnostics_dataframe=False):
    """
    DECLARE model conformance checking for temporal logic constraints.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - declare_model (Dict): DECLARE model with constraints
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name
    - return_diagnostics_dataframe (bool): Return results as DataFrame

    Returns:
    List[Dict[str, Any]]: Constraint violation diagnostics per trace
    """
def conformance_log_skeleton(log, log_skeleton, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name', return_diagnostics_dataframe=False):
    """
    Log skeleton conformance checking for behavioral constraints.

    Parameters:
    - log (Union[EventLog, pd.DataFrame]): Event log data
    - log_skeleton (Dict): Log skeleton constraints
    - activity_key (str): Activity attribute name
    - timestamp_key (str): Timestamp attribute name
    - case_id_key (str): Case ID attribute name
    - return_diagnostics_dataframe (bool): Return results as DataFrame

    Returns:
    List[Set[Any]]: Constraint violations per trace
"""Helper functions for conformance checking operations.
def check_is_fitting(*args, activity_key='concept:name'):
    """
    Check whether a trace fits a model (deprecated in 2.3.0).

    Simple boolean fitness check.

    Parameters:
    - *args: Trace and model arguments
    - activity_key (str): Activity attribute name

    Returns:
    bool: True if the trace fits the model
"""import pm4py
# Load an event log and discover a Petri net with the inductive miner
log = pm4py.read_xes('event_log.xes')
net, initial_marking, final_marking = pm4py.discover_petri_net_inductive(log)
# Fitness via alignments (most accurate, recommended method)
fitness = pm4py.fitness_alignments(log, net, initial_marking, final_marking)
print(f"Log fitness: {fitness['log_fitness']}")
print(f"Average trace fitness: {fitness['average_trace_fitness']}")
# Precision via alignments: penalizes extra behavior the model allows
precision = pm4py.precision_alignments(log, net, initial_marking, final_marking)
print(f"Precision: {precision}")
# Generalization via token-based replay: ability to handle unseen behavior
generalization = pm4py.generalization_tbr(log, net, initial_marking, final_marking)
print(f"Generalization: {generalization}")import pm4py
# Get detailed alignment diagnostics.
# NOTE: the loop below reads per-trace dicts, so the diagnostics must be
# returned as a list of dicts (return_diagnostics_dataframe=False); iterating
# a DataFrame would yield column labels, not trace entries.
diagnostics = pm4py.conformance_diagnostics_alignments(
    log, net, initial_marking, final_marking,
    multi_processing=True,
    return_diagnostics_dataframe=False
)
# Analyze the per-trace diagnostics
for trace_diagnostic in diagnostics:
    case_id = trace_diagnostic['case_id']
    fitness = trace_diagnostic['fitness']
    cost = trace_diagnostic['cost']
    alignment = trace_diagnostic['alignment']
    print(f"Case {case_id}: fitness={fitness}, cost={cost}")
    # Walk the alignment moves: 'sync' = matched, 'log' = only in the log
    # (missing in model), 'model' = only in the model (extra behavior)
    for move in alignment:
        move_type = move['type']  # 'sync', 'log', 'model'
        if move_type == 'log':
            print(f"  Missing in model: {move['activity']}")
        elif move_type == 'model':
            print(f"  Extra in model: {move['activity']}")

import pm4py
# Use token-based replay for faster (though less precise) computation
fitness_tbr = pm4py.fitness_token_based_replay(log, net, initial_marking, final_marking)
precision_tbr = pm4py.precision_token_based_replay(log, net, initial_marking, final_marking)
print(f"TBR Fitness: {fitness_tbr['log_fitness']}")
print(f"TBR Precision: {precision_tbr}")
# Get detailed token-based diagnostics per trace
tbr_diagnostics = pm4py.conformance_diagnostics_token_based_replay(
    log, net, initial_marking, final_marking
)
# Missing tokens = behavior the model could not replay; remaining tokens =
# model behavior never consumed by the trace
for diag in tbr_diagnostics:
    print(f"Case: {diag['case_id']}, Fitness: {diag['trace_fitness']}")
    print(f"  Missing tokens: {diag['missing_tokens']}")
    print(f"  Remaining tokens: {diag['remaining_tokens']}")

import pm4py
# Discover the temporal profile (expected durations between activity pairs)
temporal_profile = pm4py.discover_temporal_profile(log)
# Check temporal conformance against the profile
temporal_conformance = pm4py.conformance_temporal_profile(
    log, temporal_profile,
    zeta=1.5  # Allow 50% deviation from expected times
)
# Report the temporal violations per trace
for trace_idx, trace_result in enumerate(temporal_conformance):
    print(f"Trace {trace_idx}:")
    for constraint_result in trace_result:
        expected_min, expected_max, actual_time, deviation = constraint_result
        if deviation > 0:
            print(f" Time violation: expected [{expected_min}, {expected_max}], actual {actual_time}")

import pm4py
# Discover DECLARE constraints (keep only those supported by >= 80% of cases)
declare_model = pm4py.discover_declare(log, min_support_ratio=0.8)
# Check DECLARE conformance of the log against the discovered constraints
declare_conformance = pm4py.conformance_declare(log, declare_model)
# Report constraint violations per case
for trace_result in declare_conformance:
    case_id = trace_result['case_id']
    violations = trace_result['violations']
    if violations:
        print(f"Case {case_id} violations:")
        for constraint, violation_info in violations.items():
            print(f"  {constraint}: {violation_info}")

import pm4py
def compute_quality_metrics(log, net, initial_marking, final_marking):
    """
    Compute the four standard quality dimensions for a process model.

    Parameters:
    - log: Event log (EventLog or pd.DataFrame)
    - net: Petri net model
    - initial_marking: Initial marking of the net
    - final_marking: Final marking of the net

    Returns:
    dict: 'fitness', 'precision', 'generalization' and 'simplicity' scores
    """
    # Fitness: how well the model explains the log (alignment-based)
    fitness = pm4py.fitness_alignments(log, net, initial_marking, final_marking)
    # Precision: how much extra behavior the model allows
    precision = pm4py.precision_alignments(log, net, initial_marking, final_marking)
    # Generalization: model's ability to handle unseen behavior
    generalization = pm4py.generalization_tbr(log, net, initial_marking, final_marking)
    # Simplicity: structural complexity of the net
    simplicity = pm4py.simplicity_petri_net(net)
    return {
        'fitness': fitness['log_fitness'],
        'precision': precision,
        'generalization': generalization,
        'simplicity': simplicity
    }
# Evaluate model quality
metrics = compute_quality_metrics(log, net, initial_marking, final_marking)
print("Model Quality Metrics:")
for metric, value in metrics.items():
print(f" {metric.capitalize()}: {value:.3f}")Install with Tessl CLI
npx tessl i tessl/pypi-pm4py