Tessl Tile for pypi/pm4py@2.7.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

conformance-checking.md filtering.md index.md ml-organizational.md object-centric.md process-discovery.md reading-writing.md statistics-analysis.md utilities-conversion.md visualization.md

utilities-conversion.mddocs/

0
# Utilities and Conversion
1

2
Utility functions for data manipulation, format conversion, and model transformation between different representations. PM4PY provides comprehensive tools for data preprocessing, serialization, and interoperability between various process mining formats and tools.
3

4
## Capabilities
5

6
### Data Format Utilities
7

8
Core utilities for formatting and preparing data for process mining analysis.
9

10
```python { .api }
11
def format_dataframe(df, case_id='case:concept:name', activity_key='concept:name', timestamp_key='time:timestamp', start_timestamp_key='start_timestamp', timest_format=None):
12
    """
13
    Format DataFrame for process mining with proper column names and data types.
14
    
15
    Parameters:
16
    - df (pd.DataFrame): Input DataFrame to format
17
    - case_id (str): Column name for case identifier
18
    - activity_key (str): Column name for activity
19
    - timestamp_key (str): Column name for timestamp
20
    - start_timestamp_key (str): Column name for start timestamp (optional)
21
    - timest_format (Optional[str]): Timestamp format string
22
    
23
    Returns:
24
    pd.DataFrame: Formatted DataFrame ready for process mining
25
    """
26

27
def get_properties(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name', resource_key='org:resource', group_key=None, start_timestamp_key=None, **kwargs):
28
    """
29
    Retrieve properties from a log object.
30
    
31
    Parameters:
32
    - log (Union[EventLog, pd.DataFrame]): Event log to extract properties from
33
    - activity_key (str): Column name for activity attribute
34
    - timestamp_key (str): Column name for timestamp attribute
35
    - case_id_key (str): Column name for case identifier attribute
36
    - resource_key (str): Column name for resource attribute
37
    - group_key (Optional[str]): Optional column name for group identifier
38
    - start_timestamp_key (Optional[str]): Optional column name for start timestamp
39
    - **kwargs: Additional keyword arguments
40
    
41
    Returns:
42
    dict: Dictionary of properties extracted from the log
43
    """
44
```
45

46
### Model Parsing and Serialization
47

48
Parse model representations from strings and serialize/deserialize PM4PY objects.
49

50
```python { .api }
51
def parse_process_tree(process_tree_string):
52
    """
53
    Parse process tree from string representation.
54
    
55
    Parameters:
56
    - process_tree_string (str): String representation of process tree
57
    
58
    Returns:
59
    ProcessTree: Parsed process tree object
60
    """
61

62
def parse_powl_model_string(powl_string):
63
    """
64
    Parse POWL model from string representation.
65
    
66
    Parameters:
67
    - powl_string (str): String representation of POWL model
68
    
69
    Returns:
70
    POWL: Parsed POWL model object
71
    """
72

73
def parse_event_log_string(event_log_string):
74
    """
75
    Parse event log from string representation.
76
    
77
    Parameters:
78
    - event_log_string (str): String representation of event log
79
    
80
    Returns:
81
    EventLog: Parsed event log object
82
    """
83

84
def serialize(obj, file_path):
85
    """
86
    Serialize PM4PY object to file for persistence.
87
    
88
    Parameters:
89
    - obj (Any): PM4PY object to serialize
90
    - file_path (str): Path to save serialized object
91
    
92
    Returns:
93
    None
94
    """
95

96
def deserialize(file_path):
97
    """
98
    Deserialize PM4PY object from file.
99
    
100
    Parameters:
101
    - file_path (str): Path to serialized object file
102
    
103
    Returns:
104
    Any: Deserialized PM4PY object
105
    """
106
```
107

108
### Log Manipulation Utilities
109

110
Functions for modifying and manipulating event log data.
111

112
```python { .api }
113
def set_classifier(log, classifier_key):
114
    """
115
    Set event classifier for log (changes the activity attribute used).
116
    
117
    Parameters:
118
    - log (Union[EventLog, pd.DataFrame]): Event log to modify
119
    - classifier_key (str): New classifier attribute name
120
    
121
    Returns:
122
    Union[EventLog, pd.DataFrame]: Log with updated classifier
123
    """
124

125
def project_on_event_attribute(log, attribute_key):
126
    """
127
    Project log on a specific event attribute (each case is turned into the list of that attribute's values).
128
    
129
    Parameters:
130
    - log (Union[EventLog, pd.DataFrame]): Event log data
131
    - attribute_key (str): Attribute to project on
132
    
133
    Returns:
134
    List[List[str]]: One list per case, containing the values of the chosen attribute for that case's events
135
    """
136

137
def sample_cases(log, n_cases):
138
    """
139
    Sample n random cases from event log.
140
    
141
    Parameters:
142
    - log (Union[EventLog, pd.DataFrame]): Event log data
143
    - n_cases (int): Number of cases to sample
144
    
145
    Returns:
146
    Union[EventLog, pd.DataFrame]: Sampled event log
147
    """
148

149
def sample_events(log, n_events):
150
    """
151
    Sample n random events from event log.
152
    
153
    Parameters:
154
    - log (Union[EventLog, pd.DataFrame]): Event log data
155
    - n_events (int): Number of events to sample
156
    
157
    Returns:
158
    Union[EventLog, pd.DataFrame]: Sampled event log
159
    """
160

161
def rebase(log, case_id_key='case:concept:name'):
162
    """
163
    Re-base the log object, changing which attributes are used as case ID, activity and/or timestamp (mapping them to the standard column names).
164
    
165
    Parameters:
166
    - log (Union[EventLog, pd.DataFrame]): Event log data
167
    - case_id_key (str): Case ID attribute name
168
    
169
    Returns:
170
    Union[EventLog, pd.DataFrame]: Rebased event log
171
    """
172
```
173

174
### Log Format Conversions
175

176
Convert between different event log representations and formats.
177

178
```python { .api }
179
def convert_to_event_log(obj, case_id_key='case:concept:name', **kwargs):
180
    """
181
    Convert DataFrame or EventStream to EventLog object.
182
    
183
    Parameters:
184
    - obj (Union[pd.DataFrame, EventStream]): Object to convert
185
    - case_id_key (str): Case identifier column
186
    - **kwargs: Additional conversion parameters
187
    
188
    Returns:
189
    EventLog: Converted event log object
190
    """
191

192
def convert_to_event_stream(obj, case_id_key='case:concept:name', **kwargs):
193
    """
194
    Convert log or DataFrame to EventStream format.
195
    
196
    Parameters:
197
    - obj (Union[EventLog, pd.DataFrame]): Object to convert
198
    - case_id_key (str): Case identifier column
199
    - **kwargs: Additional conversion parameters
200
    
201
    Returns:
202
    EventStream: Converted event stream object
203
    """
204

205
def convert_to_dataframe(obj, case_id_key='case:concept:name', **kwargs):
206
    """
207
    Convert log objects to pandas DataFrame.
208
    
209
    Parameters:
210
    - obj (Union[EventLog, EventStream]): Object to convert
211
    - case_id_key (str): Case identifier column
212
    - **kwargs: Additional conversion parameters
213
    
214
    Returns:
215
    pd.DataFrame: Converted DataFrame
216
    """
217
```
218

219
### Model Conversions
220

221
Convert between different process model representations.
222

223
```python { .api }
224
def convert_to_bpmn(*args, **kwargs):
225
    """
226
    Convert various models (Petri net, process tree) to BPMN format.
227
    
228
    Parameters:
229
    - *args: Model objects to convert
230
    - **kwargs: Conversion parameters
231
    
232
    Returns:
233
    BPMN: Converted BPMN model
234
    """
235

236
def convert_to_petri_net(*args, **kwargs):
237
    """
238
    Convert various models (process tree, BPMN, DFG) to Petri net.
239
    
240
    Parameters:
241
    - *args: Model objects to convert
242
    - **kwargs: Conversion parameters
243
    
244
    Returns:
245
    Tuple[PetriNet, Marking, Marking]: Converted Petri net with markings
246
    """
247

248
def convert_to_process_tree(*args, **kwargs):
249
    """
250
    Convert various models (Petri net, BPMN) to process tree.
251
    
252
    Parameters:
253
    - *args: Model objects to convert
254
    - **kwargs: Conversion parameters
255
    
256
    Returns:
257
    ProcessTree: Converted process tree
258
    """
259

260
def convert_to_reachability_graph(petri_net, initial_marking, **kwargs):
261
    """
262
    Convert Petri net to reachability graph (state space exploration).
263
    
264
    Parameters:
265
    - petri_net (PetriNet): Petri net model
266
    - initial_marking (Marking): Initial marking
267
    - **kwargs: Conversion parameters
268
    
269
    Returns:
270
    TransitionSystem: Reachability graph as transition system
271
    """
272

273
def convert_to_powl(*args, **kwargs):
274
    """
275
    Convert various models to POWL (Partially Ordered Workflow Language).
276
    
277
    Parameters:
278
    - *args: Model objects to convert
279
    - **kwargs: Conversion parameters
280
    
281
    Returns:
282
    POWL: Converted POWL model
283
    """
284
```
285

286
### NetworkX Integration
287

288
Convert process mining objects to NetworkX graphs for network analysis.
289

290
```python { .api }
291
def convert_log_to_networkx(log, **kwargs):
292
    """
293
    Convert event log to NetworkX directed graph.
294
    
295
    Parameters:
296
    - log (Union[EventLog, pd.DataFrame]): Event log data
297
    - **kwargs: Graph construction parameters
298
    
299
    Returns:
300
    nx.DiGraph: NetworkX directed graph representation
301
    """
302

303
def convert_ocel_to_networkx(ocel, **kwargs):
304
    """
305
    Convert Object-Centric Event Log to NetworkX graph.
306
    
307
    Parameters:
308
    - ocel (OCEL): Object-centric event log
309
    - **kwargs: Graph construction parameters
310
    
311
    Returns:
312
    nx.DiGraph: NetworkX directed graph representation
313
    """
314

315
def convert_petri_net_to_networkx(petri_net, initial_marking, **kwargs):
316
    """
317
    Convert Petri net to NetworkX graph representation.
318
    
319
    Parameters:
320
    - petri_net (PetriNet): Petri net model
321
    - initial_marking (Marking): Initial marking
322
    - **kwargs: Graph construction parameters
323
    
324
    Returns:
325
    nx.DiGraph: NetworkX directed graph representation
326
    """
327
```
328

329
### Special Conversions
330

331
Specialized conversion functions for specific use cases.
332

333
```python { .api }
334
def convert_log_to_ocel(log, **kwargs):
335
    """
336
    Convert traditional event log to Object-Centric Event Log.
337
    
338
    Parameters:
339
    - log (Union[EventLog, pd.DataFrame]): Traditional event log
340
    - **kwargs: OCEL conversion parameters
341
    
342
    Returns:
343
    OCEL: Converted object-centric event log
344
    """
345

346
def convert_log_to_time_intervals(log, **kwargs):
347
    """
348
    Convert event log to time interval representation.
349
    
350
    Parameters:
351
    - log (Union[EventLog, pd.DataFrame]): Event log data
352
    - **kwargs: Interval conversion parameters
353
    
354
    Returns:
355
    pd.DataFrame: Time intervals DataFrame
356
    """
357

358
def convert_petri_net_type(petri_net, target_type, **kwargs):
359
    """
360
    Convert between different Petri net types and representations.
361
    
362
    Parameters:
363
    - petri_net (PetriNet): Source Petri net
364
    - target_type (str): Target Petri net type
365
    - **kwargs: Conversion parameters
366
    
367
    Returns:
368
    PetriNet: Converted Petri net
369
    """
370
```
371

372
## Usage Examples
373

374
### Data Formatting and Preparation
375

376
```python
377
import pm4py
378
import pandas as pd
379

380
# Load raw data
381
raw_data = pd.read_csv('process_data.csv')
382

383
# Format for process mining
384
formatted_log = pm4py.format_dataframe(
385
    raw_data,
386
    case_id='CaseID',
387
    activity_key='Activity',
388
    timestamp_key='Timestamp'
389
)
390

391
print("Formatted log ready for process mining")
392
print(f"Cases: {formatted_log['case:concept:name'].nunique()}")
393
print(f"Events: {len(formatted_log)}")
394
print(f"Activities: {formatted_log['concept:name'].nunique()}")
395

396
# Rebase timestamps to start from zero
397
rebased_log = pm4py.rebase(formatted_log)
398
print("Timestamps rebased to start from zero")
399
```
400

401
### Log Sampling and Manipulation
402

403
```python
404
import pm4py
405

406
# Load full event log
407
log = pm4py.read_xes('large_event_log.xes')
408
print(f"Original log: {len(log)} cases")
409

410
# Sample subset for analysis
411
sample_log = pm4py.sample_cases(log, 1000)
412
print(f"Sampled log: {len(sample_log)} cases")
413

414
# Sample specific number of events
415
event_sample = pm4py.sample_events(log, 10000)
416
print(f"Event sample: {len(event_sample)} events")
417

418
# Change classifier
419
classified_log = pm4py.set_classifier(log, 'org:resource')
420
print("Changed classifier to resource")
421

422
# Project on specific attribute
423
projected_log = pm4py.project_on_event_attribute(log, 'lifecycle:transition')
424
print("Projected on lifecycle transition")
425
```
426

427
### Model Format Conversions
428

429
```python
430
import pm4py
431

432
# Discover process tree
433
log = pm4py.read_xes('event_log.xes')
434
tree = pm4py.discover_process_tree_inductive(log)
435

436
# Convert to different model formats
437
net, initial_marking, final_marking = pm4py.convert_to_petri_net(tree)
438
print("Converted process tree to Petri net")
439

440
bpmn_model = pm4py.convert_to_bpmn(tree)
441
print("Converted process tree to BPMN")
442

443
powl_model = pm4py.convert_to_powl(tree)
444
print("Converted process tree to POWL")
445

446
# Convert Petri net to reachability graph
447
reachability = pm4py.convert_to_reachability_graph(net, initial_marking)
448
print("Converted Petri net to reachability graph")
449
```
450

451
### Log Format Conversions
452

453
```python
454
import pm4py
455

456
# Convert between different log formats
457
log = pm4py.read_xes('event_log.xes')
458

459
# Convert to DataFrame
460
df = pm4py.convert_to_dataframe(log)
461
print(f"Converted to DataFrame: {len(df)} rows")
462

463
# Convert DataFrame back to EventLog
464
event_log = pm4py.convert_to_event_log(df)
465
print("Converted DataFrame back to EventLog")
466

467
# Convert to EventStream
468
event_stream = pm4py.convert_to_event_stream(log)
469
print("Converted to EventStream")
470

471
# Convert traditional log to OCEL
472
ocel = pm4py.convert_log_to_ocel(
473
    log,
474
    object_type_column='case:concept:name',
475
    object_type_name='Case'
476
)
477
print("Converted traditional log to OCEL")
478
```
479

480
### NetworkX Integration
481

482
```python
483
import pm4py
484
import networkx as nx
485

486
# Convert log to NetworkX graph
487
log = pm4py.read_xes('event_log.xes')
488
G = pm4py.convert_log_to_networkx(log)
489

490
print(f"NetworkX graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
491

492
# Analyze graph properties
493
print(f"Graph density: {nx.density(G):.3f}")
494
print(f"Strongly connected: {nx.is_strongly_connected(G)}")
495

496
# Convert Petri net to NetworkX
497
net, im, fm = pm4py.discover_petri_net_inductive(log)
498
net_graph = pm4py.convert_petri_net_to_networkx(net, im)
499
print(f"Petri net graph: {net_graph.number_of_nodes()} nodes")
500

501
# Convert OCEL to NetworkX
502
ocel = pm4py.read_ocel('ocel_data.csv')
503
ocel_graph = pm4py.convert_ocel_to_networkx(ocel)
504
print(f"OCEL graph: {ocel_graph.number_of_nodes()} nodes")
505
```
506

507
### Model Parsing and Serialization
508

509
```python
510
import pm4py
511

512
# Parse models from string representations
513
tree_string = "->('A', +('B', 'C'), 'D')"
514
tree = pm4py.parse_process_tree(tree_string)
515
print("Parsed process tree from string")
516

517
# Serialize model for later use
518
pm4py.serialize(tree, 'process_tree.pkl')
519
print("Serialized process tree")
520

521
# Deserialize model
522
loaded_tree = pm4py.deserialize('process_tree.pkl')
523
print("Deserialized process tree")
524

525
# Parse event log from string
526
log_string = """
527
Case1: A, B, C
528
Case2: A, C, B
529
Case3: A, B, B, C
530
"""
531
parsed_log = pm4py.parse_event_log_string(log_string)
532
print("Parsed event log from string")
533
```
534

535
### Time Interval Analysis
536

537
```python
538
import pm4py
539

540
# Convert log to time intervals
541
log = pm4py.read_xes('event_log.xes')
542
intervals = pm4py.convert_log_to_time_intervals(log)
543

544
print("Time Intervals Analysis:")
545
print(f"Intervals: {len(intervals)}")
546
print(intervals[['case_id', 'activity', 'start_time', 'end_time', 'duration']].head())
547

548
# Analyze interval patterns
549
avg_duration = intervals['duration'].mean()
550
max_duration = intervals['duration'].max()
551
print(f"Average interval duration: {avg_duration/60:.1f} minutes")
552
print(f"Maximum interval duration: {max_duration/3600:.1f} hours")
553
```
554

555
### Batch Conversion Pipeline
556

557
```python
558
import pm4py
559
import os
560

561
def convert_process_models(input_dir, output_dir):
562
    """Convert all process trees in directory to multiple formats."""
563
    
564
    os.makedirs(output_dir, exist_ok=True)
565
    
566
    for filename in os.listdir(input_dir):
567
        if filename.endswith('.ptml'):
568
            filepath = os.path.join(input_dir, filename)
569
            base_name = filename[:-5]  # Remove .ptml extension
570
            
571
            # Read process tree
572
            tree = pm4py.read_ptml(filepath)
573
            
574
            # Convert to different formats
575
            net, im, fm = pm4py.convert_to_petri_net(tree)
576
            bpmn = pm4py.convert_to_bpmn(tree)
577
            
578
            # Save in multiple formats
579
            pm4py.write_pnml(net, im, fm, os.path.join(output_dir, f"{base_name}.pnml"))
580
            pm4py.write_bpmn(bpmn, os.path.join(output_dir, f"{base_name}.bpmn"))
581
            
582
            # Serialize for PM4PY
583
            pm4py.serialize(tree, os.path.join(output_dir, f"{base_name}.pkl"))
584
            
585
            print(f"Converted {filename} to multiple formats")
586

587
# Run batch conversion
588
convert_process_models('input_models/', 'output_models/')
589
```
590

591
### Data Quality Enhancement Pipeline
592

593
```python
594
import pm4py
595
import pandas as pd
596

597
def enhance_log_quality(raw_log):
598
    """Comprehensive data quality enhancement pipeline."""
599
    
600
    print("Starting data quality enhancement...")
601
    
602
    # 1. Format DataFrame properly
603
    if isinstance(raw_log, pd.DataFrame):
604
        log = pm4py.format_dataframe(raw_log)
605
    else:
606
        log = raw_log
607
    
608
    print(f"Original: {len(log)} events")
609
    
610
    # 2. Remove incomplete cases (fewer than 2 events)
611
    log = pm4py.filter_case_size(log, min_size=2, max_size=float('inf'))
612
    print(f"After min size filter: {len(log)} events")
613
    
614
    # 3. Remove extreme durations (outliers)
615
    durations = pm4py.get_all_case_durations(log)
616
    q1 = pd.Series(durations).quantile(0.25)
617
    q3 = pd.Series(durations).quantile(0.75)
618
    iqr = q3 - q1
619
    lower_bound = q1 - 1.5 * iqr
620
    upper_bound = q3 + 1.5 * iqr
621
    
622
    log = pm4py.filter_case_performance(log, lower_bound, upper_bound)
623
    print(f"After duration outlier removal: {len(log)} events")
624
    
625
    # 4. Keep only most frequent variants (80% coverage)
626
    log = pm4py.filter_variants_by_coverage_percentage(log, 0.8)
627
    print(f"After variant filtering: {len(log)} events")
628
    
629
    # 5. Rebase timestamps
630
    log = pm4py.rebase(log)
631
    print("Timestamps rebased")
632
    
633
    # 6. Sample if too large
634
    if len(set(log['case:concept:name'])) > 5000:
635
        log = pm4py.sample_cases(log, 5000)
636
        print(f"Sampled to: {len(log)} events")
637
    
638
    return log
639

640
# Apply quality enhancement
641
raw_data = pd.read_csv('messy_process_data.csv')
642
clean_log = enhance_log_quality(raw_data)
643
```
644

645
### Multi-Format Export
646

647
```python
648
import pm4py
649

650
def export_analysis_results(log, output_prefix='analysis'):
651
    """Export process mining analysis in multiple formats."""
652
    
653
    # Discover models
654
    net, im, fm = pm4py.discover_petri_net_inductive(log)
655
    tree = pm4py.discover_process_tree_inductive(log)
656
    dfg, start_acts, end_acts = pm4py.discover_dfg(log)
657
    
658
    # Export logs
659
    pm4py.write_xes(log, f'{output_prefix}_log.xes')
660
    df = pm4py.convert_to_dataframe(log)
661
    df.to_csv(f'{output_prefix}_log.csv', index=False)
662
    
663
    # Export models
664
    pm4py.write_pnml(net, im, fm, f'{output_prefix}_petri_net.pnml')
665
    pm4py.write_ptml(tree, f'{output_prefix}_process_tree.ptml')
666
    pm4py.write_dfg(dfg, start_acts, end_acts, f'{output_prefix}_dfg.dfg')
667
    
668
    # Convert to NetworkX and export
669
    G = pm4py.convert_log_to_networkx(log)
670
    # Could export with nx.write_gml(G, f'{output_prefix}_graph.gml')
671
    
672
    # Serialize PM4PY objects
673
    pm4py.serialize(net, f'{output_prefix}_petri_net.pkl')
674
    pm4py.serialize(tree, f'{output_prefix}_process_tree.pkl')
675
    
676
    print(f"Analysis results exported with prefix: {output_prefix}")
677

678
# Export everything
679
export_analysis_results(log, 'my_process_analysis')
680
```

Version

Tile

Files

utilities-conversion.md docs/

utilities-conversion.mddocs/