or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

conformance-checking.md · filtering.md · index.md · ml-organizational.md · object-centric.md · process-discovery.md · reading-writing.md · statistics-analysis.md · utilities-conversion.md · visualization.md

docs/statistics-analysis.md

0

# Statistics and Analysis

1

2

Comprehensive statistical analysis functions and advanced analytical operations for process behavior, performance metrics, model analysis, and process intelligence. PM4PY provides both descriptive statistics and advanced analytical capabilities.

3

4

## Capabilities

5

6

### Basic Statistics

7

8

Fundamental statistical functions for extracting basic information from event logs.

9

10

```python { .api }

11

def get_start_activities(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

12

"""

13

Get start activities and their frequencies across all cases.

14

15

Parameters:

16

- log (Union[EventLog, pd.DataFrame]): Event log data

17

- activity_key (str): Activity attribute name

18

- timestamp_key (str): Timestamp attribute name

19

- case_id_key (str): Case ID attribute name

20

21

Returns:

22

Dict[str, int]: Start activities with their frequencies

23

"""

24

25

def get_end_activities(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

26

"""

27

Get end activities and their frequencies across all cases.

28

29

Parameters:

30

- log (Union[EventLog, pd.DataFrame]): Event log data

31

- activity_key (str): Activity attribute name

32

- timestamp_key (str): Timestamp attribute name

33

- case_id_key (str): Case ID attribute name

34

35

Returns:

36

Dict[str, int]: End activities with their frequencies

37

"""

38

39

def get_event_attributes(log):

40

"""

41

Get list of all event attribute names in the log.

42

43

Parameters:

44

- log (Union[EventLog, pd.DataFrame]): Event log data

45

46

Returns:

47

List[str]: List of event attribute names

48

"""

49

50

def get_event_attribute_values(log, attribute_key):

51

"""

52

Get all unique values for a specific event attribute.

53

54

Parameters:

55

- log (Union[EventLog, pd.DataFrame]): Event log data

56

- attribute_key (str): Attribute name to extract values for

57

58

Returns:

59

List[Any]: Unique values of the specified attribute

60

"""

61

62

def get_trace_attributes(log):

63

"""

64

Get list of all trace (case-level) attribute names.

65

66

Parameters:

67

- log (Union[EventLog, pd.DataFrame]): Event log data

68

69

Returns:

70

List[str]: List of trace attribute names

71

"""

72

73

def get_trace_attribute_values(log, attribute_key):

74

"""

75

Get all unique values for a specific trace attribute.

76

77

Parameters:

78

- log (Union[EventLog, pd.DataFrame]): Event log data

79

- attribute_key (str): Trace attribute name

80

81

Returns:

82

List[Any]: Unique values of the specified trace attribute

83

"""

84

```

85

86

### Variant Analysis

87

88

Analyze process variants (unique activity sequences) and their characteristics.

89

90

```python { .api }

91

def get_variants(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

92

"""

93

Get trace variants with their corresponding case IDs.

94

95

Parameters:

96

- log (Union[EventLog, pd.DataFrame]): Event log data

97

- activity_key (str): Activity attribute name

98

- timestamp_key (str): Timestamp attribute name

99

- case_id_key (str): Case ID attribute name

100

101

Returns:

102

Dict[Tuple[str, ...], List[str]]: Variants mapped to list of case IDs

103

"""

104

105

def get_variants_as_tuples(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

106

"""

107

Get variants as tuples with their frequencies.

108

109

Parameters:

110

- log (Union[EventLog, pd.DataFrame]): Event log data

111

- activity_key (str): Activity attribute name

112

- timestamp_key (str): Timestamp attribute name

113

- case_id_key (str): Case ID attribute name

114

115

Returns:

116

Dict[Tuple[str, ...], int]: Variants with their frequencies

117

"""

118

119

def split_by_process_variant(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

120

"""

121

Split log into separate logs by process variant.

122

123

Parameters:

124

- log (Union[EventLog, pd.DataFrame]): Event log data

125

- activity_key (str): Activity attribute name

126

- timestamp_key (str): Timestamp attribute name

127

- case_id_key (str): Case ID attribute name

128

129

Returns:

130

Dict[Tuple[str, ...], Union[EventLog, pd.DataFrame]]: Variants mapped to their sub-logs

131

"""

132

```

133

134

### Temporal Statistics

135

136

Analyze temporal patterns including case durations, arrival rates, and performance metrics.

137

138

```python { .api }

139

def get_case_arrival_average(log, timestamp_key='time:timestamp', case_id_key='case:concept:name'):

140

"""

141

Calculate the average time difference between the start times of two consecutive cases (inter-arrival time).

142

143

Parameters:

144

- log (Union[EventLog, pd.DataFrame]): Event log data

145

- timestamp_key (str): Timestamp attribute name

146

- case_id_key (str): Case ID attribute name

147

148

Returns:

149

float: Average inter-arrival time between consecutive cases, in seconds

150

"""

151

152

def get_all_case_durations(log, timestamp_key='time:timestamp', case_id_key='case:concept:name'):

153

"""

154

Get durations of all cases in the log.

155

156

Parameters:

157

- log (Union[EventLog, pd.DataFrame]): Event log data

158

- timestamp_key (str): Timestamp attribute name

159

- case_id_key (str): Case ID attribute name

160

161

Returns:

162

List[float]: List of case durations in seconds

163

"""

164

165

def get_case_duration(log, timestamp_key='time:timestamp', case_id_key='case:concept:name'):

166

"""

167

Calculate average case duration across all cases.

168

169

Parameters:

170

- log (Union[EventLog, pd.DataFrame]): Event log data

171

- timestamp_key (str): Timestamp attribute name

172

- case_id_key (str): Case ID attribute name

173

174

Returns:

175

float: Average case duration in seconds

176

"""

177

178

def get_cycle_time(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

179

"""

180

Calculate cycle time of the process (end-to-end duration).

181

182

Parameters:

183

- log (Union[EventLog, pd.DataFrame]): Event log data

184

- activity_key (str): Activity attribute name

185

- timestamp_key (str): Timestamp attribute name

186

- case_id_key (str): Case ID attribute name

187

188

Returns:

189

float: Average cycle time in seconds

190

"""

191

192

def get_service_time(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

193

"""

194

Calculate service time for each activity.

195

196

Parameters:

197

- log (Union[EventLog, pd.DataFrame]): Event log data

198

- activity_key (str): Activity attribute name

199

- timestamp_key (str): Timestamp attribute name

200

- case_id_key (str): Case ID attribute name

201

202

Returns:

203

Dict[str, float]: Service times per activity in seconds

204

"""

205

206

def get_variants_paths_duration(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

207

"""

208

Get durations for each variant path.

209

210

Parameters:

211

- log (Union[EventLog, pd.DataFrame]): Event log data

212

- activity_key (str): Activity attribute name

213

- timestamp_key (str): Timestamp attribute name

214

- case_id_key (str): Case ID attribute name

215

216

Returns:

217

Dict[Tuple[str, ...], List[float]]: Durations per variant

218

"""

219

```

220

221

### Advanced Statistics

222

223

Complex statistical analysis including loops, segments, and behavioral patterns.

224

225

```python { .api }

226

def get_minimum_self_distances(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

227

"""

228

Calculate minimum self-distances for activities (loop detection).

229

230

Parameters:

231

- log (Union[EventLog, pd.DataFrame]): Event log data

232

- activity_key (str): Activity attribute name

233

- timestamp_key (str): Timestamp attribute name

234

- case_id_key (str): Case ID attribute name

235

236

Returns:

237

Dict[str, int]: Minimum self-distances per activity

238

"""

239

240

def get_minimum_self_distance_witnesses(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

241

"""

242

Get witness traces for minimum self-distances.

243

244

Parameters:

245

- log (Union[EventLog, pd.DataFrame]): Event log data

246

- activity_key (str): Activity attribute name

247

- timestamp_key (str): Timestamp attribute name

248

- case_id_key (str): Case ID attribute name

249

250

Returns:

251

Dict[str, List[str]]: Witness cases per activity

252

"""

253

254

def get_frequent_trace_segments(log, min_length=2, max_length=5, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

255

"""

256

Extract frequent trace segments of specified lengths.

257

258

Parameters:

259

- log (Union[EventLog, pd.DataFrame]): Event log data

260

- min_length (int): Minimum segment length

261

- max_length (int): Maximum segment length

262

- activity_key (str): Activity attribute name

263

- timestamp_key (str): Timestamp attribute name

264

- case_id_key (str): Case ID attribute name

265

266

Returns:

267

Dict[Tuple[str, ...], int]: Frequent segments with frequencies

268

"""

269

270

def get_rework_cases_per_activity(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

271

"""

272

Get count of cases with rework per activity.

273

274

Parameters:

275

- log (Union[EventLog, pd.DataFrame]): Event log data

276

- activity_key (str): Activity attribute name

277

- timestamp_key (str): Timestamp attribute name

278

- case_id_key (str): Case ID attribute name

279

280

Returns:

281

Dict[str, int]: Rework cases count per activity

282

"""

283

284

def get_case_overlap(log, timestamp_key='time:timestamp', case_id_key='case:concept:name'):

285

"""

286

Calculate case overlap measure (parallel case execution).

287

288

Parameters:

289

- log (Union[EventLog, pd.DataFrame]): Event log data

290

- timestamp_key (str): Timestamp attribute name

291

- case_id_key (str): Case ID attribute name

292

293

Returns:

294

float: Case overlap ratio

295

"""

296

297

def get_activity_position_summary(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

298

"""

299

Get position summary statistics for each activity.

300

301

Parameters:

302

- log (Union[EventLog, pd.DataFrame]): Event log data

303

- activity_key (str): Activity attribute name

304

- timestamp_key (str): Timestamp attribute name

305

- case_id_key (str): Case ID attribute name

306

307

Returns:

308

Dict[str, Dict[str, Any]]: Position statistics per activity

309

"""

310

```

311

312

### Stochastic Language

313

314

Generate probabilistic representations of process behavior.

315

316

```python { .api }

317

def get_stochastic_language(log, activity_key='concept:name', timestamp_key='time:timestamp', case_id_key='case:concept:name'):

318

"""

319

Generate stochastic language from log or model.

320

Creates probabilistic representation of process behavior.

321

322

Parameters:

323

- log (Union[EventLog, pd.DataFrame]): Event log data

324

- activity_key (str): Activity attribute name

325

- timestamp_key (str): Timestamp attribute name

326

- case_id_key (str): Case ID attribute name

327

328

Returns:

329

Dict[Tuple[str, ...], float]: Traces (as tuples of activity labels) mapped to their probabilities

330

"""

331

```

332

333

### Model Analysis

334

335

Advanced analytical functions for process model evaluation and manipulation.

336

337

```python { .api }

338

def check_soundness(petri_net, initial_marking, final_marking):

339

"""

340

Check if Petri net is sound (proper termination, no deadlocks).

341

342

Parameters:

343

- petri_net (PetriNet): Petri net model

344

- initial_marking (Marking): Initial marking

345

- final_marking (Marking): Final marking

346

347

Returns:

348

bool: True if the Petri net is sound

349

"""

350

351

def check_is_workflow_net(petri_net):

352

"""

353

Check if Petri net is a workflow net (single source, single sink).

354

355

Parameters:

356

- petri_net (PetriNet): Petri net model

357

358

Returns:

359

bool: True if it's a workflow net

360

"""

361

362

def simplicity_petri_net(petri_net):

363

"""

364

Calculate simplicity metric of Petri net.

365

366

Parameters:

367

- petri_net (PetriNet): Petri net model

368

369

Returns:

370

float: Simplicity value between 0 and 1

371

"""

372

```

373

374

### Mathematical Operations

375

376

Mathematical analysis functions for process models and languages.

377

378

```python { .api }

379

def compute_emd(language1, language2):

380

"""

381

Compute Earth Mover Distance between two stochastic languages.

382

383

Parameters:

384

- language1 (Dict): First stochastic language

385

- language2 (Dict): Second stochastic language

386

387

Returns:

388

float: Earth Mover Distance value

389

"""

390

391

def solve_marking_equation(petri_net, initial_marking, final_marking, cost_function=None):

392

"""

393

Solve marking equation for Petri net reachability.

394

395

Parameters:

396

- petri_net (PetriNet): Petri net model

397

- initial_marking (Marking): Initial marking

398

- final_marking (Marking): Target marking

399

- cost_function (Optional[Callable]): Cost function for optimization

400

401

Returns:

402

float: Solution cost or distance

403

"""

404

405

def solve_extended_marking_equation(petri_net, initial_marking, final_marking, **kwargs):

406

"""

407

Solve extended marking equation with additional constraints.

408

409

Parameters:

410

- petri_net (PetriNet): Petri net model

411

- initial_marking (Marking): Initial marking

412

- final_marking (Marking): Target marking

413

- **kwargs: Additional parameters and constraints

414

415

Returns:

416

Dict[str, Any]: Solution with detailed information

417

"""

418

```

419

420

### Similarity Analysis

421

422

Calculate similarity between models, logs, and process representations.

423

424

```python { .api }

425

def behavioral_similarity(model1, model2, **kwargs):

426

"""

427

Calculate behavioral similarity between two process models.

428

429

Parameters:

430

- model1 (Any): First process model

431

- model2 (Any): Second process model

432

- **kwargs: Similarity computation parameters

433

434

Returns:

435

float: Behavioral similarity score (0-1)

436

"""

437

438

def structural_similarity(model1, model2, **kwargs):

439

"""

440

Calculate structural similarity between two process models.

441

442

Parameters:

443

- model1 (Any): First process model

444

- model2 (Any): Second process model

445

- **kwargs: Similarity computation parameters

446

447

Returns:

448

float: Structural similarity score (0-1)

449

"""

450

451

def embeddings_similarity(log1, log2, **kwargs):

452

"""

453

Calculate embeddings-based similarity between event logs.

454

455

Parameters:

456

- log1 (Union[EventLog, pd.DataFrame]): First event log

457

- log2 (Union[EventLog, pd.DataFrame]): Second event log

458

- **kwargs: Embedding parameters

459

460

Returns:

461

float: Embeddings similarity score (0-1)

462

"""

463

464

def label_sets_similarity(model1, model2, **kwargs):

465

"""

466

Calculate label set similarity between models.

467

468

Parameters:

469

- model1 (Any): First process model

470

- model2 (Any): Second process model

471

- **kwargs: Similarity parameters

472

473

Returns:

474

float: Label set similarity score (0-1)

475

"""

476

```

477

478

### Utility Analysis Functions

479

480

Utility functions for model manipulation and analysis.

481

482

```python { .api }

483

def get_enabled_transitions(petri_net, marking):

484

"""

485

Get list of transitions enabled in specific marking.

486

487

Parameters:

488

- petri_net (PetriNet): Petri net model

489

- marking (Marking): Current marking

490

491

Returns:

492

List[PetriNet.Transition]: List of enabled transitions

493

"""

494

495

def get_activity_labels(model):

496

"""

497

Get set of activity labels from process model.

498

499

Parameters:

500

- model (Any): Process model (Petri net, process tree, etc.)

501

502

Returns:

503

Set[str]: Set of activity labels

504

"""

505

506

def replace_activity_labels(model, replacement_dict):

507

"""

508

Replace activity labels in process model.

509

510

Parameters:

511

- model (Any): Process model to modify

512

- replacement_dict (Dict[str, str]): Label replacement mapping

513

514

Returns:

515

Any: Modified process model

516

"""

517

518

def map_labels_from_second_model(model1, model2):

519

"""

520

Create label mapping between two models.

521

522

Parameters:

523

- model1 (Any): First process model

524

- model2 (Any): Second process model

525

526

Returns:

527

Dict[str, str]: Label mapping from model1 to model2

528

"""

529

```

530

531

## Usage Examples

532

533

### Basic Statistical Analysis

534

535

```python

536

import pm4py

537

538

# Load event log

539

log = pm4py.read_xes('event_log.xes')

540

541

# Basic statistics

542

start_activities = pm4py.get_start_activities(log)

543

end_activities = pm4py.get_end_activities(log)

544

545

print("Start Activities:")

546

for activity, count in sorted(start_activities.items(), key=lambda x: x[1], reverse=True):

547

print(f" {activity}: {count}")

548

549

print("End Activities:")

550

for activity, count in sorted(end_activities.items(), key=lambda x: x[1], reverse=True):

551

print(f" {activity}: {count}")

552

553

# Attribute analysis

554

event_attributes = pm4py.get_event_attributes(log)

555

trace_attributes = pm4py.get_trace_attributes(log)

556

557

print(f"Event attributes: {event_attributes}")

558

print(f"Trace attributes: {trace_attributes}")

559

```

560

561

### Variant Analysis

562

563

```python

564

import pm4py

565

566

# Get variants with frequencies

567

variants = pm4py.get_variants_as_tuples(log)

568

569

print(f"Total variants: {len(variants)}")

570

print("Top 10 variants:")

571

for variant, count in sorted(variants.items(), key=lambda x: x[1], reverse=True)[:10]:

572

print(f" {' -> '.join(variant)}: {count} cases")

573

574

# Split log by variants

575

variant_logs = pm4py.split_by_process_variant(log)

576

577

print("Variant analysis:")

578

for variant, sub_log in variant_logs.items():

579

case_count = len(sub_log)

580

avg_duration = pm4py.get_case_duration(sub_log)

581

print(f" Variant with {len(variant)} steps: {case_count} cases, avg duration: {avg_duration/3600:.1f} hours")

582

```

583

584

### Temporal Analysis

585

586

```python

587

import pm4py

588

589

# Case duration analysis

590

all_durations = pm4py.get_all_case_durations(log)

591

avg_duration = pm4py.get_case_duration(log)

592

cycle_time = pm4py.get_cycle_time(log)

593

594

print(f"Case Duration Statistics:")

595

print(f" Average: {avg_duration/3600:.1f} hours")

596

print(f" Cycle time: {cycle_time/3600:.1f} hours")

597

print(f" Min: {min(all_durations)/3600:.1f} hours")

598

print(f" Max: {max(all_durations)/3600:.1f} hours")

599

600

# Arrival rate analysis

601

arrival_rate = pm4py.get_case_arrival_average(log)

602

print(f" Avg time between arrivals: {arrival_rate/60:.1f} minutes ({3600/arrival_rate:.1f} cases/hour)")

603

604

# Service time analysis

605

service_times = pm4py.get_service_time(log)

606

print("Service Times:")

607

for activity, time in sorted(service_times.items(), key=lambda x: x[1], reverse=True):

608

print(f" {activity}: {time/60:.1f} minutes")

609

```

610

611

### Advanced Behavioral Analysis

612

613

```python

614

import pm4py
import pandas as pd

615

616

# Loop analysis

617

self_distances = pm4py.get_minimum_self_distances(log)

618

witnesses = pm4py.get_minimum_self_distance_witnesses(log)

619

620

print("Loop Analysis:")

621

for activity, distance in self_distances.items():

622

if distance > 1: # Activity can loop

623

print(f" {activity}: min distance {distance} (witness cases: {len(witnesses[activity])})")

624

625

# Rework analysis

626

rework_cases = pm4py.get_rework_cases_per_activity(log)

627

total_cases = len(set(log['case:concept:name']) if isinstance(log, pd.DataFrame) else log)

628

629

print("Rework Analysis:")

630

for activity, rework_count in rework_cases.items():

631

rework_percentage = (rework_count / total_cases) * 100

632

print(f" {activity}: {rework_count} cases ({rework_percentage:.1f}%)")

633

634

# Case overlap analysis

635

overlap = pm4py.get_case_overlap(log)

636

print(f"Case Overlap: {overlap:.3f}")

637

638

# Activity position analysis

639

position_summary = pm4py.get_activity_position_summary(log)

640

print("Activity Position Summary:")

641

for activity, stats in position_summary.items():

642

print(f" {activity}:")

643

print(f" Avg position: {stats['mean_position']:.1f}")

644

print(f" Position range: {stats['min_position']} - {stats['max_position']}")

645

```

646

647

### Frequent Pattern Mining

648

649

```python

650

import pm4py

651

652

# Find frequent trace segments

653

frequent_segments = pm4py.get_frequent_trace_segments(

654

log,

655

min_length=2,

656

max_length=4

657

)

658

659

print("Frequent Trace Segments:")

660

for segment, frequency in sorted(frequent_segments.items(), key=lambda x: x[1], reverse=True)[:20]:

661

print(f" {' -> '.join(segment)}: {frequency} occurrences")

662

663

# Variant duration analysis

664

variant_durations = pm4py.get_variants_paths_duration(log)

665

print("Variant Performance Analysis:")

666

for variant, durations in variant_durations.items():

667

if len(durations) >= 5: # Only variants with sufficient data

668

avg_duration = sum(durations) / len(durations)

669

print(f" {' -> '.join(variant[:3])}: {avg_duration/3600:.1f}h avg ({len(durations)} cases)")

670

```

671

672

### Model Quality Assessment

673

674

```python

675

import pm4py

676

677

# Discover model

678

net, initial_marking, final_marking = pm4py.discover_petri_net_inductive(log)

679

680

# Check model properties

681

is_sound = pm4py.check_soundness(net, initial_marking, final_marking)

682

is_workflow = pm4py.check_is_workflow_net(net)

683

simplicity = pm4py.simplicity_petri_net(net)

684

685

print("Model Quality Assessment:")

686

print(f" Sound: {is_sound}")

687

print(f" Workflow net: {is_workflow}")

688

print(f" Simplicity: {simplicity:.3f}")

689

690

# Get model labels

691

activity_labels = pm4py.get_activity_labels(net)

692

print(f" Activities in model: {len(activity_labels)}")

693

print(f" Activity labels: {sorted(activity_labels)}")

694

695

# Check enabled transitions in initial marking

696

enabled = pm4py.get_enabled_transitions(net, initial_marking)

697

print(f" Initially enabled transitions: {len(enabled)}")

698

```

699

700

### Stochastic Language Analysis

701

702

```python

703

import pm4py

704

705

# Generate stochastic language

706

stochastic_lang = pm4py.get_stochastic_language(log)

707

708

print("Stochastic Language Analysis:")

709

print(f" Unique traces: {len(stochastic_lang)}")

710

print(f" Most probable traces:")

711

712

# Show top traces by probability

713

sorted_traces = sorted(stochastic_lang.items(), key=lambda x: x[1], reverse=True)[:10]

714

for trace, prob in sorted_traces:

715

trace_str = ' -> '.join(trace[:5]) # Limit length for display

716

if len(trace) > 5:

717

trace_str += "..."

718

print(f" {trace_str}: {prob:.4f}")

719

720

# Calculate entropy

721

import math

722

entropy = -sum(p * math.log2(p) for p in stochastic_lang.values() if p > 0)

723

print(f" Process entropy: {entropy:.3f} bits")

724

```

725

726

### Model Comparison and Similarity

727

728

```python

729

import pm4py

730

731

# Discover two different models

732

net1, im1, fm1 = pm4py.discover_petri_net_inductive(log)

733

net2, im2, fm2 = pm4py.discover_petri_net_heuristics(log)

734

735

# Calculate similarities

736

behavioral_sim = pm4py.behavioral_similarity(net1, net2)

737

structural_sim = pm4py.structural_similarity(net1, net2)

738

label_sim = pm4py.label_sets_similarity(net1, net2)

739

740

print("Model Similarity Analysis:")

741

print(f" Behavioral similarity: {behavioral_sim:.3f}")

742

print(f" Structural similarity: {structural_sim:.3f}")

743

print(f" Label set similarity: {label_sim:.3f}")

744

745

# Create label mapping

746

label_mapping = pm4py.map_labels_from_second_model(net1, net2)

747

print(f" Common labels: {len(label_mapping)}")

748

749

# Compare model languages

750

lang1 = pm4py.get_stochastic_language(log) # Would use model if available

751

lang2 = pm4py.get_stochastic_language(log) # Would use different model

752

753

# emd_distance = pm4py.compute_emd(lang1, lang2)

754

# print(f" Earth Mover Distance: {emd_distance:.3f}")

755

```

756

757

### Comprehensive Process Analysis Dashboard

758

759

```python

760

import pm4py
import pandas as pd

761

762

def comprehensive_process_analysis(log):

763

"""Generate comprehensive process analysis report."""

764

765

print("=" * 60)

766

print("COMPREHENSIVE PROCESS ANALYSIS REPORT")

767

print("=" * 60)

768

769

# Basic statistics

770

total_cases = len(set(log['case:concept:name']) if isinstance(log, pd.DataFrame) else log)

771

total_events = len(log)

772

773

print(f"Dataset Overview:")

774

print(f" Cases: {total_cases:,}")

775

print(f" Events: {total_events:,}")

776

print(f" Events per case: {total_events/total_cases:.1f}")

777

778

# Temporal analysis

779

durations = pm4py.get_all_case_durations(log)

780

avg_duration = sum(durations) / len(durations)

781

782

print(f"\nTemporal Analysis:")

783

print(f" Average case duration: {avg_duration/3600:.1f} hours")

784

print(f" Shortest case: {min(durations)/60:.1f} minutes")

785

print(f" Longest case: {max(durations)/3600:.1f} hours")

786

787

# Variant analysis

788

variants = pm4py.get_variants_as_tuples(log)

789

variant_coverage = sum(sorted(variants.values(), reverse=True)[:10]) / total_cases

790

791

print(f"\nVariant Analysis:")

792

print(f" Total variants: {len(variants)}")

793

print(f" Top 10 variants cover: {variant_coverage:.1%} of cases")

794

795

# Behavioral patterns

796

rework = pm4py.get_rework_cases_per_activity(log)

797

total_rework = sum(rework.values())

798

799

print(f"\nBehavioral Patterns:")

800

print(f" Rework occurrences (summed per activity; a case may be counted more than once): {total_rework} ({total_rework/total_cases:.1%} vs. total cases)")

801

802

# Process model quality

803

net, im, fm = pm4py.discover_petri_net_inductive(log)

804

fitness = pm4py.fitness_alignments(log, net, im, fm)

805

precision = pm4py.precision_alignments(log, net, im, fm)

806

807

print(f"\nProcess Model Quality:")

808

print(f" Fitness: {fitness['log_fitness']:.3f}")

809

print(f" Precision: {precision:.3f}")

810

print(f" Soundness: {pm4py.check_soundness(net, im, fm)}")

811

812

return {

813

'cases': total_cases,

814

'events': total_events,

815

'avg_duration': avg_duration,

816

'variants': len(variants),

817

'rework_rate': total_rework/total_cases,

818

'fitness': fitness['log_fitness'],

819

'precision': precision

820

}

821

822

# Run comprehensive analysis

823

analysis_results = comprehensive_process_analysis(log)

824

```