# Evaluation and Metrics

Functions for evaluating model quality, computing metrics, and validating learned structures. pgmpy provides comprehensive evaluation tools for assessing both model fit and predictive performance.

## Capabilities

### Model-Data Fit Evaluation

#### Correlation Testing

```python { .api }
def correlation_score(model, data, test="chi_square", significance_level=0.05,
                      score=log_likelihood_score, return_summary=False):
    """
    Test correlation between model and data using statistical tests.

    Parameters:
    - model: DiscreteBayesianNetwork to evaluate
    - data: pandas.DataFrame with observed data
    - test: statistical test to use ("chi_square", "freeman_tuckey", "log_likelihood", "modified_log_likelihood", "neyman", "cressie_read")
    - significance_level: significance threshold for hypothesis testing
    - score: scoring function for model comparison
    - return_summary: whether to return detailed test summary

    Returns:
    dict: Test statistic, p-value, and test result
    """

def fisher_c(model, data, ci_test, show_progress=True):
    """
    Compute Fisher's C test statistic for model evaluation.

    Parameters:
    - model: DiscreteBayesianNetwork to test
    - data: pandas.DataFrame with data
    - ci_test: conditional independence test function
    - show_progress: whether to show progress bar

    Returns:
    dict: Fisher's C statistic and p-value
    """
```

#### Likelihood-Based Metrics

```python { .api }
def log_likelihood_score(model, data):
    """
    Compute log-likelihood of data given model.

    Parameters:
    - model: DiscreteBayesianNetwork fitted model
    - data: pandas.DataFrame with observed data

    Returns:
    float: Log-likelihood score (higher is better)
    """

def structure_score(model, data, scoring_method="bic-g", **kwargs):
    """
    Compute structure quality score.

    Parameters:
    - model: DiscreteBayesianNetwork to score
    - data: pandas.DataFrame with data
    - scoring_method: scoring method ("bic", "aic", "bdeu", "k2", "bic-g", "aic-g")
    - kwargs: additional arguments for scoring method

    Returns:
    float: Structure score
    """
```

### Independence Testing

```python { .api }
def implied_cis(model, data, ci_test, show_progress=True):
    """
    Test all conditional independencies implied by the model.

    Parameters:
    - model: DiscreteBayesianNetwork with structure to test
    - data: pandas.DataFrame with observed data
    - ci_test: conditional independence test function
    - show_progress: whether to show progress bar

    Returns:
    dict: Results of all CI tests including test statistics and p-values
    """
```

### Structure Comparison

```python { .api }
def SHD(true_model, est_model):
    """
    Compute Structural Hamming Distance between two graph structures.

    Parameters:
    - true_model: DiscreteBayesianNetwork with true structure
    - est_model: DiscreteBayesianNetwork with estimated structure

    Returns:
    int: Number of edge differences (additions + deletions + reversals)
    """
```

### Model Probability Assessment

```python { .api }
class BayesianModelProbability:
    def __init__(self, data):
        """
        Compute model probabilities for structure comparison.

        Parameters:
        - data: pandas.DataFrame with observed data
        """

    def score(self, model):
        """
        Compute log marginal likelihood of model.

        Parameters:
        - model: DiscreteBayesianNetwork to score

        Returns:
        float: Log marginal likelihood
        """

    def get_model_probability(self, models):
        """
        Get posterior probabilities over set of models.

        Parameters:
        - models: list of DiscreteBayesianNetwork objects

        Returns:
        dict: Model probabilities {model_index: probability}
        """
```

### Cross-Validation and Performance Metrics

```python { .api }
def cross_validate(model, data, folds=5, scoring_method="log_likelihood"):
    """
    Perform k-fold cross-validation for model evaluation.

    Parameters:
    - model: DiscreteBayesianNetwork to evaluate
    - data: pandas.DataFrame with complete data
    - folds: number of cross-validation folds
    - scoring_method: evaluation metric to use

    Returns:
    dict: Cross-validation scores and statistics
    """

def prediction_accuracy(model, test_data, variables=None):
    """
    Compute prediction accuracy on test data.

    Parameters:
    - model: fitted DiscreteBayesianNetwork
    - test_data: pandas.DataFrame with test data (may have missing values)
    - variables: list of variables to evaluate predictions for

    Returns:
    dict: Accuracy metrics for each predicted variable
    """

def classification_metrics(y_true, y_pred, labels=None):
    """
    Compute classification performance metrics.

    Parameters:
    - y_true: true class labels
    - y_pred: predicted class labels
    - labels: list of class label names

    Returns:
    dict: Precision, recall, F1-score, and accuracy
    """
```

### Information-Theoretic Measures

```python { .api }
def mutual_information(model, var1, var2, evidence=None):
    """
    Compute mutual information between variables.

    Parameters:
    - model: DiscreteBayesianNetwork
    - var1: first variable name
    - var2: second variable name
    - evidence: dict of conditioning variables

    Returns:
    float: Mutual information I(var1; var2 | evidence)
    """

def conditional_entropy(model, var, evidence):
    """
    Compute conditional entropy H(var | evidence).

    Parameters:
    - model: DiscreteBayesianNetwork
    - var: variable name
    - evidence: dict of conditioning variables

    Returns:
    float: Conditional entropy
    """

def kl_divergence(model1, model2, variables=None):
    """
    Compute KL divergence between two models.

    Parameters:
    - model1: first DiscreteBayesianNetwork
    - model2: second DiscreteBayesianNetwork
    - variables: list of variables to compute divergence over

    Returns:
    float: KL divergence D(model1 || model2)
    """
```

### Sensitivity Analysis

```python { .api }
def parameter_sensitivity(model, data, parameter, delta=0.01):
    """
    Analyze sensitivity of model predictions to parameter changes.

    Parameters:
    - model: DiscreteBayesianNetwork
    - data: pandas.DataFrame with query data
    - parameter: parameter to perturb (CPD entry)
    - delta: perturbation amount

    Returns:
    dict: Sensitivity measures for model outputs
    """

def structure_sensitivity(model, data, edge_modifications):
    """
    Analyze impact of structural changes on model performance.

    Parameters:
    - model: DiscreteBayesianNetwork base model
    - data: pandas.DataFrame with evaluation data
    - edge_modifications: list of edge changes to test

    Returns:
    dict: Performance changes for each structural modification
    """
```

## Usage Examples

### Model-Data Correlation Testing

```python
from pgmpy.metrics import correlation_score, log_likelihood_score
from pgmpy.models import DiscreteBayesianNetwork
import pandas as pd

# Assume we have a fitted model and test data
model = DiscreteBayesianNetwork([('A', 'C'), ('B', 'C')])
# ... add CPDs and fit model ...

test_data = pd.DataFrame({
    'A': [0, 1, 0, 1],
    'B': [1, 0, 1, 0],
    'C': [0, 1, 1, 0]
})

# Test model-data correlation
correlation_result = correlation_score(
    model, test_data,
    test="chi_square",
    significance_level=0.05
)
print("Correlation test:", correlation_result)

# Compute log-likelihood
ll_score = log_likelihood_score(model, test_data)
print(f"Log-likelihood: {ll_score}")
```

### Structure Comparison

```python
from pgmpy.metrics import SHD, structure_score

# Compare two model structures
true_model = DiscreteBayesianNetwork([('A', 'C'), ('B', 'C')])
learned_model = DiscreteBayesianNetwork([('A', 'B'), ('B', 'C')])

# Compute structural hamming distance
distance = SHD(true_model, learned_model)
print(f"Structural Hamming Distance: {distance}")

# Score structures
true_score = structure_score(true_model, test_data, scoring_method="bic")
learned_score = structure_score(learned_model, test_data, scoring_method="bic")

print(f"True model BIC: {true_score}")
print(f"Learned model BIC: {learned_score}")
```

### Independence Testing

```python
from pgmpy.metrics import implied_cis
from pgmpy.estimators.CITests import chi_square

# Test all conditional independencies implied by model
ci_results = implied_cis(
    model, test_data,
    ci_test=chi_square,
    show_progress=True
)

print("CI test results:")
for independence, result in ci_results.items():
    print(f"{independence}: p-value = {result['p_value']:.4f}")
```

### Cross-Validation

```python
# Perform cross-validation (assuming function exists)
cv_results = cross_validate(
    model, test_data,
    folds=5,
    scoring_method="log_likelihood"
)

print("Cross-validation results:")
print(f"Mean score: {cv_results['mean_score']:.4f}")
print(f"Std deviation: {cv_results['std_score']:.4f}")
print(f"Fold scores: {cv_results['fold_scores']}")
```

### Information-Theoretic Analysis

```python
# Compute mutual information between variables
mi_ac = mutual_information(model, 'A', 'C')
mi_bc = mutual_information(model, 'B', 'C')
mi_ac_given_b = mutual_information(model, 'A', 'C', evidence={'B': 1})

print(f"I(A; C) = {mi_ac:.4f}")
print(f"I(B; C) = {mi_bc:.4f}")
print(f"I(A; C | B=1) = {mi_ac_given_b:.4f}")

# Compute conditional entropy
h_c_given_ab = conditional_entropy(model, 'C', {'A': 0, 'B': 1})
print(f"H(C | A=0, B=1) = {h_c_given_ab:.4f}")
```