# Statistical Analysis and Diagnostics

MCMC diagnostics, model comparison, and statistical functions for Bayesian analysis. Compute convergence diagnostics, effective sample sizes, and information criteria, and perform Bayesian model comparison with techniques such as PSIS-LOO.
## Summary Statistics

```python { .api }
def summary(data: InferenceData, *, var_names: list = None, hdi_prob: float = 0.94, stat_focus: str = "mean", extend: bool = True, **kwargs) -> pd.DataFrame:
    """
    Create comprehensive summary statistics table for inference data.

    Args:
        data (InferenceData): Bayesian inference data
        var_names (list, optional): Variables to include in summary
        hdi_prob (float): Probability for highest density interval (default 0.94)
        stat_focus (str): Focus statistic ("mean", "median", "mode")
        extend (bool): Whether to include extended diagnostics
        **kwargs: Additional summary parameters

    Returns:
        pd.DataFrame: Summary statistics table with columns for mean, std, HDI bounds, diagnostics
    """

def hdi(data, hdi_prob: float = 0.94, *, circular: bool = False, multimodal: bool = False, **kwargs):
    """
    Compute highest density interval for posterior samples.

    Args:
        data: Posterior samples (array-like or InferenceData)
        hdi_prob (float): Probability mass for HDI (default 0.94)
        circular (bool): Whether data is circular (default False)
        multimodal (bool): Whether to detect multiple modes (default False)
        **kwargs: Additional HDI parameters

    Returns:
        array: HDI bounds [lower, upper] or multimodal intervals
    """
```

### Basic Usage

```python
import arviz as az

# Load example data
idata = az.load_arviz_data("centered_eight")

# Generate comprehensive summary
summary_df = az.summary(idata, var_names=["mu", "tau"])
print(summary_df)

# Compute HDI for specific variables
hdi_bounds = az.hdi(idata.posterior["mu"], hdi_prob=0.89)
print(f"89% HDI for mu: {hdi_bounds}")
```
## MCMC Convergence Diagnostics

### Core Diagnostics

```python { .api }
def rhat(data: InferenceData, *, var_names: list = None, method: str = "rank") -> xr.Dataset:
    """
    Compute R-hat convergence diagnostic for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data with multiple chains
        var_names (list, optional): Variables to analyze
        method (str): Method to use ("rank", "split", "folded", "identity")

    Returns:
        xr.Dataset: R-hat values for each variable (values near 1.0 indicate convergence)
    """

def ess(data: InferenceData, *, var_names: list = None, method: str = "bulk", relative: bool = False) -> xr.Dataset:
    """
    Compute effective sample size for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze
        method (str): ESS method ("bulk", "tail", "quantile", "mean", "sd", "median", "mad")
        relative (bool): Whether to return relative ESS (ESS/total_samples)

    Returns:
        xr.Dataset: Effective sample size values for each variable
    """

def mcse(data: InferenceData, *, var_names: list = None, method: str = "mean", prob: float = None) -> xr.Dataset:
    """
    Compute Monte Carlo standard error for MCMC estimates.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze
        method (str): Statistic method ("mean", "sd", "quantile")
        prob (float, optional): Probability for quantile method

    Returns:
        xr.Dataset: MCSE values for each variable
    """
```
### Advanced Diagnostics

```python { .api }
def bfmi(data: InferenceData) -> np.ndarray:
    """
    Compute Bayesian Fraction of Missing Information.
    Diagnostic for HMC/NUTS efficiency (values < 0.2 indicate problems).

    Args:
        data (InferenceData): MCMC inference data with energy information

    Returns:
        np.ndarray: BFMI value for each chain (higher is better, > 0.2 recommended)
    """

def autocorr(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute autocorrelation function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze
        coords (dict, optional): Coordinate specifications
        max_lag (int, optional): Maximum lag to compute
        **kwargs: Additional autocorrelation parameters

    Returns:
        xr.Dataset: Autocorrelation values at different lags
    """

def autocov(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute autocovariance function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze
        coords (dict, optional): Coordinate specifications
        max_lag (int, optional): Maximum lag to compute
        **kwargs: Additional autocovariance parameters

    Returns:
        xr.Dataset: Autocovariance values at different lags
    """
```
### Usage Examples

```python
# Check convergence diagnostics
rhat_values = az.rhat(idata)
print("R-hat values:", rhat_values)

# Compute effective sample sizes
ess_bulk = az.ess(idata, method="bulk")
ess_tail = az.ess(idata, method="tail")
print("Bulk ESS:", ess_bulk)
print("Tail ESS:", ess_tail)

# Check MCMC efficiency (one BFMI value per chain)
bfmi_values = az.bfmi(idata)
print("BFMI per chain:", bfmi_values)

# Analyze autocorrelation
autocorr_vals = az.autocorr(idata, var_names=["mu"])
```
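The Monte Carlo standard error from `mcse` complements ESS; a minimal sketch using the same `centered_eight` data, with `prob` supplied for the quantile method as documented above:

```python
# MCSE of the posterior mean and of the 5% quantile for mu
mcse_mean = az.mcse(idata, var_names=["mu"], method="mean")
mcse_q05 = az.mcse(idata, var_names=["mu"], method="quantile", prob=0.05)
print("MCSE (mean):", mcse_mean)
print("MCSE (5% quantile):", mcse_q05)
```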
## Model Comparison

### Information Criteria

```python { .api }
def compare(data_dict: dict, *, ic: str = "loo", method: str = "stacking", scale: str = "log") -> pd.DataFrame:
    """
    Compare multiple models using information criteria and model weights.

    Args:
        data_dict (dict): Dictionary of model names to InferenceData objects
        ic (str): Information criterion ("loo", "waic")
        method (str): Weighting method ("stacking", "bb-pseudo-bma", "pseudo-bma")
        scale (str): Scale for comparison ("log", "negative_log", "deviance")

    Returns:
        pd.DataFrame: Model comparison table with IC values, weights, and rankings
    """

def loo(data: InferenceData, *, pointwise: bool = False, var_name: str = None, reff: float = 1.0) -> ELPDData:
    """
    Compute Leave-One-Out Cross-Validation using Pareto Smoothed Importance Sampling.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group
        reff (float): Relative efficiency of MCMC chains (default 1.0)

    Returns:
        ELPDData: LOO results with ELPD estimate, standard error, and Pareto k diagnostics
    """

def waic(data: InferenceData, *, pointwise: bool = False, var_name: str = None, scale: str = "log") -> ELPDData:
    """
    Compute Widely Applicable Information Criterion.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group
        scale (str): Scale for computation ("log", "negative_log", "deviance")

    Returns:
        ELPDData: WAIC results with ELPD estimate and effective parameters
    """
```
### Specialized Comparison Methods

```python { .api }
def psislw(log_weights: np.ndarray, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto Smoothed Importance Sampling with diagnostics.

    Args:
        log_weights (np.ndarray): Log importance weights
        reff (float): Relative efficiency of weights (default 1.0)
        normalize (bool): Whether to normalize weights (default True)

    Returns:
        tuple: (smoothed_log_weights, pareto_k_values) with diagnostics
    """

def bayes_factor(data: InferenceData, *, var_name: str, ref_val: float = 0, prior=None, return_ref_vals: bool = False):
    """
    Compute Bayes factor for a point hypothesis on a single variable.

    Args:
        data (InferenceData): Posterior samples
        var_name (str): Variable name to test
        ref_val (float): Reference value for the hypothesis test (default 0)
        prior: Prior distribution for the Bayes factor calculation
        return_ref_vals (bool): Whether to return reference values

    Returns:
        dict or float: Bayes factor results
    """
```
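A minimal sketch of smoothing raw importance weights; the `log_weights` array here is synthetic, made up purely for illustration, and `psislw` is assumed to accept a plain array of shape (observations, draws) as the signature above suggests:

```python
import numpy as np

rng = np.random.default_rng(0)
# Synthetic log importance weights: 100 observations x 2000 posterior draws
log_weights = rng.standard_normal((100, 2000))

smoothed_lw, pareto_k = az.psislw(log_weights, reff=0.9)
print("Worst Pareto k:", float(np.max(pareto_k)))
```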
### Usage Examples

```python
# Compare multiple models
model_dict = {
    "model_1": idata1,
    "model_2": idata2,
    "model_3": idata3
}

# Perform model comparison
comparison_df = az.compare(model_dict, ic="loo", method="stacking")
print(comparison_df)

# Compute LOO for single model
loo_results = az.loo(idata1, pointwise=True)
print(f"LOO ELPD: {loo_results.elpd_loo:.2f} ± {loo_results.se:.2f}")
print(f"Problematic observations (k > 0.7): {(loo_results.pareto_k > 0.7).sum()}")

# Compute WAIC
waic_results = az.waic(idata1)
print(f"WAIC: {waic_results.waic:.2f}")
```
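Assuming the comparison table uses ArviZ's usual column layout (`rank`, `elpd_diff`, `weight`, ...), the ranking can be read off directly; a quick sketch:

```python
# The table is sorted by rank: row 0 is the best-scoring model,
# elpd_diff is measured relative to that model, and weight is the
# stacking weight assigned to each model.
best_model = comparison_df.index[0]
print(f"Best model: {best_model}")
print(comparison_df[["elpd_diff", "weight"]])
```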
## Predictive Model Checking

```python { .api }
def loo_pit(data: InferenceData, *, y: str = None, y_hat: str = None) -> np.ndarray:
    """
    Compute Leave-One-Out Probability Integral Transform values.

    Args:
        data (InferenceData): Inference data with posterior predictive samples
        y (str, optional): Observed data variable name
        y_hat (str, optional): Posterior predictive variable name

    Returns:
        np.ndarray: LOO-PIT values for model checking
    """

def weight_predictions(data: dict, *, weights: np.ndarray = None) -> InferenceData:
    """
    Weight predictions from multiple models using model weights.

    Args:
        data (dict): Dictionary of model predictions
        weights (np.ndarray, optional): Model weights (if None, uses equal weights)

    Returns:
        InferenceData: Weighted prediction ensemble
    """
```
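A short LOO-PIT sketch; the variable names `y_obs` and `y_pred` are placeholders for whatever the observed-data and posterior-predictive variables are called in your model:

```python
# Well-calibrated models give LOO-PIT values that look roughly uniform on [0, 1]
pit_vals = az.loo_pit(idata, y="y_obs", y_hat="y_pred")
print("LOO-PIT mean (≈ 0.5 expected):", float(pit_vals.mean()))
print("LOO-PIT min/max:", float(pit_vals.min()), float(pit_vals.max()))
```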
## Kernel Density Estimation

```python { .api }
def kde(data, *, bw: str = "default", adaptive: bool = False, circular: bool = False, extend: bool = True, **kwargs):
    """
    Compute kernel density estimation for continuous data.

    Args:
        data: Input data (array-like)
        bw (str): Bandwidth selection method ("default", "scott", "silverman")
        adaptive (bool): Whether to use adaptive bandwidth (default False)
        circular (bool): Whether data is circular (default False)
        extend (bool): Whether to extend domain beyond data range (default True)
        **kwargs: Additional KDE parameters (bandwidth, kernel type, etc.)

    Returns:
        tuple: (x_values, density_values) for plotting and analysis
    """
```
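A small sketch using the pooled posterior draws of `mu` from the earlier `centered_eight` example:

```python
import numpy as np

# Density estimate over all chains and draws of mu
mu_draws = idata.posterior["mu"].values.flatten()
grid, pdf = az.kde(mu_draws)
print("Approximate posterior mode of mu:", grid[np.argmax(pdf)])
```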
## Regression Diagnostics

```python { .api }
def r2_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Compute R-squared coefficient of determination.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values

    Returns:
        float: R-squared score (1.0 is perfect prediction)
    """

def r2_samples(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute sample-wise R-squared values for uncertainty quantification.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values (samples x observations)

    Returns:
        np.ndarray: R-squared values for each sample
    """
```
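A toy sketch with synthetic data to show the calling convention; the shapes follow the docstrings above (`y_pred` for `r2_samples` is draws × observations):

```python
import numpy as np

rng = np.random.default_rng(42)
y_true = rng.normal(size=50)
# Pretend posterior predictive draws: 1000 draws x 50 observations
y_pred_samples = y_true + rng.normal(scale=0.5, size=(1000, 50))

r2_draws = az.r2_samples(y_true, y_pred_samples)             # one R² per draw
r2_point = az.r2_score(y_true, y_pred_samples.mean(axis=0))  # point-estimate R²
print("Posterior mean R²:", float(np.mean(r2_draws)))
print("Point-estimate R²:", r2_point)
```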
## Advanced Statistical Methods

### Model Refitting and Sensitivity Analysis

```python { .api }
def reloo(data: InferenceData, *, loo_kwargs: dict = None, refit_kwargs: dict = None) -> ELPDData:
    """
    Refit models for problematic observations in LOO-CV.

    Automatically identifies observations with high Pareto k values
    and refits the model excluding those observations for more
    accurate LOO estimates.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        loo_kwargs (dict, optional): Arguments passed to az.loo()
        refit_kwargs (dict, optional): Arguments for model refitting

    Returns:
        ELPDData: Improved LOO results with refitted estimates
    """

def psens(data: InferenceData, *, perturbation: float = 0.1, var_names: list = None) -> dict:
    """
    Perform prior sensitivity analysis.

    Evaluates how sensitive posterior estimates are to changes
    in prior specifications by perturbing priors and measuring
    the impact on posterior quantities.

    Args:
        data (InferenceData): Inference data with prior samples
        perturbation (float): Size of prior perturbation (default 0.1)
        var_names (list, optional): Variables to analyze

    Returns:
        dict: Sensitivity analysis results for each variable
    """
```
### Utility Functions

```python { .api }
def apply_test_function(data, func, *, var_names: list = None, **kwargs):
    """
    Apply test function to inference data variables.

    Applies custom test functions to posterior samples for
    model checking, hypothesis testing, or custom diagnostics.

    Args:
        data: Inference data or array-like data
        func: Test function to apply
        var_names (list, optional): Variables to apply function to
        **kwargs: Additional arguments passed to function

    Returns:
        Results of applying test function
    """

def make_ufunc(func, *, signature: str = None, **kwargs):
    """
    Create universal function for broadcasting operations.

    Converts regular functions into universal functions that
    can operate on ArviZ data structures with proper broadcasting.

    Args:
        func: Function to convert to ufunc
        signature (str, optional): Function signature for broadcasting
        **kwargs: Additional ufunc creation parameters

    Returns:
        Universal function compatible with xarray operations
    """

def wrap_xarray_ufunc(func, *, dask: str = "forbidden", **kwargs):
    """
    Wrap functions for xarray universal function operations.

    Creates xarray-compatible wrapped functions that handle
    coordinates, dimensions, and attributes properly.

    Args:
        func: Function to wrap for xarray
        dask (str): Dask handling strategy ("forbidden", "allowed")
        **kwargs: Additional wrapping parameters

    Returns:
        Wrapped function compatible with xarray apply_ufunc
    """

def smooth_data(data, *, method: str = "gaussian", **kwargs):
    """
    Apply smoothing operations to data.

    Provides various smoothing algorithms for data preprocessing
    and noise reduction in statistical analysis.

    Args:
        data: Input data to smooth
        method (str): Smoothing method ("gaussian", "median", "savgol")
        **kwargs: Method-specific smoothing parameters

    Returns:
        Smoothed data with same structure as input
    """
```
### Usage Examples

```python
import numpy as np

# Model refitting for problematic LOO observations
loo_initial = az.loo(idata, pointwise=True)
problematic_k = (loo_initial.pareto_k > 0.7).sum()
print(f"Problematic observations: {problematic_k}")

if problematic_k > 0:
    loo_refit = az.reloo(idata)
    print(f"Improved LOO: {loo_refit.elpd_loo:.2f}")

# Prior sensitivity analysis
sensitivity = az.psens(idata, perturbation=0.2, var_names=["mu", "tau"])
for var, result in sensitivity.items():
    print(f"{var} sensitivity: {result['sensitivity_score']:.3f}")

# Apply custom test function
def custom_test(samples):
    return np.mean(samples > 0)  # Probability of being positive

prob_positive = az.apply_test_function(idata, custom_test, var_names=["mu"])
print("P(mu > 0):", prob_positive)

# Create and use universal function
def custom_transform(x):
    return np.log(1 + np.exp(x))  # Softplus transformation

softplus_ufunc = az.make_ufunc(custom_transform)
transformed_data = softplus_ufunc(idata.posterior["theta"])
```
## Data Types

```python { .api }
class ELPDData:
    """
    Expected log pointwise predictive density data container.

    Contains information criteria results from loo() and waic()
    including pointwise values, diagnostics, and summary statistics.

    Attributes:
        elpd_loo (float): LOO expected log pointwise predictive density
        elpd_waic (float): WAIC expected log pointwise predictive density
        p_loo (float): Effective number of parameters (LOO)
        p_waic (float): Effective number of parameters (WAIC)
        se (float): Standard error of ELPD estimate
        pareto_k (np.ndarray): Pareto k diagnostic values
        waic (float): WAIC value
        loo (float): LOO value
    """
```
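A short sketch of reading fields from the returned container; attribute names follow the listing above, and `idata1` is one of the fitted models from the comparison example:

```python
loo_res = az.loo(idata1, pointwise=True)

print(type(loo_res).__name__)               # ELPDData
print(loo_res.elpd_loo, loo_res.se)         # ELPD point estimate and its standard error
print(loo_res.p_loo)                        # effective number of parameters
print(int((loo_res.pareto_k > 0.7).sum()))  # observations with unreliable PSIS weights
```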
### Usage Examples

```python
# Model checking with LOO-PIT
loo_pit_vals = az.loo_pit(idata, y="y_obs", y_hat="y_pred")

# Weight predictions from multiple models
weighted_preds = az.weight_predictions(prediction_dict, weights=model_weights)

# Compute KDE for posterior samples
x_vals, density = az.kde(idata.posterior["mu"].values.flatten())

# Regression diagnostics
r2 = az.r2_score(y_true, y_pred_mean)
r2_posterior = az.r2_samples(y_true, y_pred_samples)
```