# Statistical Analysis and Diagnostics

MCMC diagnostics, model comparison, and statistical functions for Bayesian analysis. Compute convergence diagnostics, effective sample sizes, information criteria, and perform Bayesian model comparison with advanced techniques like PSIS-LOO.

## Summary Statistics

```python { .api }
def summary(data: InferenceData, *, var_names: list = None, hdi_prob: float = 0.94, stat_focus: str = "mean", extend: bool = True, **kwargs) -> pd.DataFrame:
    """
    Create comprehensive summary statistics table for inference data.

    Args:
        data (InferenceData): Bayesian inference data
        var_names (list, optional): Variables to include in summary
        hdi_prob (float): Probability for highest density interval (default 0.94)
        stat_focus (str): Focus statistic ("mean", "median", "mode")
        extend (bool): Whether to include extended diagnostics
        **kwargs: Additional summary parameters

    Returns:
        pd.DataFrame: Summary statistics table with columns for mean, std, HDI bounds, diagnostics
    """

def hdi(data, hdi_prob: float = 0.94, *, circular: bool = False, multimodal: bool = False, **kwargs):
    """
    Compute highest density interval for posterior samples.

    Args:
        data: Posterior samples (array-like or InferenceData)
        hdi_prob (float): Probability mass for HDI (default 0.94)
        circular (bool): Whether data is circular (default False)
        multimodal (bool): Whether to detect multiple modes (default False)
        **kwargs: Additional HDI parameters

    Returns:
        array: HDI bounds [lower, upper] or multimodal intervals
    """
```

### Basic Usage

```python
import arviz as az

# Load example data
idata = az.load_arviz_data("centered_eight")

# Generate comprehensive summary
summary_df = az.summary(idata, var_names=["mu", "tau"])
print(summary_df)

# Compute HDI for specific variables
hdi_bounds = az.hdi(idata.posterior["mu"], hdi_prob=0.89)
print(f"89% HDI for mu: {hdi_bounds}")
```

## MCMC Convergence Diagnostics

### Core Diagnostics

```python { .api }
def rhat(data: InferenceData, *, var_names: list = None, method: str = "rank") -> xr.Dataset:
    """
    Compute R-hat convergence diagnostic for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data with multiple chains
        var_names (list, optional): Variables to analyze
        method (str): Method to use ("rank", "split", "folded", "identity")

    Returns:
        xr.Dataset: R-hat values for each variable (values near 1.0 indicate convergence)
    """

def ess(data: InferenceData, *, var_names: list = None, method: str = "bulk", relative: bool = False) -> xr.Dataset:
    """
    Compute effective sample size for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze
        method (str): ESS method ("bulk", "tail", "quantile", "mean", "sd", "median", "mad")
        relative (bool): Whether to return relative ESS (ESS/total_samples)

    Returns:
        xr.Dataset: Effective sample size values for each variable
    """

def mcse(data: InferenceData, *, var_names: list = None, method: str = "mean", prob: float = None) -> xr.Dataset:
    """
    Compute Monte Carlo standard error for MCMC estimates.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze
        method (str): Statistic method ("mean", "sd", "quantile")
        prob (float, optional): Probability for quantile method

    Returns:
        xr.Dataset: MCSE values for each variable
    """
```
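The usage examples below exercise `rhat` and `ess` but not `mcse`. A minimal sketch of `mcse`, reusing the `idata` object loaded in the Basic Usage example above (the quantile call relies on the `prob` argument documented here):

```python
# Monte Carlo standard error of the posterior mean and of a tail quantile
mcse_mean = az.mcse(idata, var_names=["mu", "tau"], method="mean")
mcse_q05 = az.mcse(idata, var_names=["mu", "tau"], method="quantile", prob=0.05)
print("MCSE (mean):", mcse_mean)
print("MCSE (5% quantile):", mcse_q05)
```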

### Advanced Diagnostics

```python { .api }
def bfmi(data: InferenceData) -> float:
    """
    Compute Bayesian Fraction of Missing Information.
    Diagnostic for HMC/NUTS efficiency (values < 0.2 indicate problems).

    Args:
        data (InferenceData): MCMC inference data with energy information

    Returns:
        float: BFMI value (higher is better, > 0.2 recommended)
    """

def autocorr(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute autocorrelation function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze
        coords (dict, optional): Coordinate specifications
        max_lag (int, optional): Maximum lag to compute
        **kwargs: Additional autocorrelation parameters

    Returns:
        xr.Dataset: Autocorrelation values at different lags
    """

def autocov(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute autocovariance function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze
        coords (dict, optional): Coordinate specifications
        max_lag (int, optional): Maximum lag to compute
        **kwargs: Additional autocovariance parameters

    Returns:
        xr.Dataset: Autocovariance values at different lags
    """

def bayes_factor(data: InferenceData, *, var_name: str, ref_val: float = 0, prior=None, return_ref_vals: bool = False):
    """
    Compute Bayes factor for point hypotheses.

    Args:
        data (InferenceData): Posterior samples
        var_name (str): Variable name to test
        ref_val (float): Reference value for hypothesis test (default 0)
        prior: Prior distribution for Bayes factor calculation
        return_ref_vals (bool): Whether to return reference values

    Returns:
        dict or float: Bayes factor results
    """

def psislw(log_weights, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto smoothed importance sampling leave-one-out weights.

    Args:
        log_weights: Log importance weights
        reff (float): Relative effective sample size (default 1.0)
        normalize (bool): Whether to normalize weights (default True)

    Returns:
        tuple: (smoothed_log_weights, pareto_k_values)
    """

def kde(x, *, circular: bool = False, **kwargs) -> tuple:
    """
    Kernel density estimation for continuous data.

    Args:
        x: Input data for density estimation
        circular (bool): Whether data is circular (default False)
        **kwargs: Additional KDE parameters (bandwidth, kernel type, etc.)

    Returns:
        tuple: (x_values, density_values) for plotting or further analysis
    """

def r2_samples(y_true, y_pred) -> xr.Dataset:
    """
    Compute R-squared coefficient for posterior samples.

    Args:
        y_true: True/observed values
        y_pred: Predicted values (posterior samples)

    Returns:
        xr.Dataset: R-squared values for each posterior sample
    """

def r2_score(y_true, y_pred) -> float:
    """
    Compute R-squared coefficient for point estimates.

    Args:
        y_true: True/observed values
        y_pred: Predicted point estimates

    Returns:
        float: R-squared coefficient
    """

```

### Usage Examples

```python
# Check convergence diagnostics
rhat_values = az.rhat(idata)
print("R-hat values:", rhat_values)

# Compute effective sample sizes
ess_bulk = az.ess(idata, method="bulk")
ess_tail = az.ess(idata, method="tail")
print("Bulk ESS:", ess_bulk)
print("Tail ESS:", ess_tail)

# Check MCMC efficiency
bfmi_value = az.bfmi(idata)
print(f"BFMI: {bfmi_value:.3f}")

# Analyze autocorrelation
autocorr_vals = az.autocorr(idata, var_names=["mu"])
```

## Model Comparison

### Information Criteria

```python { .api }
def compare(data_dict: dict, *, ic: str = "loo", method: str = "stacking", scale: str = "log") -> pd.DataFrame:
    """
    Compare multiple models using information criteria and model weights.

    Args:
        data_dict (dict): Dictionary of model names to InferenceData objects
        ic (str): Information criterion ("loo", "waic")
        method (str): Weighting method ("stacking", "bb-pseudo-bma", "pseudo-bma")
        scale (str): Scale for comparison ("log", "negative_log", "deviance")

    Returns:
        pd.DataFrame: Model comparison table with IC values, weights, and rankings
    """

def loo(data: InferenceData, *, pointwise: bool = False, var_name: str = None, reff: float = 1.0) -> ELPDData:
    """
    Compute Leave-One-Out Cross-Validation using Pareto Smoothed Importance Sampling.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group
        reff (float): Relative efficiency of MCMC chains (default 1.0)

    Returns:
        ELPDData: LOO results with ELPD estimate, standard error, and Pareto k diagnostics
    """

def waic(data: InferenceData, *, pointwise: bool = False, var_name: str = None, scale: str = "log") -> ELPDData:
    """
    Compute Widely Applicable Information Criterion.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group
        scale (str): Scale for computation ("log", "negative_log", "deviance")

    Returns:
        ELPDData: WAIC results with ELPD estimate and effective parameters
    """
```

### Specialized Comparison Methods

```python { .api }
def psislw(log_weights: np.ndarray, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto Smoothed Importance Sampling with diagnostics.

    Args:
        log_weights (np.ndarray): Log importance weights
        reff (float): Relative efficiency of weights (default 1.0)
        normalize (bool): Whether to normalize weights (default True)

    Returns:
        tuple: (smoothed_weights, pareto_k_values) with diagnostics
    """

def bayes_factor(data: InferenceData, *, var_name: str = None, prior_odds: float = 1.0) -> float:
    """
    Compute Bayes factor for model comparison.

    Args:
        data (InferenceData): Inference data
        var_name (str, optional): Variable for comparison
        prior_odds (float): Prior odds ratio (default 1.0)

    Returns:
        float: Bayes factor value
    """
```
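`psislw` is the smoothing step that `loo` uses internally: it stabilizes raw leave-one-out importance weights and returns a Pareto k diagnostic per observation. A minimal sketch, assuming `idata1` has a log_likelihood group with a single variable named `"obs"` (the variable name is illustrative):

```python
import numpy as np

# Stack chains and draws into a single sample dimension, then smooth the
# leave-one-out importance weights (the negative pointwise log likelihood).
log_lik = idata1.log_likelihood["obs"].stack(__sample__=("chain", "draw"))
smoothed_lw, pareto_k = az.psislw(-log_lik, reff=1.0)

# Observations with k > 0.7 have unreliable importance-sampling estimates.
print("Observations with k > 0.7:", int((pareto_k > 0.7).sum()))
```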

### Usage Examples

```python
# Compare multiple models
model_dict = {
    "model_1": idata1,
    "model_2": idata2,
    "model_3": idata3
}

# Perform model comparison
comparison_df = az.compare(model_dict, ic="loo", method="stacking")
print(comparison_df)

# Compute LOO for single model
loo_results = az.loo(idata1, pointwise=True)
print(f"LOO ELPD: {loo_results.elpd_loo:.2f} ± {loo_results.se:.2f}")
print(f"Problematic observations (k > 0.7): {(loo_results.pareto_k > 0.7).sum()}")

# Compute WAIC
waic_results = az.waic(idata1)
print(f"WAIC: {waic_results.waic:.2f}")
```
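The comparison table can also be reviewed graphically with ArviZ's `plot_compare` (covered with the plotting utilities); a short sketch using the `comparison_df` computed above:

```python
import matplotlib.pyplot as plt

# Plot ELPD estimates and standard errors for the compared models
az.plot_compare(comparison_df)
plt.show()
```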

## Predictive Model Checking

```python { .api }
def loo_pit(data: InferenceData, *, y: str = None, y_hat: str = None) -> np.ndarray:
    """
    Compute Leave-One-Out Probability Integral Transform values.

    Args:
        data (InferenceData): Inference data with posterior predictive samples
        y (str, optional): Observed data variable name
        y_hat (str, optional): Posterior predictive variable name

    Returns:
        np.ndarray: LOO-PIT values for model checking
    """

def weight_predictions(data: dict, *, weights: np.ndarray = None) -> InferenceData:
    """
    Weight predictions from multiple models using model weights.

    Args:
        data (dict): Dictionary of model predictions
        weights (np.ndarray, optional): Model weights (if None, uses equal weights)

    Returns:
        InferenceData: Weighted prediction ensemble
    """
```
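For a well-calibrated model, LOO-PIT values are approximately uniform on [0, 1]. A minimal sketch, assuming the observed and posterior predictive variables share the name `"obs"` (the name is illustrative):

```python
import numpy as np

# LOO-PIT values; strong clustering near 0 or 1 suggests miscalibration
pit_vals = az.loo_pit(idata, y="obs")
print("LOO-PIT range:", float(np.min(pit_vals)), "-", float(np.max(pit_vals)))
```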

## Kernel Density Estimation

```python { .api }
def kde(data, *, bw: str = "default", adaptive: bool = False, extend: bool = True, **kwargs):
    """
    Compute kernel density estimation for continuous data.

    Args:
        data: Input data (array-like)
        bw (str): Bandwidth selection method ("default", "scott", "silverman")
        adaptive (bool): Whether to use adaptive bandwidth (default False)
        extend (bool): Whether to extend domain beyond data range (default True)
        **kwargs: Additional KDE parameters

    Returns:
        tuple: (x_values, density_values) for plotting and analysis
    """
```
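A small sketch of the grid/density pair that `kde` returns, evaluated on flattened posterior draws from the `idata` object used in the earlier examples:

```python
import matplotlib.pyplot as plt

# kde returns the evaluation grid and the estimated density values
samples = idata.posterior["mu"].values.flatten()
grid, density = az.kde(samples)

plt.plot(grid, density)
plt.xlabel("mu")
plt.ylabel("density")
plt.show()
```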

## Regression Diagnostics

```python { .api }
def r2_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Compute R-squared coefficient of determination.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values

    Returns:
        float: R-squared score (1.0 is perfect prediction)
    """

def r2_samples(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute sample-wise R-squared values for uncertainty quantification.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values (samples x observations)

    Returns:
        np.ndarray: R-squared values for each sample
    """
```
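A self-contained sketch with synthetic data, following the shape conventions listed above (a point estimate for `r2_score`, a samples-by-observations matrix for `r2_samples`):

```python
import numpy as np

rng = np.random.default_rng(0)

# Synthetic regression setup: 100 observations, 500 posterior draws
y_true = rng.normal(size=100)
y_pred_samples = y_true + rng.normal(scale=0.5, size=(500, 100))
y_pred_mean = y_pred_samples.mean(axis=0)

print("Point-estimate R2:", az.r2_score(y_true, y_pred_mean))
print("First 5 posterior R2 draws:", az.r2_samples(y_true, y_pred_samples)[:5])
```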

## Advanced Statistical Methods

### Model Refitting and Sensitivity Analysis

```python { .api }
def reloo(data: InferenceData, *, loo_kwargs: dict = None, refit_kwargs: dict = None) -> ELPDData:
    """
    Refit models for problematic observations in LOO-CV.

    Automatically identifies observations with high Pareto k values
    and refits the model excluding those observations for more
    accurate LOO estimates.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        loo_kwargs (dict, optional): Arguments passed to az.loo()
        refit_kwargs (dict, optional): Arguments for model refitting

    Returns:
        ELPDData: Improved LOO results with refitted estimates
    """

def psens(data: InferenceData, *, perturbation: float = 0.1, var_names: list = None) -> dict:
    """
    Perform prior sensitivity analysis.

    Evaluates how sensitive posterior estimates are to changes
    in prior specifications by perturbing priors and measuring
    the impact on posterior quantities.

    Args:
        data (InferenceData): Inference data with prior samples
        perturbation (float): Size of prior perturbation (default 0.1)
        var_names (list, optional): Variables to analyze

    Returns:
        dict: Sensitivity analysis results for each variable
    """
```

### Utility Functions

```python { .api }
def apply_test_function(data, func, *, var_names: list = None, **kwargs):
    """
    Apply test function to inference data variables.

    Applies custom test functions to posterior samples for
    model checking, hypothesis testing, or custom diagnostics.

    Args:
        data: Inference data or array-like data
        func: Test function to apply
        var_names (list, optional): Variables to apply function to
        **kwargs: Additional arguments passed to function

    Returns:
        Results of applying test function
    """

def make_ufunc(func, *, signature: str = None, **kwargs):
    """
    Create universal function for broadcasting operations.

    Converts regular functions into universal functions that
    can operate on ArviZ data structures with proper broadcasting.

    Args:
        func: Function to convert to ufunc
        signature (str, optional): Function signature for broadcasting
        **kwargs: Additional ufunc creation parameters

    Returns:
        Universal function compatible with xarray operations
    """

def wrap_xarray_ufunc(func, *, dask: str = "forbidden", **kwargs):
    """
    Wrap functions for xarray universal function operations.

    Creates xarray-compatible wrapped functions that handle
    coordinates, dimensions, and attributes properly.

    Args:
        func: Function to wrap for xarray
        dask (str): Dask handling strategy ("forbidden", "allowed")
        **kwargs: Additional wrapping parameters

    Returns:
        Wrapped function compatible with xarray apply_ufunc
    """

def smooth_data(data, *, method: str = "gaussian", **kwargs):
    """
    Apply smoothing operations to data.

    Provides various smoothing algorithms for data preprocessing
    and noise reduction in statistical analysis.

    Args:
        data: Input data to smooth
        method (str): Smoothing method ("gaussian", "median", "savgol")
        **kwargs: Method-specific smoothing parameters

    Returns:
        Smoothed data with same structure as input
    """
```

### Usage Examples

```python
import numpy as np

# Model refitting for problematic LOO observations
loo_initial = az.loo(idata)
problematic_k = (loo_initial.pareto_k > 0.7).sum()
print(f"Problematic observations: {problematic_k}")

if problematic_k > 0:
    loo_refit = az.reloo(idata)
    print(f"Improved LOO: {loo_refit.elpd_loo:.2f}")

# Prior sensitivity analysis
sensitivity = az.psens(idata, perturbation=0.2, var_names=["mu", "sigma"])
for var, result in sensitivity.items():
    print(f"{var} sensitivity: {result['sensitivity_score']:.3f}")

# Apply custom test function
def custom_test(samples):
    return np.mean(samples > 0)  # Probability of being positive

prob_positive = az.apply_test_function(idata, custom_test, var_names=["mu"])
print(f"P(mu > 0): {prob_positive:.3f}")

# Create and use universal function
def custom_transform(x):
    return np.log(1 + np.exp(x))  # Softplus transformation

softplus_ufunc = az.make_ufunc(custom_transform)
transformed_data = softplus_ufunc(idata.posterior["theta"])
```

## Data Types

```python { .api }
class ELPDData:
    """
    Expected log pointwise predictive density data container.

    Contains information criteria results from loo() and waic()
    including pointwise values, diagnostics, and summary statistics.

    Attributes:
        elpd_loo (float): LOO expected log pointwise predictive density
        elpd_waic (float): WAIC expected log pointwise predictive density
        p_loo (float): Effective number of parameters (LOO)
        p_waic (float): Effective number of parameters (WAIC)
        se (float): Standard error of ELPD estimate
        pareto_k (np.ndarray): Pareto k diagnostic values
        waic (float): WAIC value
        loo (float): LOO value
    """
```

### Usage Examples

```python
# Model checking with LOO-PIT
loo_pit_vals = az.loo_pit(idata, y="y_obs", y_hat="y_pred")

# Weight predictions from multiple models
weighted_preds = az.weight_predictions(prediction_dict, weights=model_weights)

# Compute KDE for posterior samples
x_vals, density = az.kde(idata.posterior["mu"].values.flatten())

# Regression diagnostics
r2 = az.r2_score(y_true, y_pred_mean)
r2_posterior = az.r2_samples(y_true, y_pred_samples)
```