# Statistical Analysis and Diagnostics

MCMC diagnostics, model comparison, and statistical functions for Bayesian analysis. Compute convergence diagnostics, effective sample sizes, information criteria, and perform Bayesian model comparison with advanced techniques like PSIS-LOO.

## Summary Statistics

```python { .api }
def summary(data: InferenceData, *, var_names: list = None, hdi_prob: float = 0.94, stat_focus: str = "mean", extend: bool = True, **kwargs) -> pd.DataFrame:
    """
    Create comprehensive summary statistics table for inference data.

    Args:
        data (InferenceData): Bayesian inference data
        var_names (list, optional): Variables to include in summary
        hdi_prob (float): Probability for highest density interval (default 0.94)
        stat_focus (str): Focus statistic ("mean", "median", "mode")
        extend (bool): Whether to include extended diagnostics
        **kwargs: Additional summary parameters

    Returns:
        pd.DataFrame: Summary statistics table with columns for mean, std, HDI bounds, diagnostics
    """

def hdi(data, hdi_prob: float = 0.94, *, circular: bool = False, multimodal: bool = False, **kwargs):
    """
    Compute highest density interval for posterior samples.

    Args:
        data: Posterior samples (array-like or InferenceData)
        hdi_prob (float): Probability mass for HDI (default 0.94)
        circular (bool): Whether data is circular (default False)
        multimodal (bool): Whether to detect multiple modes (default False)
        **kwargs: Additional HDI parameters

    Returns:
        array: HDI bounds [lower, upper] or multimodal intervals
    """
```

### Basic Usage

```python
import arviz as az

# Load example data
idata = az.load_arviz_data("centered_eight")

# Generate comprehensive summary
summary_df = az.summary(idata, var_names=["mu", "tau"])
print(summary_df)

# Compute HDI for specific variables
hdi_bounds = az.hdi(idata.posterior["mu"], hdi_prob=0.89)
print(f"89% HDI for mu: {hdi_bounds}")
```

## MCMC Convergence Diagnostics

### Core Diagnostics

```python { .api }
def rhat(data: InferenceData, *, var_names: list = None, method: str = "rank") -> xr.Dataset:
    """
    Compute R-hat convergence diagnostic for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data with multiple chains
        var_names (list, optional): Variables to analyze
        method (str): Method to use ("rank", "split", "folded", "identity")

    Returns:
        xr.Dataset: R-hat values for each variable (values near 1.0 indicate convergence)
    """

def ess(data: InferenceData, *, var_names: list = None, method: str = "bulk", relative: bool = False) -> xr.Dataset:
    """
    Compute effective sample size for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze
        method (str): ESS method ("bulk", "tail", "quantile", "mean", "sd", "median", "mad")
        relative (bool): Whether to return relative ESS (ESS/total_samples)

    Returns:
        xr.Dataset: Effective sample size values for each variable
    """

def mcse(data: InferenceData, *, var_names: list = None, method: str = "mean", prob: float = None) -> xr.Dataset:
    """
    Compute Monte Carlo standard error for MCMC estimates.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze
        method (str): Statistic method ("mean", "sd", "quantile")
        prob (float, optional): Probability for quantile method

    Returns:
        xr.Dataset: MCSE values for each variable
    """
```
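The usage examples below exercise `rhat` and `ess` but not `mcse`. A minimal sketch of `mcse`, reusing the `idata` object loaded in the Basic Usage example above (the quantile call relies on the `prob` argument documented here):

```python
# Monte Carlo standard error of the posterior mean and of a tail quantile
mcse_mean = az.mcse(idata, var_names=["mu", "tau"], method="mean")
mcse_q05 = az.mcse(idata, var_names=["mu", "tau"], method="quantile", prob=0.05)
print("MCSE (mean):", mcse_mean)
print("MCSE (5% quantile):", mcse_q05)
```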

### Advanced Diagnostics

```python { .api }
def bfmi(data: InferenceData) -> float:
    """
    Compute Bayesian Fraction of Missing Information.
    Diagnostic for HMC/NUTS efficiency (values < 0.2 indicate problems).

    Args:
        data (InferenceData): MCMC inference data with energy information

    Returns:
        float: BFMI value (higher is better, > 0.2 recommended)
    """

def autocorr(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute autocorrelation function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze
        coords (dict, optional): Coordinate specifications
        max_lag (int, optional): Maximum lag to compute
        **kwargs: Additional autocorrelation parameters

    Returns:
        xr.Dataset: Autocorrelation values at different lags
    """

def autocov(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute autocovariance function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze
        coords (dict, optional): Coordinate specifications
        max_lag (int, optional): Maximum lag to compute
        **kwargs: Additional autocovariance parameters

    Returns:
        xr.Dataset: Autocovariance values at different lags
    """

def bayes_factor(data: InferenceData, *, var_name: str, ref_val: float = 0, prior=None, return_ref_vals: bool = False):
    """
    Compute Bayes factor for point hypotheses.

    Args:
        data (InferenceData): Posterior samples
        var_name (str): Variable name to test
        ref_val (float): Reference value for hypothesis test (default 0)
        prior: Prior distribution for Bayes factor calculation
        return_ref_vals (bool): Whether to return reference values

    Returns:
        dict or float: Bayes factor results
    """

def psislw(log_weights, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto smoothed importance sampling leave-one-out weights.

    Args:
        log_weights: Log importance weights
        reff (float): Relative effective sample size (default 1.0)
        normalize (bool): Whether to normalize weights (default True)

    Returns:
        tuple: (smoothed_log_weights, pareto_k_values)
    """

def kde(x, *, circular: bool = False, **kwargs) -> tuple:
    """
    Kernel density estimation for continuous data.

    Args:
        x: Input data for density estimation
        circular (bool): Whether data is circular (default False)
        **kwargs: Additional KDE parameters (bandwidth, kernel type, etc.)

    Returns:
        tuple: (x_values, density_values) for plotting or further analysis
    """

def r2_samples(y_true, y_pred) -> xr.Dataset:
    """
    Compute R-squared coefficient for posterior samples.

    Args:
        y_true: True/observed values
        y_pred: Predicted values (posterior samples)

    Returns:
        xr.Dataset: R-squared values for each posterior sample
    """

def r2_score(y_true, y_pred) -> float:
    """
    Compute R-squared coefficient for point estimates.

    Args:
        y_true: True/observed values
        y_pred: Predicted point estimates

    Returns:
        float: R-squared coefficient
    """

```

### Usage Examples

```python
# Check convergence diagnostics
rhat_values = az.rhat(idata)
print("R-hat values:", rhat_values)

# Compute effective sample sizes
ess_bulk = az.ess(idata, method="bulk")
ess_tail = az.ess(idata, method="tail")
print("Bulk ESS:", ess_bulk)
print("Tail ESS:", ess_tail)

# Check MCMC efficiency
bfmi_value = az.bfmi(idata)
print(f"BFMI: {bfmi_value:.3f}")

# Analyze autocorrelation
autocorr_vals = az.autocorr(idata, var_names=["mu"])
```

## Model Comparison

### Information Criteria

```python { .api }
def compare(data_dict: dict, *, ic: str = "loo", method: str = "stacking", scale: str = "log") -> pd.DataFrame:
    """
    Compare multiple models using information criteria and model weights.

    Args:
        data_dict (dict): Dictionary of model names to InferenceData objects
        ic (str): Information criterion ("loo", "waic")
        method (str): Weighting method ("stacking", "bb-pseudo-bma", "pseudo-bma")
        scale (str): Scale for comparison ("log", "negative_log", "deviance")

    Returns:
        pd.DataFrame: Model comparison table with IC values, weights, and rankings
    """

def loo(data: InferenceData, *, pointwise: bool = False, var_name: str = None, reff: float = 1.0) -> ELPDData:
    """
    Compute Leave-One-Out Cross-Validation using Pareto Smoothed Importance Sampling.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group
        reff (float): Relative efficiency of MCMC chains (default 1.0)

    Returns:
        ELPDData: LOO results with ELPD estimate, standard error, and Pareto k diagnostics
    """

def waic(data: InferenceData, *, pointwise: bool = False, var_name: str = None, scale: str = "log") -> ELPDData:
    """
    Compute Widely Applicable Information Criterion.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group
        scale (str): Scale for computation ("log", "negative_log", "deviance")

    Returns:
        ELPDData: WAIC results with ELPD estimate and effective parameters
    """
```

### Specialized Comparison Methods

```python { .api }
def psislw(log_weights: np.ndarray, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto Smoothed Importance Sampling with diagnostics.

    Args:
        log_weights (np.ndarray): Log importance weights
        reff (float): Relative efficiency of weights (default 1.0)
        normalize (bool): Whether to normalize weights (default True)

    Returns:
        tuple: (smoothed_weights, pareto_k_values) with diagnostics
    """

def bayes_factor(data: InferenceData, *, var_name: str = None, prior_odds: float = 1.0) -> float:
    """
    Compute Bayes factor for model comparison.

    Args:
        data (InferenceData): Inference data
        var_name (str, optional): Variable for comparison
        prior_odds (float): Prior odds ratio (default 1.0)

    Returns:
        float: Bayes factor value
    """
```
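`psislw` is the smoothing step that `loo` uses internally: it stabilizes raw leave-one-out importance weights and returns a Pareto k diagnostic per observation. A minimal sketch, assuming `idata1` has a log_likelihood group with a single variable named `"obs"` (the variable name is illustrative):

```python
import numpy as np

# Stack chains and draws into a single sample dimension, then smooth the
# leave-one-out importance weights (the negative pointwise log likelihood).
log_lik = idata1.log_likelihood["obs"].stack(__sample__=("chain", "draw"))
smoothed_lw, pareto_k = az.psislw(-log_lik, reff=1.0)

# Observations with k > 0.7 have unreliable importance-sampling estimates.
print("Observations with k > 0.7:", int((pareto_k > 0.7).sum()))
```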

### Usage Examples

```python
# Compare multiple models
model_dict = {
    "model_1": idata1,
    "model_2": idata2,
    "model_3": idata3
}

# Perform model comparison
comparison_df = az.compare(model_dict, ic="loo", method="stacking")
print(comparison_df)

# Compute LOO for single model
loo_results = az.loo(idata1, pointwise=True)
print(f"LOO ELPD: {loo_results.elpd_loo:.2f} ± {loo_results.se:.2f}")
print(f"Problematic observations (k > 0.7): {(loo_results.pareto_k > 0.7).sum()}")

# Compute WAIC
waic_results = az.waic(idata1)
print(f"WAIC: {waic_results.waic:.2f}")
```
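The comparison table can also be reviewed graphically with ArviZ's `plot_compare` (covered with the plotting utilities); a short sketch using the `comparison_df` computed above:

```python
import matplotlib.pyplot as plt

# Plot ELPD estimates and standard errors for the compared models
az.plot_compare(comparison_df)
plt.show()
```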

## Predictive Model Checking

```python { .api }
def loo_pit(data: InferenceData, *, y: str = None, y_hat: str = None) -> np.ndarray:
    """
    Compute Leave-One-Out Probability Integral Transform values.

    Args:
        data (InferenceData): Inference data with posterior predictive samples
        y (str, optional): Observed data variable name
        y_hat (str, optional): Posterior predictive variable name

    Returns:
        np.ndarray: LOO-PIT values for model checking
    """

def weight_predictions(data: dict, *, weights: np.ndarray = None) -> InferenceData:
    """
    Weight predictions from multiple models using model weights.

    Args:
        data (dict): Dictionary of model predictions
        weights (np.ndarray, optional): Model weights (if None, uses equal weights)

    Returns:
        InferenceData: Weighted prediction ensemble
    """
```
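For a well-calibrated model, LOO-PIT values are approximately uniform on [0, 1]. A minimal sketch, assuming the observed and posterior predictive variables share the name `"obs"` (the name is illustrative):

```python
import numpy as np

# LOO-PIT values; strong clustering near 0 or 1 suggests miscalibration
pit_vals = az.loo_pit(idata, y="obs")
print("LOO-PIT range:", float(np.min(pit_vals)), "-", float(np.max(pit_vals)))
```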

## Kernel Density Estimation

```python { .api }
def kde(data, *, bw: str = "default", adaptive: bool = False, extend: bool = True, **kwargs):
    """
    Compute kernel density estimation for continuous data.

    Args:
        data: Input data (array-like)
        bw (str): Bandwidth selection method ("default", "scott", "silverman")
        adaptive (bool): Whether to use adaptive bandwidth (default False)
        extend (bool): Whether to extend domain beyond data range (default True)
        **kwargs: Additional KDE parameters

    Returns:
        tuple: (x_values, density_values) for plotting and analysis
    """
```
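A small sketch of the grid/density pair that `kde` returns, evaluated on flattened posterior draws from the `idata` object used in the earlier examples:

```python
import matplotlib.pyplot as plt

# kde returns the evaluation grid and the estimated density values
samples = idata.posterior["mu"].values.flatten()
grid, density = az.kde(samples)

plt.plot(grid, density)
plt.xlabel("mu")
plt.ylabel("density")
plt.show()
```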

## Regression Diagnostics

```python { .api }
def r2_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Compute R-squared coefficient of determination.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values

    Returns:
        float: R-squared score (1.0 is perfect prediction)
    """

def r2_samples(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute sample-wise R-squared values for uncertainty quantification.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values (samples x observations)

    Returns:
        np.ndarray: R-squared values for each sample
    """
```
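A self-contained sketch with synthetic data, following the shape conventions listed above (a point estimate for `r2_score`, a samples-by-observations matrix for `r2_samples`):

```python
import numpy as np

rng = np.random.default_rng(0)

# Synthetic regression setup: 100 observations, 500 posterior draws
y_true = rng.normal(size=100)
y_pred_samples = y_true + rng.normal(scale=0.5, size=(500, 100))
y_pred_mean = y_pred_samples.mean(axis=0)

print("Point-estimate R2:", az.r2_score(y_true, y_pred_mean))
print("First 5 posterior R2 draws:", az.r2_samples(y_true, y_pred_samples)[:5])
```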

## Advanced Statistical Methods

### Model Refitting and Sensitivity Analysis

```python { .api }
def reloo(data: InferenceData, *, loo_kwargs: dict = None, refit_kwargs: dict = None) -> ELPDData:
    """
    Refit models for problematic observations in LOO-CV.

    Automatically identifies observations with high Pareto k values
    and refits the model excluding those observations for more
    accurate LOO estimates.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        loo_kwargs (dict, optional): Arguments passed to az.loo()
        refit_kwargs (dict, optional): Arguments for model refitting

    Returns:
        ELPDData: Improved LOO results with refitted estimates
    """

def psens(data: InferenceData, *, perturbation: float = 0.1, var_names: list = None) -> dict:
    """
    Perform prior sensitivity analysis.

    Evaluates how sensitive posterior estimates are to changes
    in prior specifications by perturbing priors and measuring
    the impact on posterior quantities.

    Args:
        data (InferenceData): Inference data with prior samples
        perturbation (float): Size of prior perturbation (default 0.1)
        var_names (list, optional): Variables to analyze

    Returns:
        dict: Sensitivity analysis results for each variable
    """
```

### Utility Functions

```python { .api }
def apply_test_function(data, func, *, var_names: list = None, **kwargs):
    """
    Apply test function to inference data variables.

    Applies custom test functions to posterior samples for
    model checking, hypothesis testing, or custom diagnostics.

    Args:
        data: Inference data or array-like data
        func: Test function to apply
        var_names (list, optional): Variables to apply function to
        **kwargs: Additional arguments passed to function

    Returns:
        Results of applying test function
    """

def make_ufunc(func, *, signature: str = None, **kwargs):
    """
    Create universal function for broadcasting operations.

    Converts regular functions into universal functions that
    can operate on ArviZ data structures with proper broadcasting.

    Args:
        func: Function to convert to ufunc
        signature (str, optional): Function signature for broadcasting
        **kwargs: Additional ufunc creation parameters

    Returns:
        Universal function compatible with xarray operations
    """

def wrap_xarray_ufunc(func, *, dask: str = "forbidden", **kwargs):
    """
    Wrap functions for xarray universal function operations.

    Creates xarray-compatible wrapped functions that handle
    coordinates, dimensions, and attributes properly.

    Args:
        func: Function to wrap for xarray
        dask (str): Dask handling strategy ("forbidden", "allowed")
        **kwargs: Additional wrapping parameters

    Returns:
        Wrapped function compatible with xarray apply_ufunc
    """

def smooth_data(data, *, method: str = "gaussian", **kwargs):
    """
    Apply smoothing operations to data.

    Provides various smoothing algorithms for data preprocessing
    and noise reduction in statistical analysis.

    Args:
        data: Input data to smooth
        method (str): Smoothing method ("gaussian", "median", "savgol")
        **kwargs: Method-specific smoothing parameters

    Returns:
        Smoothed data with same structure as input
    """
```

### Usage Examples

```python
import numpy as np

# Model refitting for problematic LOO observations
loo_initial = az.loo(idata)
problematic_k = (loo_initial.pareto_k > 0.7).sum()
print(f"Problematic observations: {problematic_k}")

if problematic_k > 0:
    loo_refit = az.reloo(idata)
    print(f"Improved LOO: {loo_refit.elpd_loo:.2f}")

# Prior sensitivity analysis
sensitivity = az.psens(idata, perturbation=0.2, var_names=["mu", "sigma"])
for var, result in sensitivity.items():
    print(f"{var} sensitivity: {result['sensitivity_score']:.3f}")

# Apply custom test function
def custom_test(samples):
    return np.mean(samples > 0)  # Probability of being positive

prob_positive = az.apply_test_function(idata, custom_test, var_names=["mu"])
print(f"P(mu > 0): {prob_positive:.3f}")

# Create and use universal function
def custom_transform(x):
    return np.log(1 + np.exp(x))  # Softplus transformation

softplus_ufunc = az.make_ufunc(custom_transform)
transformed_data = softplus_ufunc(idata.posterior["theta"])
```

## Data Types

```python { .api }
class ELPDData:
    """
    Expected log pointwise predictive density data container.

    Contains information criteria results from loo() and waic()
    including pointwise values, diagnostics, and summary statistics.

    Attributes:
        elpd_loo (float): LOO expected log pointwise predictive density
        elpd_waic (float): WAIC expected log pointwise predictive density
        p_loo (float): Effective number of parameters (LOO)
        p_waic (float): Effective number of parameters (WAIC)
        se (float): Standard error of ELPD estimate
        pareto_k (np.ndarray): Pareto k diagnostic values
        waic (float): WAIC value
        loo (float): LOO value
    """
```

### Usage Examples

```python
# Model checking with LOO-PIT
loo_pit_vals = az.loo_pit(idata, y="y_obs", y_hat="y_pred")

# Weight predictions from multiple models
weighted_preds = az.weight_predictions(prediction_dict, weights=model_weights)

# Compute KDE for posterior samples
x_vals, density = az.kde(idata.posterior["mu"].values.flatten())

# Regression diagnostics
r2 = az.r2_score(y_true, y_pred_mean)
r2_posterior = az.r2_samples(y_true, y_pred_samples)
```