Exploratory analysis of Bayesian models with comprehensive data manipulation, statistical diagnostics, and visualization capabilities
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
MCMC diagnostics, model comparison, and statistical functions for Bayesian analysis. Compute convergence diagnostics, effective sample sizes, information criteria, and perform Bayesian model comparison with advanced techniques like PSIS-LOO.
def summary(data: InferenceData, *, var_names: list = None, hdi_prob: float = 0.94, stat_focus: str = "mean", extend: bool = True, **kwargs) -> pd.DataFrame:
    """
    Create a comprehensive summary-statistics table for inference data.

    Args:
        data (InferenceData): Bayesian inference data.
        var_names (list, optional): Variables to include in the summary.
        hdi_prob (float): Probability for the highest density interval (default 0.94).
        stat_focus (str): Focus statistic ("mean", "median", "mode").
        extend (bool): Whether to include extended diagnostics.
        **kwargs: Additional summary parameters.

    Returns:
        pd.DataFrame: Summary table with columns for mean, std, HDI bounds, and diagnostics.
    """
def hdi(data, hdi_prob: float = 0.94, *, circular: bool = False, multimodal: bool = False, **kwargs):
    """
    Compute the highest density interval (HDI) for posterior samples.

    Args:
        data: Posterior samples (array-like or InferenceData).
        hdi_prob (float): Probability mass for the HDI (default 0.94).
        circular (bool): Whether the data are circular (default False).
        multimodal (bool): Whether to detect multiple modes (default False).
        **kwargs: Additional HDI parameters.

    Returns:
        array: HDI bounds [lower, upper], or multiple intervals when multimodal.
"""import arviz as az
# Load example data
idata = az.load_arviz_data("centered_eight")
# Generate comprehensive summary
summary_df = az.summary(idata, var_names=["mu", "tau"])
print(summary_df)
# Compute HDI for specific variables
hdi_bounds = az.hdi(idata.posterior["mu"], hdi_prob=0.89)
print(f"89% HDI for mu: {hdi_bounds}")


def rhat(data: InferenceData, *, var_names: list = None, method: str = "rank") -> xr.Dataset:
    """
    Compute the R-hat convergence diagnostic for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data with multiple chains.
        var_names (list, optional): Variables to analyze.
        method (str): Method to use ("rank", "split", "folded", "identity").

    Returns:
        xr.Dataset: R-hat values per variable (values near 1.0 indicate convergence).
    """
def ess(data: InferenceData, *, var_names: list = None, method: str = "bulk", relative: bool = False) -> xr.Dataset:
    """
    Compute the effective sample size (ESS) for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data.
        var_names (list, optional): Variables to analyze.
        method (str): ESS method ("bulk", "tail", "quantile", "mean", "sd", "median", "mad").
        relative (bool): Whether to return relative ESS (ESS / total_samples).

    Returns:
        xr.Dataset: Effective sample size values for each variable.
    """
def mcse(data: InferenceData, *, var_names: list = None, method: str = "mean", prob: float = None) -> xr.Dataset:
    """
    Compute the Monte Carlo standard error (MCSE) of MCMC estimates.

    Args:
        data (InferenceData): MCMC inference data.
        var_names (list, optional): Variables to analyze.
        method (str): Statistic method ("mean", "sd", "quantile").
        prob (float, optional): Probability for the quantile method.

    Returns:
        xr.Dataset: MCSE values for each variable.
"""def bfmi(data: InferenceData) -> float:
"""
Compute Bayesian Fraction of Missing Information.
Diagnostic for HMC/NUTS efficiency (values < 0.2 indicate problems).
Args:
data (InferenceData): MCMC inference data with energy information
Returns:
float: BFMI value (higher is better, > 0.2 recommended)
"""
def autocorr(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute the autocorrelation function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like).
        var_names (list, optional): Variables to analyze.
        coords (dict, optional): Coordinate specifications.
        max_lag (int, optional): Maximum lag to compute.
        **kwargs: Additional autocorrelation parameters.

    Returns:
        xr.Dataset: Autocorrelation values at each lag.
    """
def autocov(data, *, var_names: list = None, coords: dict = None, max_lag: int = None, **kwargs) -> xr.Dataset:
    """
    Compute the autocovariance function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like).
        var_names (list, optional): Variables to analyze.
        coords (dict, optional): Coordinate specifications.
        max_lag (int, optional): Maximum lag to compute.
        **kwargs: Additional autocovariance parameters.

    Returns:
        xr.Dataset: Autocovariance values at each lag.
    """
def bayes_factor(data: InferenceData, *, var_name: str, ref_val: float = 0, prior=None, return_ref_vals: bool = False):
    """
    Compute a Bayes factor for a point hypothesis.

    Args:
        data (InferenceData): Posterior samples.
        var_name (str): Variable name to test.
        ref_val (float): Reference value for the hypothesis test (default 0).
        prior: Prior distribution used in the Bayes factor calculation.
        return_ref_vals (bool): Whether to also return the reference values.

    Returns:
        dict or float: Bayes factor results.
    """
def psislw(log_weights, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto-smoothed importance sampling leave-one-out weights.

    Args:
        log_weights: Log importance weights.
        reff (float): Relative effective sample size (default 1.0).
        normalize (bool): Whether to normalize the weights (default True).

    Returns:
        tuple: (smoothed_log_weights, pareto_k_values).
    """
def kde(x, *, circular: bool = False, **kwargs) -> tuple:
    """
    Kernel density estimation for continuous data.

    Args:
        x: Input data for density estimation.
        circular (bool): Whether the data are circular (default False).
        **kwargs: Additional KDE parameters (bandwidth, kernel type, etc.).

    Returns:
        tuple: (x_values, density_values) for plotting or further analysis.
    """
def r2_samples(y_true, y_pred) -> xr.Dataset:
    """
    Compute the R-squared coefficient per posterior sample.

    Args:
        y_true: True/observed values.
        y_pred: Predicted values (posterior samples).

    Returns:
        xr.Dataset: R-squared values for each posterior sample.
    """
def r2_score(y_true, y_pred) -> float:
    """
    Compute the R-squared coefficient for point estimates.

    Args:
        y_true: True/observed values.
        y_pred: Predicted point estimates.

    Returns:
        float: R-squared coefficient.
    """
# NOTE(review): duplicate of the autocov stub above, with a narrower
# InferenceData-only signature — confirm which variant is canonical.
def autocov(data: InferenceData, *, var_names: list = None, max_lag: int = None) -> xr.Dataset:
    """
    Compute the autocovariance function for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data.
        var_names (list, optional): Variables to analyze.
        max_lag (int, optional): Maximum lag to compute.

    Returns:
        xr.Dataset: Autocovariance values by lag for each variable.
"""# Check convergence diagnostics
rhat_values = az.rhat(idata)
print("R-hat values:", rhat_values)
# Compute effective sample sizes
ess_bulk = az.ess(idata, method="bulk")
ess_tail = az.ess(idata, method="tail")
print("Bulk ESS:", ess_bulk)
print("Tail ESS:", ess_tail)
# Check MCMC efficiency
bfmi_value = az.bfmi(idata)
print(f"BFMI: {bfmi_value:.3f}")
# Analyze autocorrelation
autocorr_vals = az.autocorr(idata, var_names=["mu"])


def compare(data_dict: dict, *, ic: str = "loo", method: str = "stacking", scale: str = "log") -> pd.DataFrame:
    """
    Compare multiple models using information criteria and model weights.

    Args:
        data_dict (dict): Mapping of model names to InferenceData objects.
        ic (str): Information criterion ("loo", "waic").
        method (str): Weighting method ("stacking", "bb-pseudo-bma", "pseudo-bma").
        scale (str): Scale for the comparison ("log", "negative_log", "deviance").

    Returns:
        pd.DataFrame: Model comparison table with IC values, weights, and rankings.
    """
def loo(data: InferenceData, *, pointwise: bool = False, var_name: str = None, reff: float = 1.0) -> ELPDData:
    """
    Compute leave-one-out cross-validation using Pareto-smoothed importance sampling.

    Args:
        data (InferenceData): Inference data with a log_likelihood group.
        pointwise (bool): Whether to return pointwise values (default False).
        var_name (str, optional): Variable name in the log_likelihood group.
        reff (float): Relative efficiency of the MCMC chains (default 1.0).

    Returns:
        ELPDData: LOO results with the ELPD estimate, standard error, and Pareto-k diagnostics.
    """
def waic(data: InferenceData, *, pointwise: bool = False, var_name: str = None, scale: str = "log") -> ELPDData:
    """
    Compute the Widely Applicable Information Criterion (WAIC).

    Args:
        data (InferenceData): Inference data with a log_likelihood group.
        pointwise (bool): Whether to return pointwise values (default False).
        var_name (str, optional): Variable name in the log_likelihood group.
        scale (str): Scale for the computation ("log", "negative_log", "deviance").

    Returns:
        ELPDData: WAIC results with the ELPD estimate and effective parameter count.
"""def psislw(log_weights: np.ndarray, *, reff: float = 1.0, normalize: bool = True) -> tuple:
"""
Pareto Smoothed Importance Sampling with diagnostics.
Args:
log_weights (np.ndarray): Log importance weights
reff (float): Relative efficiency of weights (default 1.0)
normalize (bool): Whether to normalize weights (default True)
Returns:
tuple: (smoothed_weights, pareto_k_values) with diagnostics
"""
# NOTE(review): duplicate of the bayes_factor stub above with a different
# (model-comparison) signature — confirm which variant is canonical.
def bayes_factor(data: InferenceData, *, var_name: str = None, prior_odds: float = 1.0) -> float:
    """
    Compute a Bayes factor for model comparison.

    Args:
        data (InferenceData): Inference data.
        var_name (str, optional): Variable used for the comparison.
        prior_odds (float): Prior odds ratio (default 1.0).

    Returns:
        float: Bayes factor value.
"""# Compare multiple models
model_dict = {
"model_1": idata1,
"model_2": idata2,
"model_3": idata3
}
# Perform model comparison
comparison_df = az.compare(model_dict, ic="loo", method="stacking")
print(comparison_df)
# Compute LOO for single model
loo_results = az.loo(idata1, pointwise=True)
print(f"LOO ELPD: {loo_results.elpd_loo:.2f} ± {loo_results.se:.2f}")
print(f"Problematic observations (k > 0.7): {(loo_results.pareto_k > 0.7).sum()}")
# Compute WAIC
waic_results = az.waic(idata1)
print(f"WAIC: {waic_results.waic:.2f}")


def loo_pit(data: InferenceData, *, y: str = None, y_hat: str = None) -> np.ndarray:
    """
    Compute leave-one-out probability integral transform (LOO-PIT) values.

    Args:
        data (InferenceData): Inference data with posterior predictive samples.
        y (str, optional): Observed-data variable name.
        y_hat (str, optional): Posterior-predictive variable name.

    Returns:
        np.ndarray: LOO-PIT values for model checking.
    """
def weight_predictions(data: dict, *, weights: np.ndarray = None) -> InferenceData:
    """
    Weight predictions from multiple models using model weights.

    Args:
        data (dict): Dictionary of model predictions.
        weights (np.ndarray, optional): Model weights (equal weights when None).

    Returns:
        InferenceData: Weighted prediction ensemble.
"""def kde(data, *, bw: str = "default", adaptive: bool = False, extend: bool = True, **kwargs):
"""
Compute kernel density estimation for continuous data.
Args:
data: Input data (array-like)
bw (str): Bandwidth selection method ("default", "scott", "silverman")
adaptive (bool): Whether to use adaptive bandwidth (default False)
extend (bool): Whether to extend domain beyond data range (default True)
**kwargs: Additional KDE parameters
Returns:
tuple: (x_values, density_values) for plotting and analysis
"""def r2_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
"""
Compute R-squared coefficient of determination.
Args:
y_true (np.ndarray): True target values
y_pred (np.ndarray): Predicted values
Returns:
float: R-squared score (1.0 is perfect prediction)
"""
# NOTE(review): duplicate of the r2_samples stub above with np.ndarray-typed
# parameters — confirm which variant is canonical.
def r2_samples(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute sample-wise R-squared values for uncertainty quantification.

    Args:
        y_true (np.ndarray): True target values.
        y_pred (np.ndarray): Predicted values (samples x observations).

    Returns:
        np.ndarray: R-squared value for each sample.
"""def reloo(data: InferenceData, *, loo_kwargs: dict = None, refit_kwargs: dict = None) -> ELPDData:
"""
Refit models for problematic observations in LOO-CV.
Automatically identifies observations with high Pareto k values
and refits the model excluding those observations for more
accurate LOO estimates.
Args:
data (InferenceData): Inference data with log_likelihood group
loo_kwargs (dict, optional): Arguments passed to az.loo()
refit_kwargs (dict, optional): Arguments for model refitting
Returns:
ELPDData: Improved LOO results with refitted estimates
"""
def psens(data: InferenceData, *, perturbation: float = 0.1, var_names: list = None) -> dict:
    """
    Perform a prior sensitivity analysis.

    Evaluates how sensitive posterior estimates are to changes in the prior
    specification by perturbing priors and measuring the impact on
    posterior quantities.

    Args:
        data (InferenceData): Inference data with prior samples.
        perturbation (float): Size of the prior perturbation (default 0.1).
        var_names (list, optional): Variables to analyze.

    Returns:
        dict: Sensitivity-analysis results for each variable.
"""def apply_test_function(data, func, *, var_names: list = None, **kwargs):
"""
Apply test function to inference data variables.
Applies custom test functions to posterior samples for
model checking, hypothesis testing, or custom diagnostics.
Args:
data: Inference data or array-like data
func: Test function to apply
var_names (list, optional): Variables to apply function to
**kwargs: Additional arguments passed to function
Returns:
Results of applying test function
"""
def make_ufunc(func, *, signature: str = None, **kwargs):
    """
    Create a universal function for broadcasting operations.

    Converts a regular function into a universal function that can operate
    on ArviZ data structures with proper broadcasting.

    Args:
        func: Function to convert to a ufunc.
        signature (str, optional): Function signature for broadcasting.
        **kwargs: Additional ufunc-creation parameters.

    Returns:
        Universal function compatible with xarray operations.
    """
def wrap_xarray_ufunc(func, *, dask: str = "forbidden", **kwargs):
    """
    Wrap a function for xarray universal-function operations.

    Creates an xarray-compatible wrapped function that handles coordinates,
    dimensions, and attributes properly.

    Args:
        func: Function to wrap for xarray.
        dask (str): Dask handling strategy ("forbidden", "allowed").
        **kwargs: Additional wrapping parameters.

    Returns:
        Wrapped function compatible with xarray apply_ufunc.
    """
def smooth_data(data, *, method: str = "gaussian", **kwargs):
    """
    Apply a smoothing operation to data.

    Provides various smoothing algorithms for data preprocessing and noise
    reduction in statistical analysis.

    Args:
        data: Input data to smooth.
        method (str): Smoothing method ("gaussian", "median", "savgol").
        **kwargs: Method-specific smoothing parameters.

    Returns:
        Smoothed data with the same structure as the input.
"""# Model refitting for problematic LOO observations
loo_initial = az.loo(idata)
problematic_k = (loo_initial.pareto_k > 0.7).sum()
print(f"Problematic observations: {problematic_k}")
if problematic_k > 0:
loo_refit = az.reloo(idata)
print(f"Improved LOO: {loo_refit.elpd_loo:.2f}")
# Prior sensitivity analysis
sensitivity = az.psens(idata, perturbation=0.2, var_names=["mu", "sigma"])
for var, result in sensitivity.items():
print(f"{var} sensitivity: {result['sensitivity_score']:.3f}")
# Apply custom test function
def custom_test(samples):
return np.mean(samples > 0) # Probability of being positive
prob_positive = az.apply_test_function(idata, custom_test, var_names=["mu"])
print(f"P(mu > 0): {prob_positive:.3f}")
# Create and use universal function
def custom_transform(x):
return np.log(1 + np.exp(x)) # Softplus transformation
softplus_ufunc = az.make_ufunc(custom_transform)
transformed_data = softplus_ufunc(idata.posterior["theta"])class ELPDData:
"""
Expected log pointwise predictive density data container.
Contains information criteria results from loo() and waic()
including pointwise values, diagnostics, and summary statistics.
Attributes:
elpd_loo (float): LOO expected log pointwise predictive density
elpd_waic (float): WAIC expected log pointwise predictive density
p_loo (float): Effective number of parameters (LOO)
p_waic (float): Effective number of parameters (WAIC)
se (float): Standard error of ELPD estimate
pareto_k (np.ndarray): Pareto k diagnostic values
waic (float): WAIC value
loo (float): LOO value
"""# Model checking with LOO-PIT
loo_pit_vals = az.loo_pit(idata, y="y_obs", y_hat="y_pred")
# Weight predictions from multiple models
weighted_preds = az.weight_predictions(prediction_dict, weights=model_weights)
# Compute KDE for posterior samples
x_vals, density = az.kde(idata.posterior["mu"].values.flatten())
# Regression diagnostics
r2 = az.r2_score(y_true, y_pred_mean)
r2_posterior = az.r2_samples(y_true, y_pred_samples)Install with Tessl CLI
npx tessl i tessl/pypi-arviz