CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-arviz

Exploratory analysis of Bayesian models with comprehensive data manipulation, statistical diagnostics, and visualization capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/statistical-analysis.md

Statistical Analysis and Diagnostics

MCMC diagnostics, model comparison, and statistical functions for Bayesian analysis. Compute convergence diagnostics, effective sample sizes, information criteria, and perform Bayesian model comparison with advanced techniques like PSIS-LOO.

Summary Statistics

def summary(data: InferenceData, *, var_names: list | None = None, hdi_prob: float = 0.94, stat_focus: str = "mean", extend: bool = True, **kwargs) -> pd.DataFrame:
    """
    Create comprehensive summary statistics table for inference data.

    Args:
        data (InferenceData): Bayesian inference data
        var_names (list, optional): Variables to include in summary; None summarizes all variables
        hdi_prob (float): Probability for highest density interval (default 0.94)
        stat_focus (str): Focus statistic ("mean", "median", "mode")
            NOTE(review): upstream arviz documents only "mean" and "median" for
            stat_focus — confirm "mode" is actually supported
        extend (bool): Whether to include extended diagnostics
        **kwargs: Additional summary parameters

    Returns:
        pd.DataFrame: Summary statistics table with columns for mean, std, HDI bounds, diagnostics
    """

def hdi(data, hdi_prob: float = 0.94, *, circular: bool = False, multimodal: bool = False, **kwargs):
    """
    Compute highest density interval (the narrowest interval containing the
    requested probability mass) for posterior samples.

    Args:
        data: Posterior samples (array-like or InferenceData)
        hdi_prob (float): Probability mass for HDI (default 0.94)
        circular (bool): Whether data is circular, e.g. angles in radians (default False)
        multimodal (bool): Whether to detect multiple modes and return one
            interval per mode (default False)
        **kwargs: Additional HDI parameters

    Returns:
        array: HDI bounds [lower, upper] or multimodal intervals
    """

Basic Usage

import arviz as az

# Load example data (the classic "eight schools" model shipped with arviz)
idata = az.load_arviz_data("centered_eight")

# Generate comprehensive summary
summary_df = az.summary(idata, var_names=["mu", "tau"])
print(summary_df)

# Compute HDI for specific variables
# hdi_prob=0.89 overrides the 0.94 default documented above
hdi_bounds = az.hdi(idata.posterior["mu"], hdi_prob=0.89)
print(f"89% HDI for mu: {hdi_bounds}")

MCMC Convergence Diagnostics

Core Diagnostics

def rhat(data: InferenceData, *, var_names: list | None = None, method: str = "rank") -> xr.Dataset:
    """
    Compute R-hat convergence diagnostic for MCMC chains.

    Compares within-chain and between-chain variance; requires multiple chains.

    Args:
        data (InferenceData): MCMC inference data with multiple chains
        var_names (list, optional): Variables to analyze; None analyzes all
        method (str): Method to use ("rank", "split", "folded", "identity")

    Returns:
        xr.Dataset: R-hat values for each variable (values near 1.0 indicate convergence)
    """

def ess(data: InferenceData, *, var_names: list | None = None, method: str = "bulk", relative: bool = False) -> xr.Dataset:
    """
    Compute effective sample size for MCMC chains.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze; None analyzes all
        method (str): ESS method ("bulk", "tail", "quantile", "mean", "sd", "median", "mad")
        relative (bool): Whether to return relative ESS (ESS/total_samples)

    Returns:
        xr.Dataset: Effective sample size values for each variable
    """

def mcse(data: InferenceData, *, var_names: list | None = None, method: str = "mean", prob: float | None = None) -> xr.Dataset:
    """
    Compute Monte Carlo standard error for MCMC estimates.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze; None analyzes all
        method (str): Statistic method ("mean", "sd", "quantile")
        prob (float, optional): Probability for quantile method; only used when
            method="quantile"

    Returns:
        xr.Dataset: MCSE values for each variable
    """

Advanced Diagnostics

def bfmi(data: InferenceData) -> float:
    """
    Compute Bayesian Fraction of Missing Information.
    Diagnostic for HMC/NUTS efficiency (values < 0.2 indicate problems).

    Args:
        data (InferenceData): MCMC inference data with energy information
            (sample_stats must contain the sampler's energy values)

    Returns:
        float: BFMI value (higher is better, > 0.2 recommended)

    NOTE(review): upstream arviz returns one BFMI value per chain (an array),
    not a single float — confirm the intended return shape here.
    """

def autocorr(data, *, var_names: list | None = None, coords: dict | None = None, max_lag: int | None = None, **kwargs) -> xr.Dataset:
    """
    Compute autocorrelation function for MCMC chains.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze; None analyzes all
        coords (dict, optional): Coordinate specifications to subset the data
        max_lag (int, optional): Maximum lag to compute; None uses all lags
        **kwargs: Additional autocorrelation parameters

    Returns:
        xr.Dataset: Autocorrelation values at different lags
    """

def autocov(data, *, var_names: list | None = None, coords: dict | None = None, max_lag: int | None = None, **kwargs) -> xr.Dataset:
    """
    Compute autocovariance function for MCMC chains.

    NOTE(review): a second, conflicting `autocov` definition (InferenceData-only,
    no `coords`/`**kwargs`) appears later in this document — deduplicate.

    Args:
        data: MCMC samples (InferenceData or array-like)
        var_names (list, optional): Variables to analyze; None analyzes all
        coords (dict, optional): Coordinate specifications to subset the data
        max_lag (int, optional): Maximum lag to compute; None uses all lags
        **kwargs: Additional autocovariance parameters

    Returns:
        xr.Dataset: Autocovariance values at different lags
    """

def bayes_factor(data: InferenceData, *, var_name: str, ref_val: float = 0, prior=None, return_ref_vals: bool = False):
    """
    Compute Bayes factor for point hypotheses (Savage-Dickey style test of a
    parameter against a reference value).

    NOTE(review): a second, conflicting `bayes_factor` definition (with
    `prior_odds` and a plain float return) appears later in this document —
    deduplicate.

    Args:
        data (InferenceData): Posterior samples
        var_name (str): Variable name to test
        ref_val (float): Reference value for hypothesis test (default 0)
        prior: Prior distribution for Bayes factor calculation
        return_ref_vals (bool): Whether to return reference values

    Returns:
        dict or float: Bayes factor results
    """

def psislw(log_weights, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto smoothed importance sampling leave-one-out weights.

    NOTE(review): `psislw` is defined again later in this document with an
    `np.ndarray` annotation on log_weights — deduplicate.

    Args:
        log_weights: Log importance weights
        reff (float): Relative effective sample size (default 1.0)
        normalize (bool): Whether to normalize weights (default True)

    Returns:
        tuple: (smoothed_log_weights, pareto_k_values)
    """

def kde(x, *, circular: bool = False, **kwargs) -> tuple:
    """
    Kernel density estimation for continuous data.

    NOTE(review): `kde` is defined again later in this document with different
    keyword parameters (`bw`, `adaptive`, `extend`) — deduplicate.

    Args:
        x: Input data for density estimation
        circular (bool): Whether data is circular (default False)
        **kwargs: Additional KDE parameters (bandwidth, kernel type, etc.)

    Returns:
        tuple: (x_values, density_values) for plotting or further analysis
    """

def r2_samples(y_true, y_pred) -> xr.Dataset:
    """
    Compute R-squared coefficient for posterior samples.

    NOTE(review): `r2_samples` is defined again later in this document with an
    `np.ndarray` return annotation — deduplicate and pick one return type.

    Args:
        y_true: True/observed values
        y_pred: Predicted values (posterior samples)

    Returns:
        xr.Dataset: R-squared values for each posterior sample
    """

def r2_score(y_true, y_pred) -> float:
    """
    Compute R-squared coefficient for point estimates.

    NOTE(review): `r2_score` is defined again later in this document —
    deduplicate.

    Args:
        y_true: True/observed values
        y_pred: Predicted point estimates

    Returns:
        float: R-squared coefficient
    """

def autocov(data: InferenceData, *, var_names: list | None = None, max_lag: int | None = None) -> xr.Dataset:
    """
    Compute autocovariance function for MCMC chains.

    NOTE(review): duplicate of the earlier `autocov` definition, but with a
    narrower signature (no `coords`/`**kwargs`, InferenceData-only) —
    deduplicate and keep one signature.

    Args:
        data (InferenceData): MCMC inference data
        var_names (list, optional): Variables to analyze; None analyzes all
        max_lag (int, optional): Maximum lag to compute; None uses all lags

    Returns:
        xr.Dataset: Autocovariance values by lag for each variable
    """

Usage Examples

# Check convergence diagnostics (values near 1.0 indicate convergence,
# per the rhat() contract above)
rhat_values = az.rhat(idata)
print("R-hat values:", rhat_values)

# Compute effective sample sizes for both bulk and tail of the distribution
ess_bulk = az.ess(idata, method="bulk")
ess_tail = az.ess(idata, method="tail")
print("Bulk ESS:", ess_bulk)
print("Tail ESS:", ess_tail)

# Check MCMC efficiency (BFMI > 0.2 recommended, per bfmi() above)
bfmi_value = az.bfmi(idata)
print(f"BFMI: {bfmi_value:.3f}")

# Analyze autocorrelation for a single variable
autocorr_vals = az.autocorr(idata, var_names=["mu"])

Model Comparison

Information Criteria

def compare(data_dict: dict, *, ic: str = "loo", method: str = "stacking", scale: str = "log") -> pd.DataFrame:
    """
    Compare multiple models using information criteria and model weights.

    Args:
        data_dict (dict): Dictionary of model names to InferenceData objects
        ic (str): Information criterion ("loo", "waic")
        method (str): Weighting method ("stacking", "bb-pseudo-bma", "pseudo-bma")
        scale (str): Scale for comparison ("log", "negative_log", "deviance")

    Returns:
        pd.DataFrame: Model comparison table with IC values, weights, and rankings
    """

def loo(data: InferenceData, *, pointwise: bool = False, var_name: str | None = None, reff: float = 1.0) -> ELPDData:
    """
    Compute Leave-One-Out Cross-Validation using Pareto Smoothed Importance Sampling.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group; None
            uses the only/default variable
        reff (float): Relative efficiency of MCMC chains (default 1.0)

    Returns:
        ELPDData: LOO results with ELPD estimate, standard error, and Pareto k diagnostics
    """

def waic(data: InferenceData, *, pointwise: bool = False, var_name: str | None = None, scale: str = "log") -> ELPDData:
    """
    Compute Widely Applicable Information Criterion.

    Args:
        data (InferenceData): Inference data with log_likelihood group
        pointwise (bool): Whether to return pointwise values (default False)
        var_name (str, optional): Variable name in log_likelihood group; None
            uses the only/default variable
        scale (str): Scale for computation ("log", "negative_log", "deviance")

    Returns:
        ELPDData: WAIC results with ELPD estimate and effective parameters
    """

Specialized Comparison Methods

def psislw(log_weights: np.ndarray, *, reff: float = 1.0, normalize: bool = True) -> tuple:
    """
    Pareto Smoothed Importance Sampling with diagnostics.

    NOTE(review): duplicate of the `psislw` definition earlier in this
    document (same parameters, different annotation) — deduplicate.

    Args:
        log_weights (np.ndarray): Log importance weights
        reff (float): Relative efficiency of weights (default 1.0)
        normalize (bool): Whether to normalize weights (default True)

    Returns:
        tuple: (smoothed_weights, pareto_k_values) with diagnostics
    """

def bayes_factor(data: InferenceData, *, var_name: str | None = None, prior_odds: float = 1.0) -> float:
    """
    Compute Bayes factor for model comparison.

    NOTE(review): conflicts with the earlier `bayes_factor` definition
    (required var_name, ref_val/prior parameters, dict-or-float return) —
    deduplicate and keep one signature.

    Args:
        data (InferenceData): Inference data
        var_name (str, optional): Variable for comparison
        prior_odds (float): Prior odds ratio (default 1.0)

    Returns:
        float: Bayes factor value
    """

Usage Examples

# Compare multiple models: map model names to their InferenceData objects
model_dict = {
    "model_1": idata1,
    "model_2": idata2, 
    "model_3": idata3
}

# Perform model comparison using PSIS-LOO with stacking weights
comparison_df = az.compare(model_dict, ic="loo", method="stacking")
print(comparison_df)

# Compute LOO for single model; pointwise=True exposes per-observation
# Pareto k diagnostics used below
loo_results = az.loo(idata1, pointwise=True)
print(f"LOO ELPD: {loo_results.elpd_loo:.2f} ± {loo_results.se:.2f}")
# k > 0.7 is the conventional threshold for unreliable PSIS estimates
print(f"Problematic observations (k > 0.7): {(loo_results.pareto_k > 0.7).sum()}")

# Compute WAIC
waic_results = az.waic(idata1)
print(f"WAIC: {waic_results.waic:.2f}")

Predictive Model Checking

def loo_pit(data: InferenceData, *, y: str | None = None, y_hat: str | None = None) -> np.ndarray:
    """
    Compute Leave-One-Out Probability Integral Transform values.

    Well-calibrated models produce approximately uniform LOO-PIT values.

    Args:
        data (InferenceData): Inference data with posterior predictive samples
        y (str, optional): Observed data variable name
        y_hat (str, optional): Posterior predictive variable name

    Returns:
        np.ndarray: LOO-PIT values for model checking
    """

def weight_predictions(data: dict, *, weights: np.ndarray | None = None) -> InferenceData:
    """
    Weight predictions from multiple models using model weights.

    Args:
        data (dict): Dictionary of model predictions
        weights (np.ndarray, optional): Model weights (if None, uses equal weights)

    Returns:
        InferenceData: Weighted prediction ensemble
    """

Kernel Density Estimation

def kde(data, *, bw: str = "default", adaptive: bool = False, extend: bool = True, **kwargs):
    """
    Compute kernel density estimation for continuous data.

    NOTE(review): conflicts with the earlier `kde` definition (which takes
    `x` and `circular` instead of `bw`/`adaptive`/`extend`) — deduplicate
    and keep one signature.

    Args:
        data: Input data (array-like)
        bw (str): Bandwidth selection method ("default", "scott", "silverman")
        adaptive (bool): Whether to use adaptive bandwidth (default False)
        extend (bool): Whether to extend domain beyond data range (default True)
        **kwargs: Additional KDE parameters

    Returns:
        tuple: (x_values, density_values) for plotting and analysis
    """

Regression Diagnostics

def r2_score(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """
    Compute R-squared coefficient of determination.

    NOTE(review): duplicate of the earlier `r2_score` definition —
    deduplicate.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values
    
    Returns:
        float: R-squared score (1.0 is perfect prediction)
    """

def r2_samples(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
    """
    Compute sample-wise R-squared values for uncertainty quantification.

    NOTE(review): conflicts with the earlier `r2_samples` definition, which
    declares an `xr.Dataset` return — deduplicate and pick one return type.

    Args:
        y_true (np.ndarray): True target values
        y_pred (np.ndarray): Predicted values (samples x observations)
    
    Returns:
        np.ndarray: R-squared values for each sample
    """

Advanced Statistical Methods

Model Refitting and Sensitivity Analysis

def reloo(data: InferenceData, *, loo_kwargs: dict | None = None, refit_kwargs: dict | None = None) -> ELPDData:
    """
    Refit models for problematic observations in LOO-CV.
    
    Automatically identifies observations with high Pareto k values
    and refits the model excluding those observations for more
    accurate LOO estimates.
    
    Args:
        data (InferenceData): Inference data with log_likelihood group
        loo_kwargs (dict, optional): Arguments passed to az.loo()
        refit_kwargs (dict, optional): Arguments for model refitting
    
    Returns:
        ELPDData: Improved LOO results with refitted estimates
    """

def psens(data: InferenceData, *, perturbation: float = 0.1, var_names: list | None = None) -> dict:
    """
    Perform prior sensitivity analysis.
    
    Evaluates how sensitive posterior estimates are to changes
    in prior specifications by perturbing priors and measuring
    the impact on posterior quantities.
    
    Args:
        data (InferenceData): Inference data with prior samples
        perturbation (float): Size of prior perturbation (default 0.1)
        var_names (list, optional): Variables to analyze; None analyzes all
    
    Returns:
        dict: Sensitivity analysis results for each variable
    """

Utility Functions

def apply_test_function(data, func, *, var_names: list | None = None, **kwargs):
    """
    Apply test function to inference data variables.
    
    Applies custom test functions to posterior samples for
    model checking, hypothesis testing, or custom diagnostics.
    
    Args:
        data: Inference data or array-like data
        func: Test function to apply
        var_names (list, optional): Variables to apply function to; None
            applies it to all variables
        **kwargs: Additional arguments passed to function
    
    Returns:
        Results of applying test function
    """

def make_ufunc(func, *, signature: str | None = None, **kwargs):
    """
    Create universal function for broadcasting operations.
    
    Converts regular functions into universal functions that
    can operate on ArviZ data structures with proper broadcasting.
    
    Args:
        func: Function to convert to ufunc
        signature (str, optional): Function signature for broadcasting
            (numpy gufunc-style, e.g. "(n)->()")
        **kwargs: Additional ufunc creation parameters
    
    Returns:
        Universal function compatible with xarray operations
    """

def wrap_xarray_ufunc(func, *, dask: str = "forbidden", **kwargs):
    """
    Wrap functions for xarray universal function operations.
    
    Creates xarray-compatible wrapped functions that handle
    coordinates, dimensions, and attributes properly.
    
    Args:
        func: Function to wrap for xarray
        dask (str): Dask handling strategy ("forbidden", "allowed")
        **kwargs: Additional wrapping parameters
    
    Returns:
        Wrapped function compatible with xarray apply_ufunc
    """

def smooth_data(data, *, method: str = "gaussian", **kwargs):
    """
    Apply smoothing operations to data.
    
    Provides various smoothing algorithms for data preprocessing
    and noise reduction in statistical analysis.
    
    Args:
        data: Input data to smooth
        method (str): Smoothing method ("gaussian", "median", "savgol")
        **kwargs: Method-specific smoothing parameters
    
    Returns:
        Smoothed data with same structure as input
    """

Usage Examples

# Model refitting for problematic LOO observations
loo_initial = az.loo(idata)
# k > 0.7 marks observations where PSIS-LOO is unreliable
problematic_k = (loo_initial.pareto_k > 0.7).sum()
print(f"Problematic observations: {problematic_k}")

# Only refit when some observations actually exceeded the threshold
if problematic_k > 0:
    loo_refit = az.reloo(idata)
    print(f"Improved LOO: {loo_refit.elpd_loo:.2f}")

# Prior sensitivity analysis with a larger-than-default perturbation (0.2 vs 0.1)
sensitivity = az.psens(idata, perturbation=0.2, var_names=["mu", "sigma"])
for var, result in sensitivity.items():
    print(f"{var} sensitivity: {result['sensitivity_score']:.3f}")

# Apply custom test function
def custom_test(samples):
    return np.mean(samples > 0)  # Probability of being positive

prob_positive = az.apply_test_function(idata, custom_test, var_names=["mu"])
print(f"P(mu > 0): {prob_positive:.3f}")

# Create and use universal function
def custom_transform(x):
    return np.log(1 + np.exp(x))  # Softplus transformation

softplus_ufunc = az.make_ufunc(custom_transform)
transformed_data = softplus_ufunc(idata.posterior["theta"])

Data Types

class ELPDData:
    """
    Expected log pointwise predictive density data container.
    
    Contains information criteria results from loo() and waic()
    including pointwise values, diagnostics, and summary statistics.
    
    NOTE(review): presumably only the LOO-related attributes (elpd_loo, p_loo,
    pareto_k, loo) are populated on loo() results and only the WAIC-related
    ones on waic() results — confirm which attributes each producer sets.
    
    Attributes:
        elpd_loo (float): LOO expected log pointwise predictive density
        elpd_waic (float): WAIC expected log pointwise predictive density  
        p_loo (float): Effective number of parameters (LOO)
        p_waic (float): Effective number of parameters (WAIC)
        se (float): Standard error of ELPD estimate
        pareto_k (np.ndarray): Pareto k diagnostic values
        waic (float): WAIC value
        loo (float): LOO value
    """

Usage Examples

# Model checking with LOO-PIT (named observed and predictive variables)
loo_pit_vals = az.loo_pit(idata, y="y_obs", y_hat="y_pred")

# Weight predictions from multiple models using precomputed model weights
weighted_preds = az.weight_predictions(prediction_dict, weights=model_weights)

# Compute KDE for posterior samples (flatten chain/draw dimensions first)
x_vals, density = az.kde(idata.posterior["mu"].values.flatten())

# Regression diagnostics: point estimate R² and its posterior distribution
r2 = az.r2_score(y_true, y_pred_mean)
r2_posterior = az.r2_samples(y_true, y_pred_samples)

Install with Tessl CLI

npx tessl i tessl/pypi-arviz

docs

configuration-management.md

data-operations.md

framework-integrations.md

index.md

performance-utilities.md

statistical-analysis.md

visualization-plotting.md

tile.json