CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-arviz

Exploratory analysis of Bayesian models with comprehensive data manipulation, statistical diagnostics, and visualization capabilities

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

data-operations.mddocs/

Data Operations

Comprehensive data loading, conversion, and manipulation capabilities supporting multiple Bayesian frameworks and file formats. Create, transform, and manage InferenceData objects with built-in dataset examples and extensive I/O operations.

Core Data Structure

class InferenceData:
    """
    Main data container for Bayesian inference results.
    
    NetCDF-based data structure using xarray groups to organize
    posterior samples, prior samples, observed data, diagnostics,
    and metadata from Bayesian inference.
    
    Groups:
        posterior: MCMC samples from posterior distribution
        prior: Samples from prior distribution
        observed_data: Observed/input data used in model
        posterior_predictive: Samples from posterior predictive distribution
        sample_stats: MCMC diagnostics and metadata
        log_likelihood: Log likelihood evaluations for model comparison
    """

Data Loading and Management

Built-in Datasets

def load_arviz_data(dataset: str, data_home: str = None, **kwargs) -> InferenceData:
    """
    Load built-in example datasets for testing and learning.
    
    Args:
        dataset (str): Name of dataset to load ('centered_eight', 'non_centered_eight', etc.)
        data_home (str, optional): Directory to cache datasets
        **kwargs: Additional parameters for data loading
    
    Returns:
        InferenceData: Loaded example dataset
    """

def list_datasets() -> str:
    """
    List all available built-in datasets.
    
    Returns:
        str: Formatted listing of available dataset names and descriptions
    """

def clear_data_home(data_home: str = None):
    """
    Clear cached datasets from local storage.
    
    Args:
        data_home (str, optional): Directory containing cached data
    """

Basic Usage

import arviz as az

# Load example dataset
idata = az.load_arviz_data("centered_eight")

# List available datasets
datasets = az.list_datasets()
print(datasets)

# Clear cache
az.clear_data_home()

Data Manipulation

Concatenation and Extraction

def concat(*args, dim: str, copy: bool = True, inplace: bool = False, reset_dim: bool = True) -> InferenceData:
    """
    Concatenate multiple InferenceData objects along specified dimension.
    
    Args:
        *args: InferenceData objects to concatenate
        dim (str): Dimension to concatenate along ('chain', 'draw', etc.)
        copy (bool): Whether to copy data (default True)
        inplace (bool): Whether to modify first object in-place (default False)
        reset_dim (bool): Whether to reset dimension coordinates (default True)
    
    Returns:
        InferenceData: Concatenated inference data
    """

def extract(data: InferenceData, *, var_names: list = None, groups: list = None, num_samples: int = None, **kwargs) -> InferenceData:
    """
    Extract subset of data from InferenceData object.
    
    Args:
        data (InferenceData): Source inference data
        var_names (list, optional): Variables to extract
        groups (list, optional): Groups to extract ('posterior', 'prior', etc.)
        num_samples (int, optional): Number of samples to extract
        **kwargs: Additional extraction parameters
    
    Returns:
        InferenceData: Extracted subset of data
    """

def extract_dataset(data: InferenceData, *, var_names: list = None, groups: list = None, num_samples: int = None, **kwargs) -> xr.Dataset:
    """
    Extract an xarray Dataset from an InferenceData object.
    
    Deprecated alias of ``extract``; prefer ``extract`` in new code.
    
    Args:
        data (InferenceData): Source inference data
        var_names (list, optional): Variables to extract
        groups (list, optional): Groups to extract
        num_samples (int, optional): Number of samples to extract
        **kwargs: Additional extraction parameters
    
    Returns:
        xr.Dataset: Extracted data as an xarray Dataset
    """

Usage Examples

# Concatenate multiple inference runs
idata1 = az.load_arviz_data("centered_eight")
idata2 = az.load_arviz_data("non_centered_eight")
combined = az.concat(idata1, idata2, dim="chain")

# Extract subset of variables
subset = az.extract(idata1, var_names=["mu", "tau"], num_samples=1000)

# Extract specific groups as datasets
datasets = az.extract_dataset(idata1, groups=["posterior", "sample_stats"])

Data Conversion

General Conversion Functions

def convert_to_inference_data(obj, *, group: str = None, coords: dict = None, dims: dict = None, **kwargs) -> InferenceData:
    """
    Convert various objects to InferenceData format.
    
    Args:
        obj: Object to convert (dict, xarray Dataset, numpy array, etc.)
        group (str, optional): Target group name ('posterior', 'prior', etc.)
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data object
    """

def convert_to_dataset(obj, *, group: str = None, coords: dict = None, dims: dict = None) -> xr.Dataset:
    """
    Convert objects to xarray Dataset format.
    
    Args:
        obj: Object to convert
        group (str, optional): Target group name
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
    
    Returns:
        xr.Dataset: Converted dataset
    """

Specialized Conversion Functions

def numpy_to_data_array(ary: np.ndarray, *, var_name: str = "x", coords: dict = None, dims: list = None) -> xr.DataArray:
    """
    Convert numpy array to xarray DataArray.
    
    Args:
        ary (np.ndarray): Input numpy array
        var_name (str): Variable name (default 'x')
        coords (dict, optional): Coordinate specifications
        dims (list, optional): Dimension names
    
    Returns:
        xr.DataArray: Converted data array
    """

def dict_to_dataset(data: dict, *, coords: dict = None, dims: dict = None, **kwargs) -> xr.Dataset:
    """
    Convert dictionary to xarray Dataset.
    
    Args:
        data (dict): Dictionary containing variable data
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional parameters
    
    Returns:
        xr.Dataset: Converted dataset
    """

def pytree_to_dataset(data, *, var_names: list = None, coords: dict = None, dims: dict = None) -> xr.Dataset:
    """
    Convert pytree structure to xarray Dataset.
    
    Args:
        data: Pytree structure (JAX, PyTorch, etc.)
        var_names (list, optional): Variable names
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
    
    Returns:
        xr.Dataset: Converted dataset
    """

Usage Examples

import numpy as np

# Convert numpy array to InferenceData
samples = np.random.normal(0, 1, (4, 1000, 10))  # 4 chains, 1000 draws, 10 parameters
idata = az.convert_to_inference_data(samples, group="posterior")

# Convert dictionary to InferenceData
data_dict = {
    "mu": np.random.normal(0, 1, (4, 1000)),
    "sigma": np.random.lognormal(0, 1, (4, 1000))
}
idata = az.convert_to_inference_data(data_dict, group="posterior")

# Convert numpy array to DataArray
arr = np.random.normal(0, 1, (100, 50))
da = az.numpy_to_data_array(arr, var_name="theta", dims=["draw", "parameter"])

File I/O Operations

NetCDF Format

def from_netcdf(filename: str, *, engine: str = None, group_kwargs: dict = None, regex: str = None) -> InferenceData:
    """
    Load InferenceData from NetCDF file.
    
    Args:
        filename (str): Path to NetCDF file
        engine (str, optional): NetCDF engine to use
        group_kwargs (dict, optional): Group-specific loading arguments
        regex (str, optional): Regular expression to filter groups
    
    Returns:
        InferenceData: Loaded inference data
    """

def to_netcdf(data: InferenceData, filename: str, *, groups: list = None, **kwargs):
    """
    Save InferenceData to NetCDF file.
    
    Args:
        data (InferenceData): Inference data to save
        filename (str): Output file path
        groups (list, optional): Groups to save
        **kwargs: Additional saving parameters
    """

JSON Format

def from_json(filename: str) -> InferenceData:
    """
    Load InferenceData from JSON file.
    
    Args:
        filename (str): Path to JSON file
    
    Returns:
        InferenceData: Loaded inference data
    """

def to_json(data: InferenceData, filename: str, *, groups: list = None, **kwargs):
    """
    Save InferenceData to JSON file.
    
    Args:
        data (InferenceData): Inference data to save
        filename (str): Output file path
        groups (list, optional): Groups to save
        **kwargs: Additional saving parameters
    """

Zarr Format

def from_zarr(store, *, groups: list = None, **kwargs) -> InferenceData:
    """
    Load InferenceData from Zarr store.
    
    Args:
        store: Zarr store path or object
        groups (list, optional): Groups to load
        **kwargs: Additional loading parameters
    
    Returns:
        InferenceData: Loaded inference data
    """

def to_zarr(data: InferenceData, store, *, groups: list = None, **kwargs):
    """
    Save InferenceData to Zarr store.
    
    Args:
        data (InferenceData): Inference data to save
        store: Zarr store path or object
        groups (list, optional): Groups to save
        **kwargs: Additional saving parameters
    """

DataTree Integration

def from_datatree(datatree) -> InferenceData:
    """
    Convert xarray DataTree to InferenceData.
    
    Args:
        datatree: xarray DataTree object
    
    Returns:
        InferenceData: Converted inference data
    """

def to_datatree(data: InferenceData) -> object:
    """
    Convert InferenceData to xarray DataTree.
    
    Args:
        data (InferenceData): Inference data to convert
    
    Returns:
        DataTree: Converted datatree object
    """

Usage Examples

# Save and load NetCDF
az.to_netcdf(idata, "my_analysis.nc")
loaded_idata = az.from_netcdf("my_analysis.nc")

# Save and load JSON
az.to_json(idata, "my_analysis.json")
loaded_idata = az.from_json("my_analysis.json")

# Save and load Zarr
az.to_zarr(idata, "my_analysis.zarr")
loaded_idata = az.from_zarr("my_analysis.zarr")

Framework Integration Functions

Dictionary and Basic Conversion

def from_dict(posterior=None, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, log_prior=None, sample_stats=None, sample_stats_prior=None, **kwargs) -> InferenceData:
    """
    Convert Python dictionaries to InferenceData format.
    
    Args:
        posterior (dict, optional): Posterior samples dictionary
        posterior_predictive (dict, optional): Posterior predictive samples
        predictions (dict, optional): Out of sample predictions
        prior (dict, optional): Prior samples dictionary
        prior_predictive (dict, optional): Prior predictive samples
        observed_data (dict, optional): Observed data dictionary
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        log_prior (dict, optional): Log prior evaluations
        sample_stats (dict, optional): MCMC sample statistics
        sample_stats_prior (dict, optional): Prior sample statistics
        **kwargs: Additional conversion parameters (coords, dims, etc.)
    
    Returns:
        InferenceData: Converted inference data object
    """

Stan Integration

def from_cmdstan(posterior=None, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, save_warmup=False, **kwargs) -> InferenceData:
    """
    Convert CmdStan output files to InferenceData.
    
    Args:
        posterior (str or list): Path(s) to posterior CSV files
        posterior_predictive (str or list, optional): Path(s) to posterior predictive CSV files
        predictions (str or list, optional): Path(s) to predictions CSV files
        prior (str or list, optional): Path(s) to prior CSV files
        prior_predictive (str or list, optional): Path(s) to prior predictive CSV files
        observed_data (dict, optional): Observed data dictionary  
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        save_warmup (bool): Whether to save warmup samples
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

def from_cmdstanpy(fit, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, **kwargs) -> InferenceData:
    """
    Convert CmdStanPy fit results to InferenceData.
    
    Args:
        fit: CmdStanPy fit object
        posterior_predictive (str or array, optional): Posterior predictive samples
        predictions (str or array, optional): Out of sample predictions  
        prior (str or array, optional): Prior samples
        prior_predictive (str or array, optional): Prior predictive samples
        observed_data (dict, optional): Observed data dictionary
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

def from_pystan(fit, *, posterior_predictive=None, observed_data=None, constant_data=None, predictions=None, log_likelihood=None, coords=None, dims=None, **kwargs) -> InferenceData:
    """
    Convert PyStan fit results to InferenceData.
    
    Args:
        fit: PyStan fit object (Stan 2.x or 3.x)
        posterior_predictive (str or array, optional): Posterior predictive samples
        observed_data (dict, optional): Observed data dictionary
        constant_data (dict, optional): Model constants dictionary
        predictions (str or array, optional): Out of sample predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

PyMC Integration

def from_pymc(trace=None, *, prior=None, posterior_predictive=None, log_likelihood=None, coords=None, dims=None, model=None, save_warmup=False, **kwargs) -> InferenceData:
    """
    Convert PyMC trace to InferenceData.
    
    Args:
        trace: PyMC MultiTrace or InferenceData object
        prior (dict, optional): Prior samples
        posterior_predictive (dict, optional): Posterior predictive samples
        log_likelihood (dict, optional): Log likelihood evaluations
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        model: PyMC model object
        save_warmup (bool): Whether to save warmup samples
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

JAX/NumPyro Integration

def from_numpyro(posterior=None, *, prior=None, posterior_predictive=None, predictions=None, constant_data=None, predictions_constant_data=None, observed_data=None, **kwargs) -> InferenceData:
    """
    Convert NumPyro MCMC results to InferenceData.
    
    Args:
        posterior: NumPyro MCMC object or posterior samples dict
        prior (dict, optional): Prior samples dictionary
        posterior_predictive (dict, optional): Posterior predictive samples
        predictions (dict, optional): Out of sample predictions
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        observed_data (dict, optional): Observed data dictionary
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

def from_pyro(posterior=None, *, prior=None, posterior_predictive=None, **kwargs) -> InferenceData:
    """
    Convert Pyro MCMC results to InferenceData.
    
    Args:
        posterior: Pyro MCMC object or posterior samples dict
        prior (dict, optional): Prior samples dictionary  
        posterior_predictive (dict, optional): Posterior predictive samples
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

def from_pytree(posterior, *, prior=None, posterior_predictive=None, sample_stats=None, observed_data=None, **kwargs) -> InferenceData:
    """
    Convert PyTree structures (JAX, etc.) to InferenceData.
    
    Args:
        posterior: PyTree with posterior samples
        prior (dict, optional): Prior samples pytree
        posterior_predictive (dict, optional): Posterior predictive samples pytree
        sample_stats (dict, optional): Sample statistics pytree
        observed_data (dict, optional): Observed data dictionary
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

Other Framework Integration

def from_emcee(sampler, *, var_names=None, slices=None, coords=None, dims=None, **kwargs) -> InferenceData:
    """
    Convert emcee sampler results to InferenceData.
    
    Args:
        sampler: emcee EnsembleSampler object
        var_names (list, optional): Variable names for samples
        slices (dict, optional): Slices for multi-dimensional parameters
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

def from_pyjags(fit, *, var_names=None, coords=None, dims=None, **kwargs) -> InferenceData:
    """
    Convert PyJAGS results to InferenceData.
    
    Args:
        fit: PyJAGS fit object
        var_names (list, optional): Variable names to extract
        coords (dict, optional): Coordinate specifications  
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

def from_beanmachine(samples, *, prior=None, posterior_predictive=None, **kwargs) -> InferenceData:
    """
    Convert Meta's Bean Machine samples to InferenceData.
    
    Args:
        samples: Bean Machine posterior samples
        prior (dict, optional): Prior samples
        posterior_predictive (dict, optional): Posterior predictive samples
        **kwargs: Additional conversion parameters
    
    Returns:
        InferenceData: Converted inference data
    """

Framework Integration Usage Examples

# Stan integration
idata = az.from_cmdstanpy(fit, observed_data={"y": y_obs})

# PyMC integration  
with model:
    trace = pm.sample(1000)
idata = az.from_pymc(trace, model=model)

# NumPyro integration
mcmc = MCMC(NUTS(model), num_warmup=500, num_samples=1000)
mcmc.run(rng_key, **data)
idata = az.from_numpyro(mcmc)

# Dictionary conversion
posterior_dict = {"mu": samples_mu, "sigma": samples_sigma}
idata = az.from_dict(posterior=posterior_dict, observed_data={"y": y_obs})

Type Definitions

CoordSpec = Dict[str, List[Any]]
"""Type alias for coordinate specifications in data conversion."""

DimSpec = Dict[str, List[str]]  
"""Type alias for dimension specifications in data conversion."""

Install with Tessl CLI

npx tessl i tessl/pypi-arviz

docs

configuration-management.md

data-operations.md

framework-integrations.md

index.md

performance-utilities.md

statistical-analysis.md

visualization-plotting.md

tile.json