Exploratory analysis of Bayesian models with comprehensive data manipulation, statistical diagnostics, and visualization capabilities
Quality: Pending — it has not yet been assessed whether this spec follows best practices.
Impact: Pending — no eval scenarios have been run.
Comprehensive data loading, conversion, and manipulation capabilities supporting multiple Bayesian frameworks and file formats. Create, transform, and manage InferenceData objects with built-in dataset examples and extensive I/O operations.
class InferenceData:
    """
    Main data container for Bayesian inference results.

    NetCDF-based data structure using xarray groups to organize
    posterior samples, prior samples, observed data, diagnostics,
    and metadata from Bayesian inference.

    Groups:
        posterior: MCMC samples from posterior distribution
        prior: Samples from prior distribution
        observed_data: Observed/input data used in model
        posterior_predictive: Samples from posterior predictive distribution
        sample_stats: MCMC diagnostics and metadata
        log_likelihood: Log likelihood evaluations for model comparison
    """


def load_arviz_data(dataset: str, data_home: str = None, **kwargs) -> InferenceData:
    """
    Load built-in example datasets for testing and learning.

    Args:
        dataset (str): Name of dataset to load ('centered_eight', 'non_centered_eight', etc.)
        data_home (str, optional): Directory to cache datasets
        **kwargs: Additional parameters for data loading

    Returns:
        InferenceData: Loaded example dataset
    """


def list_datasets() -> list:
    """
    List all available built-in datasets.

    Returns:
        list: Names of available datasets
    """


def clear_data_home(data_home: str = None):
    """
    Clear cached datasets from local storage.

    Args:
        data_home (str, optional): Directory containing cached data
    """


# Usage example:
#     import arviz as az
#     # Load example dataset
#     idata = az.load_arviz_data("centered_eight")
#     # List available datasets
#     datasets = az.list_datasets()
#     print(datasets)
#     # Clear cache
#     az.clear_data_home()


def concat(*args, dim: str, copy: bool = True, inplace: bool = False, reset_dim: bool = True) -> InferenceData:
    """
    Concatenate multiple InferenceData objects along specified dimension.

    Args:
        *args: InferenceData objects to concatenate
        dim (str): Dimension to concatenate along ('chain', 'draw', etc.)
        copy (bool): Whether to copy data (default True)
        inplace (bool): Whether to modify first object in-place (default False)
        reset_dim (bool): Whether to reset dimension coordinates (default True)

    Returns:
        InferenceData: Concatenated inference data
    """
def extract(data: "InferenceData", *, var_names: list = None, groups: list = None, num_samples: int = None, **kwargs) -> "InferenceData":
    """
    Extract subset of data from InferenceData object.

    Args:
        data (InferenceData): Source inference data
        var_names (list, optional): Variables to extract
        groups (list, optional): Groups to extract ('posterior', 'prior', etc.)
        num_samples (int, optional): Number of samples to extract
        **kwargs: Additional extraction parameters

    Returns:
        InferenceData: Extracted subset of data
    """
def extract_dataset(data: "InferenceData", *, var_names: list = None, groups: list = None, num_samples: int = None, **kwargs) -> dict:
    """
    Extract xarray datasets from InferenceData object.

    Args:
        data (InferenceData): Source inference data
        var_names (list, optional): Variables to extract
        groups (list, optional): Groups to extract
        num_samples (int, optional): Number of samples to extract
        **kwargs: Additional extraction parameters

    Returns:
        dict: Dictionary mapping group names to xarray datasets
    """


# Usage example:
#     # Concatenate multiple inference runs
#     idata1 = az.load_arviz_data("centered_eight")
#     idata2 = az.load_arviz_data("non_centered_eight")
#     combined = az.concat(idata1, idata2, dim="chain")
#     # Extract subset of variables
#     subset = az.extract(idata1, var_names=["mu", "tau"], num_samples=1000)
#     # Extract specific groups as datasets
#     datasets = az.extract_dataset(idata1, groups=["posterior", "sample_stats"])


def convert_to_inference_data(obj, *, group: str = None, coords: dict = None, dims: dict = None, **kwargs) -> "InferenceData":
    """
    Convert various objects to InferenceData format.

    Args:
        obj: Object to convert (dict, xarray Dataset, numpy array, etc.)
        group (str, optional): Target group name ('posterior', 'prior', etc.)
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data object
    """
def convert_to_dataset(obj, *, group: str = None, coords: dict = None, dims: dict = None) -> dict:
    """
    Convert objects to xarray Dataset format.

    Args:
        obj: Object to convert
        group (str, optional): Target group name
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications

    Returns:
        dict: Dictionary containing xarray datasets
    """


def numpy_to_data_array(ary: "np.ndarray", *, var_name: str = "x", coords: dict = None, dims: list = None) -> "xr.DataArray":
    """
    Convert numpy array to xarray DataArray.

    Args:
        ary (np.ndarray): Input numpy array
        var_name (str): Variable name (default 'x')
        coords (dict, optional): Coordinate specifications
        dims (list, optional): Dimension names

    Returns:
        xr.DataArray: Converted data array
    """
def dict_to_dataset(data: dict, *, coords: dict = None, dims: dict = None, **kwargs) -> "xr.Dataset":
    """
    Convert dictionary to xarray Dataset.

    Args:
        data (dict): Dictionary containing variable data
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional parameters

    Returns:
        xr.Dataset: Converted dataset
    """
def pytree_to_dataset(data, *, var_names: list = None, coords: dict = None, dims: dict = None) -> "xr.Dataset":
    """
    Convert pytree structure to xarray Dataset.

    Args:
        data: Pytree structure (JAX, PyTorch, etc.)
        var_names (list, optional): Variable names
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications

    Returns:
        xr.Dataset: Converted dataset
    """


# Usage example:
#     import numpy as np
#     # Convert numpy array to InferenceData
#     samples = np.random.normal(0, 1, (4, 1000, 10))  # 4 chains, 1000 draws, 10 parameters
#     idata = az.convert_to_inference_data(samples, group="posterior")
#     # Convert dictionary to InferenceData
#     data_dict = {
#         "mu": np.random.normal(0, 1, (4, 1000)),
#         "sigma": np.random.lognormal(0, 1, (4, 1000))
#     }
#     idata = az.convert_to_inference_data(data_dict, group="posterior")
#     # Convert numpy array to DataArray
#     arr = np.random.normal(0, 1, (100, 50))
#     da = az.numpy_to_data_array(arr, var_name="theta", dims=["draw", "parameter"])


def from_netcdf(filename: str, *, engine: str = None, group_kwargs: dict = None, regex: str = None) -> "InferenceData":
    """
    Load InferenceData from NetCDF file.

    Args:
        filename (str): Path to NetCDF file
        engine (str, optional): NetCDF engine to use
        group_kwargs (dict, optional): Group-specific loading arguments
        regex (str, optional): Regular expression to filter groups

    Returns:
        InferenceData: Loaded inference data
    """
def to_netcdf(data: "InferenceData", filename: str, *, groups: list = None, **kwargs):
    """
    Save InferenceData to NetCDF file.

    Args:
        data (InferenceData): Inference data to save
        filename (str): Output file path
        groups (list, optional): Groups to save
        **kwargs: Additional saving parameters
    """


def from_json(filename: str) -> "InferenceData":
    """
    Load InferenceData from JSON file.

    Args:
        filename (str): Path to JSON file

    Returns:
        InferenceData: Loaded inference data
    """
def to_json(data: "InferenceData", filename: str, *, groups: list = None, **kwargs):
    """
    Save InferenceData to JSON file.

    Args:
        data (InferenceData): Inference data to save
        filename (str): Output file path
        groups (list, optional): Groups to save
        **kwargs: Additional saving parameters
    """


def from_zarr(store, *, groups: list = None, **kwargs) -> "InferenceData":
    """
    Load InferenceData from Zarr store.

    Args:
        store: Zarr store path or object
        groups (list, optional): Groups to load
        **kwargs: Additional loading parameters

    Returns:
        InferenceData: Loaded inference data
    """
def to_zarr(data: "InferenceData", store, *, groups: list = None, **kwargs):
    """
    Save InferenceData to Zarr store.

    Args:
        data (InferenceData): Inference data to save
        store: Zarr store path or object
        groups (list, optional): Groups to save
        **kwargs: Additional saving parameters
    """


def from_datatree(datatree) -> "InferenceData":
    """
    Convert xarray DataTree to InferenceData.

    Args:
        datatree: xarray DataTree object

    Returns:
        InferenceData: Converted inference data
    """
def to_datatree(data: "InferenceData") -> object:
    """
    Convert InferenceData to xarray DataTree.

    Args:
        data (InferenceData): Inference data to convert

    Returns:
        DataTree: Converted datatree object
    """


# Usage example:
#     # Save and load NetCDF
#     az.to_netcdf(idata, "my_analysis.nc")
#     loaded_idata = az.from_netcdf("my_analysis.nc")
#     # Save and load JSON
#     az.to_json(idata, "my_analysis.json")
#     loaded_idata = az.from_json("my_analysis.json")
#     # Save and load Zarr
#     az.to_zarr(idata, "my_analysis.zarr")
#     loaded_idata = az.from_zarr("my_analysis.zarr")


def from_dict(posterior=None, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, log_prior=None, sample_stats=None, sample_stats_prior=None, **kwargs) -> "InferenceData":
    """
    Convert Python dictionaries to InferenceData format.

    Args:
        posterior (dict, optional): Posterior samples dictionary
        posterior_predictive (dict, optional): Posterior predictive samples
        predictions (dict, optional): Out of sample predictions
        prior (dict, optional): Prior samples dictionary
        prior_predictive (dict, optional): Prior predictive samples
        observed_data (dict, optional): Observed data dictionary
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        log_prior (dict, optional): Log prior evaluations
        sample_stats (dict, optional): MCMC sample statistics
        sample_stats_prior (dict, optional): Prior sample statistics
        **kwargs: Additional conversion parameters (coords, dims, etc.)

    Returns:
        InferenceData: Converted inference data object
    """


def from_cmdstan(posterior=None, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, save_warmup=False, **kwargs) -> "InferenceData":
    """
    Convert CmdStan output files to InferenceData.

    Args:
        posterior (str or list): Path(s) to posterior CSV files
        posterior_predictive (str or list, optional): Path(s) to posterior predictive CSV files
        predictions (str or list, optional): Path(s) to predictions CSV files
        prior (str or list, optional): Path(s) to prior CSV files
        prior_predictive (str or list, optional): Path(s) to prior predictive CSV files
        observed_data (dict, optional): Observed data dictionary
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        save_warmup (bool): Whether to save warmup samples
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """
def from_cmdstanpy(fit, *, posterior_predictive=None, predictions=None, prior=None, prior_predictive=None, observed_data=None, constant_data=None, predictions_constant_data=None, log_likelihood=None, **kwargs) -> "InferenceData":
    """
    Convert CmdStanPy fit results to InferenceData.

    Args:
        fit: CmdStanPy fit object
        posterior_predictive (str or array, optional): Posterior predictive samples
        predictions (str or array, optional): Out of sample predictions
        prior (str or array, optional): Prior samples
        prior_predictive (str or array, optional): Prior predictive samples
        observed_data (dict, optional): Observed data dictionary
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """
def from_pystan(fit, *, posterior_predictive=None, observed_data=None, constant_data=None, predictions=None, log_likelihood=None, coords=None, dims=None, **kwargs) -> "InferenceData":
    """
    Convert PyStan fit results to InferenceData.

    Args:
        fit: PyStan fit object (Stan 2.x or 3.x)
        posterior_predictive (str or array, optional): Posterior predictive samples
        observed_data (dict, optional): Observed data dictionary
        constant_data (dict, optional): Model constants dictionary
        predictions (str or array, optional): Out of sample predictions
        log_likelihood (dict, optional): Log likelihood evaluations
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """


def from_pymc(trace=None, *, prior=None, posterior_predictive=None, log_likelihood=None, coords=None, dims=None, model=None, save_warmup=False, **kwargs) -> "InferenceData":
    """
    Convert PyMC trace to InferenceData.

    Args:
        trace: PyMC MultiTrace or InferenceData object
        prior (dict, optional): Prior samples
        posterior_predictive (dict, optional): Posterior predictive samples
        log_likelihood (dict, optional): Log likelihood evaluations
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        model: PyMC model object
        save_warmup (bool): Whether to save warmup samples
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """


def from_numpyro(posterior=None, *, prior=None, posterior_predictive=None, predictions=None, constant_data=None, predictions_constant_data=None, observed_data=None, **kwargs) -> "InferenceData":
    """
    Convert NumPyro MCMC results to InferenceData.

    Args:
        posterior: NumPyro MCMC object or posterior samples dict
        prior (dict, optional): Prior samples dictionary
        posterior_predictive (dict, optional): Posterior predictive samples
        predictions (dict, optional): Out of sample predictions
        constant_data (dict, optional): Model constants dictionary
        predictions_constant_data (dict, optional): Constants for predictions
        observed_data (dict, optional): Observed data dictionary
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """
def from_pyro(posterior=None, *, prior=None, posterior_predictive=None, **kwargs) -> "InferenceData":
    """
    Convert Pyro MCMC results to InferenceData.

    Args:
        posterior: Pyro MCMC object or posterior samples dict
        prior (dict, optional): Prior samples dictionary
        posterior_predictive (dict, optional): Posterior predictive samples
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """
def from_pytree(posterior, *, prior=None, posterior_predictive=None, sample_stats=None, observed_data=None, **kwargs) -> "InferenceData":
    """
    Convert PyTree structures (JAX, etc.) to InferenceData.

    Args:
        posterior: PyTree with posterior samples
        prior (dict, optional): Prior samples pytree
        posterior_predictive (dict, optional): Posterior predictive samples pytree
        sample_stats (dict, optional): Sample statistics pytree
        observed_data (dict, optional): Observed data dictionary
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """


def from_emcee(sampler, *, var_names=None, slices=None, coords=None, dims=None, **kwargs) -> "InferenceData":
    """
    Convert emcee sampler results to InferenceData.

    Args:
        sampler: emcee EnsembleSampler object
        var_names (list, optional): Variable names for samples
        slices (dict, optional): Slices for multi-dimensional parameters
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """
def from_pyjags(fit, *, var_names=None, coords=None, dims=None, **kwargs) -> "InferenceData":
    """
    Convert PyJAGS results to InferenceData.

    Args:
        fit: PyJAGS fit object
        var_names (list, optional): Variable names to extract
        coords (dict, optional): Coordinate specifications
        dims (dict, optional): Dimension specifications
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """
def from_beanmachine(samples, *, prior=None, posterior_predictive=None, **kwargs) -> "InferenceData":
    """
    Convert Meta's Bean Machine samples to InferenceData.

    Args:
        samples: Bean Machine posterior samples
        prior (dict, optional): Prior samples
        posterior_predictive (dict, optional): Posterior predictive samples
        **kwargs: Additional conversion parameters

    Returns:
        InferenceData: Converted inference data
    """


# Usage example:
#     # Stan integration
#     idata = az.from_cmdstanpy(fit, observed_data={"y": y_obs})
#     # PyMC integration
#     with model:
#         trace = pm.sample(1000)
#     idata = az.from_pymc(trace, model=model)
#     # NumPyro integration
#     mcmc = MCMC(NUTS(model), num_warmup=500, num_samples=1000)
#     mcmc.run(rng_key, **data)
#     idata = az.from_numpyro(mcmc)
#     # Dictionary conversion
#     posterior_dict = {"mu": samples_mu, "sigma": samples_sigma}
#     idata = az.from_dict(posterior=posterior_dict, observed_data={"y": y_obs})


from typing import Any, Dict, List  # required by the type aliases below

# Type alias for coordinate specifications in data conversion.
CoordSpec = Dict[str, List[Any]]

# Type alias for dimension specifications in data conversion.
DimSpec = Dict[str, List[str]]

# Install with Tessl CLI:
npx tessl i tessl/pypi-arviz