CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-cmdstanpy

Python interface to CmdStan that provides comprehensive access to the Stan compiler and all Bayesian inference algorithms.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/advanced-variational.md

Advanced Variational Methods

Containers for advanced variational approximation methods including Pathfinder algorithm and Laplace approximation. These methods provide fast approximate inference alternatives to full MCMC.

Capabilities

CmdStanPathfinder

Container for Pathfinder algorithm results, which provides fast variational approximation using multiple optimization paths.

class CmdStanPathfinder:
    def draws(self):
        """
        Get approximate posterior draws from Pathfinder.
        
        Returns:
        np.ndarray: Posterior draws (draws, parameters)
        """

    def stan_variable(self, var):
        """
        Get draws for specific Stan variable.
        
        Parameters:
        - var (str): Variable name
        
        Returns:
        np.ndarray: Draws for the variable
        """

    def stan_variables(self):
        """
        Get all Stan variables as dictionary.
        
        Returns:
        dict: Mapping from variable names to draw arrays
        """

    def method_variables(self):
        """
        Get Pathfinder diagnostic variables.
        
        Returns:
        dict: Diagnostic information from Pathfinder runs
        """

    def create_inits(self, seed=None, chains=4):
        """
        Create initial values by sampling from Pathfinder approximation.
        
        Parameters:
        - seed (int, optional): Random seed
        - chains (int): Number of initial value sets to create
        
        Returns:
        list or dict: Initial value dictionaries for MCMC chains
        """

    def save_csvfiles(self, dir=None):
        """
        Save CSV output files to directory.
        
        Parameters:
        - dir (str or PathLike, optional): Target directory
        
        Returns:
        None
        """

Pathfinder Properties:

pathfinder.metadata        # InferenceMetadata: Run configuration and timing
pathfinder.column_names    # Tuple[str, ...]: Parameter names
pathfinder.is_resampled    # bool: Whether PSIS resampling was used

Pathfinder Usage Examples:

# Run Pathfinder with multiple paths
pf = model.pathfinder(data=data, num_paths=8, draws=2000)

# Use for approximate inference
theta_approx = pf.stan_variable("theta")
print(f"Approximate posterior mean: {theta_approx.mean()}")

# Create initial values for MCMC
inits = pf.create_inits(seed=12345, chains=4)
mcmc_fit = model.sample(data=data, inits=inits, chains=4)

# Check diagnostics
diagnostics = pf.method_variables()
print("Pathfinder diagnostics:", diagnostics.keys())

CmdStanLaplace

Container for Laplace approximation results, providing multivariate normal approximation around a posterior mode.

class CmdStanLaplace:
    def draws(self):
        """
        Get approximate posterior draws from Laplace approximation.
        
        Returns:
        np.ndarray: Posterior draws (draws, parameters)
        """

    def draws_pd(self, vars=None):
        """
        Get draws as pandas DataFrame.
        
        Parameters:
        - vars (list of str, optional): Specific variables to include
        
        Returns:
        pd.DataFrame: Draws with parameter names
        """

    def draws_xr(self, vars=None):
        """
        Get draws as xarray Dataset.
        
        Parameters:
        - vars (list of str, optional): Specific variables to include
        
        Returns:
        xr.Dataset: Draws with coordinate labels
        """

    def stan_variable(self, var):
        """
        Get draws for specific Stan variable.
        
        Parameters:
        - var (str): Variable name
        
        Returns:
        np.ndarray: Draws for the variable
        """

    def stan_variables(self):
        """
        Get all Stan variables as dictionary.
        
        Returns:
        dict: Mapping from variable names to draw arrays
        """

    def method_variables(self):
        """
        Get diagnostic variables.
        
        Returns:
        dict: Diagnostic information including Jacobian
        """

    def save_csvfiles(self, dir=None):
        """
        Save CSV output files to directory.
        
        Parameters:
        - dir (str or PathLike, optional): Target directory
        
        Returns:
        None
        """

Laplace Properties:

laplace.metadata        # InferenceMetadata: Run configuration and timing
laplace.column_names    # Tuple[str, ...]: Parameter names
laplace.mode           # CmdStanMLE: Mode around which approximation is centered

Laplace Usage Examples:

# First find the mode
mle = model.optimize(data=data)

# Run Laplace approximation around the mode
laplace = model.laplace_sample(data=data, mode=mle, draws=1000)

# Access approximation
theta_laplace = laplace.stan_variable("theta")
print(f"Laplace approximation mean: {theta_laplace.mean()}")

# Get mode information
mode_params = laplace.mode.optimized_params_dict()
print(f"Mode at: {mode_params}")

# Compare with xarray format
draws_xr = laplace.draws_xr()
print("Laplace draws structure:", draws_xr)

Advanced Usage Patterns

Pathfinder for MCMC Initialization

# Use Pathfinder to generate good initial values
pf = model.pathfinder(data=data, num_paths=8)

# Create multiple initialization strategies
inits_list = []
for i in range(4):  # 4 chains
    inits_list.append(pf.create_inits(seed=i, chains=1))

# Run MCMC with Pathfinder initialization
mcmc = model.sample(
    data=data, 
    inits=inits_list,
    chains=4,
    iter_warmup=500,  # May need less warmup with good inits
    iter_sampling=1000
)

Approximation Quality Assessment

# Compare Pathfinder, Laplace, and MCMC
pf = model.pathfinder(data=data, num_paths=4, draws=2000)
mle = model.optimize(data=data)
laplace = model.laplace_sample(data=data, mode=mle, draws=2000)
mcmc = model.sample(data=data, chains=4, iter_sampling=500)

# Compare posterior means
theta_pf = pf.stan_variable("theta").mean()
theta_laplace = laplace.stan_variable("theta").mean()
theta_mcmc = mcmc.stan_variable("theta").mean(axis=0)  # draws are already flattened across chains

print(f"Pathfinder mean: {theta_pf}")
print(f"Laplace mean: {theta_laplace}")
print(f"MCMC mean: {theta_mcmc}")

# Compare uncertainties
print(f"Pathfinder std: {pf.stan_variable('theta').std()}")
print(f"Laplace std: {laplace.stan_variable('theta').std()}")
print(f"MCMC std: {mcmc.stan_variable('theta').std()}")

Hierarchical Workflow

# Stage 1: Fast approximation with Pathfinder
pf = model.pathfinder(data=data, num_paths=8)

# Stage 2: Refine with Laplace if unimodal
mle = model.optimize(data=data, inits=pf.create_inits(chains=1))
laplace = model.laplace_sample(data=data, mode=mle, draws=1000)

# Stage 3: Full MCMC for final inference (if needed)
mcmc = model.sample(
    data=data,
    inits={k: v.mean(axis=0) for k, v in laplace.stan_variables().items()},  # Use Laplace mean as init
    chains=4
)

# Compare computational costs and accuracy
print("Pathfinder time:", pf.metadata.time_total)
print("Laplace time:", laplace.metadata.time_total)  
print("MCMC time:", mcmc.metadata.time_total)

Model Selection with Approximations

models = {
    "simple": CmdStanModel(stan_file="simple.stan"),
    "complex": CmdStanModel(stan_file="complex.stan")
}

# Fast model comparison using Pathfinder
results = {}
for name, model in models.items():
    pf = model.pathfinder(data=data, num_paths=4)
    
    # Estimate log marginal likelihood (approximate)
    log_lik = pf.method_variables().get("lp__", np.array([]))
    if len(log_lik) > 0:
        results[name] = {
            "pathfinder": pf,
            "approx_log_ml": np.mean(log_lik)
        }

# Select best model for full analysis
best_model = max(results.keys(), key=lambda k: results[k]["approx_log_ml"])
print(f"Best model (approximate): {best_model}")

# Run full MCMC on selected model
final_fit = models[best_model].sample(data=data)

Install with Tessl CLI

npx tessl i tessl/pypi-cmdstanpy

docs

advanced-variational.md

data-io-utilities.md

generated-quantities.md

index.md

installation-setup.md

mcmc-results.md

model-compilation.md

model-interface.md

optimization-results.md

variational-results.md

tile.json