Python interface to CmdStan that provides comprehensive access to the Stan compiler and all Bayesian inference algorithms.
—
Quality: Pending — best-practices review has not yet been performed.
Impact: Pending — no eval scenarios have been run.
The core CmdStanModel class that handles Stan program compilation and provides methods for all inference algorithms. This is the primary interface for working with Stan models in CmdStanPy.
The main class for encapsulating Stan models, handling compilation, and providing inference methods.
class CmdStanModel:
    """Encapsulate a Stan model: handles compilation and provides inference methods.

    Basic usage::

        from cmdstanpy import CmdStanModel

        # Create model from Stan file
        model = CmdStanModel(stan_file="my_model.stan")

        # Create model with custom options
        model = CmdStanModel(
            stan_file="my_model.stan",
            stanc_options={"O1": True},
            cpp_options={"STAN_THREADS": True},
        )
    """

    def __init__(
        self,
        model_name=None,
        stan_file=None,
        exe_file=None,
        force_compile=False,
        stanc_options=None,
        cpp_options=None,
        user_header=None,
        compile=None,
    ):
        """Create a CmdStanModel instance.

        Parameters:
        - model_name (str, optional): Model name for output files
        - stan_file (str or PathLike, optional): Path to Stan source file
        - exe_file (str or PathLike, optional): Path to compiled executable
        - force_compile (bool): Force recompilation of model
        - stanc_options (dict, optional): Options for stanc compiler
        - cpp_options (dict, optional): Options for C++ compiler
        - user_header (str or PathLike, optional): Path to user header file
        - compile (bool, optional): Whether to compile on instantiation
        """


# Run Hamiltonian Monte Carlo sampling using the No-U-Turn Sampler (NUTS).
def sample(
    self,
    data=None,
    chains=4,
    parallel_chains=None,
    threads_per_chain=None,
    seed=None,
    chain_ids=None,
    inits=None,
    iter_warmup=1000,
    iter_sampling=1000,
    save_warmup=False,
    thin=1,
    max_treedepth=10,
    metric=None,
    step_size=None,
    adapt_engaged=True,
    adapt_delta=0.8,
    adapt_init_phase=15,
    adapt_metric_window=25,
    adapt_step_size=50,
    fixed_param=False,
    output_dir=None,
    sig_figs=None,
    validate_csv=True,
    show_console=False,
    refresh=None,
    time_fmt=None,
    timeout=None,
    force_one_process_per_chain=None,
):
    """Run MCMC sampling (Hamiltonian Monte Carlo with the No-U-Turn Sampler).

    Parameters:
    - data (dict, str, or PathLike, optional): Model data
    - chains (int): Number of chains to run
    - parallel_chains (int, optional): Number of chains to run in parallel
    - threads_per_chain (int, optional): Threads per chain (requires STAN_THREADS)
    - seed (int, optional): Random seed
    - chain_ids (list of int, optional): Chain identifiers
    - inits (dict, list, str, optional): Initial parameter values
    - iter_warmup (int): Number of warmup iterations per chain
    - iter_sampling (int): Number of sampling iterations per chain
    - save_warmup (bool): Save warmup draws in output
    - thin (int): Period between saved samples
    - max_treedepth (int): Maximum tree depth for NUTS
    - metric (str or array, optional): Mass matrix ("diag_e", "dense_e", or array)
    - step_size (float or array, optional): Initial step size
    - adapt_engaged (bool): Enable adaptation
    - adapt_delta (float): Target acceptance probability
    - adapt_init_phase (int): Initial adaptation phase iterations
    - adapt_metric_window (int): Metric adaptation window
    - adapt_step_size (int): Step size adaptation iterations
    - fixed_param (bool): Run with fixed parameters (no sampling)
    - output_dir (str or PathLike, optional): Directory for output files
    - sig_figs (int, optional): Significant figures in output (1-18)
    - validate_csv (bool): Validate CSV output format
    - show_console (bool): Display console output
    - refresh (int, optional): Progress update frequency
    - time_fmt (str, optional): Timestamp format for output files
    - timeout (float, optional): Timeout in seconds
    - force_one_process_per_chain (bool, optional): Force single process per chain

    Returns:
    CmdStanMCMC: MCMC results container

    Usage example::

        # Basic sampling
        fit = model.sample(data={"N": 100, "y": [1, 2, 3, ...]})

        # Advanced sampling configuration
        fit = model.sample(
            data=data,
            chains=4,
            parallel_chains=4,
            iter_warmup=2000,
            iter_sampling=2000,
            adapt_delta=0.95,
            max_treedepth=12,
            seed=12345,
        )
    """


# Run optimization algorithms to find maximum likelihood or maximum a posteriori estimates.
def optimize(
    self,
    data=None,
    seed=None,
    inits=None,
    algorithm=None,
    iter=2000,
    jacobian=False,
    output_dir=None,
    sig_figs=None,
    show_console=False,
    refresh=None,
    time_fmt=None,
    timeout=None,
):
    """Run optimization for MLE/MAP estimation.

    Parameters:
    - data (dict, str, or PathLike, optional): Model data
    - seed (int, optional): Random seed
    - inits (dict, str, optional): Initial parameter values
    - algorithm (str, optional): Optimization algorithm ("lbfgs", "bfgs", "newton")
    - iter (int): Maximum number of iterations
    - jacobian (bool): Save Jacobian matrix
    - output_dir (str or PathLike, optional): Directory for output files
    - sig_figs (int, optional): Significant figures in output
    - show_console (bool): Display console output
    - refresh (int, optional): Progress update frequency
    - time_fmt (str, optional): Timestamp format for output files
    - timeout (float, optional): Timeout in seconds

    Returns:
    CmdStanMLE: Optimization results container

    Usage example::

        # Basic optimization
        mle = model.optimize(data=data)

        # L-BFGS with custom settings
        mle = model.optimize(
            data=data,
            algorithm="lbfgs",
            iter=5000,
            jacobian=True,
            seed=54321,
        )
    """


# Run Automatic Differentiation Variational Inference (ADVI) for approximate posterior inference.
def variational(
    self,
    data=None,
    seed=None,
    inits=None,
    algorithm=None,
    iter=10000,
    grad_samples=1,
    elbo_samples=100,
    eta=1.0,
    adapt_engaged=True,
    adapt_iter=50,
    tol_rel_obj=0.01,
    eval_elbo=100,
    draws=1000,
    output_dir=None,
    sig_figs=None,
    show_console=False,
    refresh=None,
    time_fmt=None,
    timeout=None,
    output_samples=None,
):
    """Run variational inference (ADVI).

    Parameters:
    - data (dict, str, or PathLike, optional): Model data
    - seed (int, optional): Random seed
    - inits (dict, str, optional): Initial parameter values
    - algorithm (str, optional): VI algorithm ("meanfield", "fullrank")
    - iter (int): Maximum iterations for VI algorithm
    - grad_samples (int): Samples per gradient evaluation
    - elbo_samples (int): Samples for ELBO evaluation
    - eta (float): Learning rate scaling parameter
    - adapt_engaged (bool): Enable learning rate adaptation
    - adapt_iter (int): Adaptation iterations
    - tol_rel_obj (float): Relative objective tolerance for convergence
    - eval_elbo (int): ELBO evaluation frequency
    - draws (int): Number of posterior draws to generate
    - output_dir (str or PathLike, optional): Directory for output files
    - sig_figs (int, optional): Significant figures in output
    - show_console (bool): Display console output
    - refresh (int, optional): Progress update frequency
    - time_fmt (str, optional): Timestamp format for output files
    - timeout (float, optional): Timeout in seconds
    - output_samples (int, optional): Deprecated parameter, use draws instead

    Returns:
    CmdStanVB: Variational inference results container
    """


# Run the Pathfinder algorithm for fast approximate inference.
def pathfinder(
    self,
    data=None,
    seed=None,
    inits=None,
    num_paths=4,
    draws=1000,
    psis_resample=True,
    calculate_lp=True,
    max_lbfgs_iters=1000,
    num_draws=None,
    save_single_paths=False,
    output_dir=None,
    sig_figs=None,
    show_console=False,
    refresh=None,
    time_fmt=None,
    timeout=None,
    init_alpha=None,
    num_threads=None,
):
    """Run Pathfinder algorithm for variational approximation.

    Parameters:
    - data (dict, str, or PathLike, optional): Model data
    - seed (int, optional): Random seed
    - inits (dict, str, optional): Initial parameter values
    - num_paths (int): Number of pathfinder paths to run
    - draws (int): Total number of draws to return
    - psis_resample (bool): Use Pareto smoothed importance sampling resampling
    - calculate_lp (bool): Calculate log probability for draws
    - max_lbfgs_iters (int): Maximum L-BFGS iterations per path
    - num_draws (int, optional): Deprecated, use draws instead
    - save_single_paths (bool): Save individual path outputs
    - output_dir (str or PathLike, optional): Directory for output files
    - sig_figs (int, optional): Significant figures in output
    - show_console (bool): Display console output
    - refresh (int, optional): Progress update frequency
    - time_fmt (str, optional): Timestamp format for output files
    - timeout (float, optional): Timeout in seconds
    - init_alpha (float, optional): Initial step size for pathfinder
    - num_threads (int, optional): Number of threads for pathfinder

    Returns:
    CmdStanPathfinder: Pathfinder results container
    """


# Run Laplace approximation around a mode for approximate posterior inference.
def laplace_sample(
    self,
    data=None,
    mode=None,
    draws=1000,
    jacobian=True,
    refresh=100,
    output_dir=None,
    sig_figs=None,
    show_console=False,
    time_fmt=None,
    timeout=None,
    opt_args=None,
):
    """Run Laplace approximation sampling around a mode.

    Parameters:
    - data (dict, str, or PathLike, optional): Model data
    - mode (CmdStanMLE, optional): Mode for approximation center (if None, runs optimization first)
    - draws (int): Number of draws from approximation
    - jacobian (bool): Calculate Jacobian matrix
    - refresh (int): Progress update frequency
    - output_dir (str or PathLike, optional): Directory for output files
    - sig_figs (int, optional): Significant figures in output
    - show_console (bool): Display console output
    - time_fmt (str, optional): Timestamp format for output files
    - timeout (float, optional): Timeout in seconds
    - opt_args (dict, optional): Additional optimization arguments if mode is None

    Returns:
    CmdStanLaplace: Laplace approximation results container
    """


# Generate additional quantities of interest from existing fit results.
def generate_quantities(
    self,
    data=None,
    previous_fit=None,
    seed=None,
    parallel_chains=None,
    output_dir=None,
    sig_figs=None,
    show_console=False,
    refresh=None,
    time_fmt=None,
    timeout=None,
    mcmc_sample=None,
):
    """Generate quantities of interest from an existing fit.

    Parameters:
    - data (dict, str, or PathLike, optional): Model data (can be different from original fit)
    - previous_fit (CmdStanMCMC, CmdStanMLE, CmdStanVB, or CmdStanPathfinder): Existing fit for parameter values
    - seed (int, optional): Random seed
    - parallel_chains (int, optional): Number of parallel chains
    - output_dir (str or PathLike, optional): Directory for output files
    - sig_figs (int, optional): Significant figures in output
    - show_console (bool): Display console output
    - refresh (int, optional): Progress update frequency
    - time_fmt (str, optional): Timestamp format for output files
    - timeout (float, optional): Timeout in seconds
    - mcmc_sample (CmdStanMCMC or list of str, optional): Alternative parameter name for previous_fit

    Returns:
    CmdStanGQ: Generated quantities results container
    """


# Calculate log probability and gradients at specific parameter values.
def log_prob(self, params, data=None, jacobian=True, sig_figs=None):
    """Calculate log probability and gradients at specific parameter values.

    Parameters:
    - params (dict): Parameter values to evaluate
    - data (dict, str, or PathLike, optional): Model data
    - jacobian (bool): Calculate gradients
    - sig_figs (int, optional): Significant figures in output

    Returns:
    pd.DataFrame: Log probability and gradients
    """


def code(self):
    """Return Stan program as string.

    Returns:
    str or None: Stan program code if available
    """


def exe_info(self):
    """Get executable information by running with 'info' option.

    Returns:
    dict: Executable configuration information including compiler options and flags
    """


def src_info(self):
    """Get model information by running stanc with '--info'.

    Returns:
    dict: Model structure and parameter information including inputs, parameters, and generated quantities
    """


# Access model metadata and compilation information.
# Properties
model.name # str: Model name
model.stan_file # str or None: Path to Stan source file
model.exe_file # str or None: Path to compiled executable
model.stanc_options # dict: Stanc compiler options
model.cpp_options # dict: C++ compiler options
model.user_header # str or None: User header file path

# Different initialization strategies
fit1 = model.sample(data=data, inits=0) # Initialize at zero
fit2 = model.sample(data=data, inits=2) # Initialize with random values
fit3 = model.sample(data=data, inits={"theta": 1.5, "sigma": 0.5}) # Custom values
fit4 = model.sample(data=data, inits="previous_fit.json")  # From file

# Start with optimization for good initial values
mle = model.optimize(data=data)
# Use MLE as starting point for MCMC
fit = model.sample(
data=data,
inits=mle.optimized_params_dict()
)
# Generate additional quantities
gq = model.generate_quantities(
data=extended_data,
previous_fit=fit
)

models = {
"simple": CmdStanModel(stan_file="simple_model.stan"),
"complex": CmdStanModel(stan_file="complex_model.stan")
}
results = {}
for name, model in models.items():
# Pathfinder for fast approximate inference
pf = model.pathfinder(data=data, num_paths=8)
# Use Pathfinder results to initialize MCMC
fit = model.sample(
data=data,
inits=pf.create_inits(chains=4),
chains=4
)
results[name] = fit
# Compare models using LOO or other criteria

Install with Tessl CLI
npx tessl i tessl/pypi-cmdstanpy