Python interface to CmdStan that provides comprehensive access to the Stan compiler and all Bayesian inference algorithms.
---
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run
Container for generated quantities computed from existing fit results, enabling post-processing and derived quantity calculation. The CmdStanGQ class allows computation of additional quantities using parameter draws from previous inference runs.
Access generated quantities in multiple formats, similar to MCMC results.
def draws(self, concat_chains=True):
    """
    Get generated quantities draws.

    Parameters:
    - concat_chains (bool): Concatenate chains into single array

    Returns:
    np.ndarray: Generated quantities draws
    """
def draws_pd(self, vars=None):
    """
    Get generated quantities as pandas DataFrame.

    Parameters:
    - vars (list of str, optional): Specific quantities to include

    Returns:
    pd.DataFrame: Quantities with names as columns
    """
def draws_xr(self, vars=None):
    """
    Get generated quantities as xarray Dataset.

    Parameters:
    - vars (list of str, optional): Specific quantities to include

    Returns:
    xr.Dataset: Quantities with coordinate labels
    """

def stan_variable(self, var):
"""
Get draws for specific generated quantity.
Parameters:
- var (str): Quantity name
Returns:
np.ndarray: Draws for the quantity
"""
def stan_variables(self):
    """
    Get all generated quantities as dictionary.

    Returns:
    dict: Mapping from quantity names to draw arrays
    """

def save_csvfiles(self, dir=None):
"""
Save CSV output files to directory.
Parameters:
- dir (str or PathLike, optional): Target directory
Returns:
None
"""# Chain information (inherited from original fit)
gq.chains # int: Number of chains
gq.chain_ids # List[int]: Chain identifiers
gq.column_names # Tuple[str, ...]: Generated quantity names
gq.metadata # InferenceMetadata: Run configuration and timing
# Original fit reference
gq.mcmc_sample  # CmdStanMCMC/CmdStanMLE/CmdStanVB: Original fit object

# Original MCMC fit
fit = model.sample(data=data)
# Define generated quantities in Stan model
gq_model = CmdStanModel(stan_file="generated_quantities.stan")
# Generate additional quantities from MCMC draws
gq = gq_model.generate_quantities(
    data=data,
    previous_fit=fit
)
# Access generated quantities
y_rep = gq.stan_variable("y_rep") # Posterior predictive samples
log_lik = gq.stan_variable("log_lik") # Pointwise log-likelihood
print(f"Posterior predictive mean: {y_rep.mean()}")
print(f"Log-likelihood shape: {log_lik.shape}")# Generate posterior predictive samples
gq = model.generate_quantities(
    data=data,
    previous_fit=mcmc_fit
)
# Extract predictive samples
y_rep = gq.stan_variable("y_rep")
y_obs = data["y"]
# Compute test statistics
def test_statistic(y):
    return np.mean(y)

T_rep = np.array([test_statistic(y_rep[i, j])
                  for i in range(y_rep.shape[0])
                  for j in range(y_rep.shape[1])])
T_obs = test_statistic(y_obs)
# Bayesian p-value
p_value = np.mean(T_rep >= T_obs)
print(f"Bayesian p-value: {p_value}")# Generate log-likelihood for LOO-CV
gq = model.generate_quantities(
    data=data,
    previous_fit=mcmc_fit
)
log_lik = gq.stan_variable("log_lik")
# Use with ArviZ for LOO computation
import arviz as az
# NOTE: az.from_cmdstanpy's log_likelihood argument normally takes the
# name(s) of log-likelihood variables present in the fit; confirm that
# this dict-of-arrays form is supported by your installed ArviZ version.
inference_data = az.from_cmdstanpy(
    mcmc_fit,
    log_likelihood={"log_lik": log_lik}
)
loo = az.loo(inference_data, pointwise=True)
print(f"LOO estimate: {loo.loo}")
print(f"Number of problematic points: {(loo.pareto_k > 0.7).sum()}")# K-fold cross-validation using generated quantities
from sklearn.model_selection import KFold
kf = KFold(n_splits=5, shuffle=True, random_state=42)
X, y = data["X"], data["y"]
cv_scores = []
for train_idx, test_idx in kf.split(X):
    # Fit on training data
    train_data = {"N": len(train_idx), "X": X[train_idx], "y": y[train_idx]}
    train_fit = model.sample(data=train_data)
    # Generate quantities on test data
    test_data = {"N": len(test_idx), "X": X[test_idx], "y": y[test_idx]}
    gq = model.generate_quantities(
        data=test_data,
        previous_fit=train_fit
    )
    # Compute predictive log-likelihood
    log_lik = gq.stan_variable("log_lik")
    cv_score = np.mean(log_lik)
    cv_scores.append(cv_score)
print(f"CV score: {np.mean(cv_scores)} ± {np.std(cv_scores)}")# Fit model on training data
train_fit = model.sample(data=train_data)
# Predict on new data
new_data = {"N": 50, "X": X_new, "y": np.zeros(50)} # y is placeholder
pred_gq = prediction_model.generate_quantities(
    data=new_data,
    previous_fit=train_fit
)
# Extract predictions
y_pred = pred_gq.stan_variable("y_pred")
y_pred_mean = y_pred.mean(axis=(0, 1))
y_pred_ci = np.percentile(y_pred, [2.5, 97.5], axis=(0, 1))
print(f"Predicted values: {y_pred_mean}")
print(f"95% credible intervals: {y_pred_ci}")# For hierarchical model with group-level effects
gq = model.generate_quantities(
    data=data,
    previous_fit=hierarchical_fit
)
# Extract group-level quantities
group_effects = gq.stan_variable("group_effect")
group_predictions = gq.stan_variable("group_pred")
# Compute group-level summaries
for g in range(data["n_groups"]):
effect_mean = group_effects[:, :, g].mean()
pred_mean = group_predictions[:, :, g].mean()
print(f"Group {g}: effect = {effect_mean:.3f}, prediction = {pred_mean:.3f}")# For computationally expensive generated quantities,
# use subset of posterior draws
subset_fit = mcmc_fit
# Manually subset draws if needed
subset_draws = mcmc_fit.draws()[::10] # Every 10th draw
# Generate quantities on subset for faster computation
gq = model.generate_quantities(
data=data,
previous_fit=subset_fit
)# Generate quantities from different types of fits
fits = {
    "mcmc": model.sample(data=data),
    "vb": model.variational(data=data),
    "pathfinder": model.pathfinder(data=data)
}
generated_quantities = {}
for fit_type, fit in fits.items():
    gq = model.generate_quantities(data=data, previous_fit=fit)
    generated_quantities[fit_type] = gq.stan_variable("quantity_of_interest")
# Compare quantities across inference methods
for fit_type, quantities in generated_quantities.items():
    print(f"{fit_type}: mean = {quantities.mean():.3f}, std = {quantities.std():.3f}")

Install with Tessl CLI
npx tessl i tessl/pypi-cmdstanpy