# Generated Quantities

Container for generated quantities computed from existing fit results, enabling post-processing and derived quantity calculation. The CmdStanGQ class allows computation of additional quantities using parameter draws from previous inference runs.

## Capabilities

### Data Access

Access generated quantities in multiple formats, similar to MCMC results.

```python { .api }
def draws(self, concat_chains=True):
    """
    Get generated quantities draws.

    Parameters:
    - concat_chains (bool): Concatenate chains into single array

    Returns:
    np.ndarray: Generated quantities draws
    """

def draws_pd(self, vars=None):
    """
    Get generated quantities as pandas DataFrame.

    Parameters:
    - vars (list of str, optional): Specific quantities to include

    Returns:
    pd.DataFrame: Quantities with names as columns
    """

def draws_xr(self, vars=None):
    """
    Get generated quantities as xarray Dataset.

    Parameters:
    - vars (list of str, optional): Specific quantities to include

    Returns:
    xr.Dataset: Quantities with coordinate labels
    """
```

### Variable Access

```python { .api }
def stan_variable(self, var):
    """
    Get draws for specific generated quantity.

    Parameters:
    - var (str): Quantity name

    Returns:
    np.ndarray: Draws for the quantity
    """

def stan_variables(self):
    """
    Get all generated quantities as dictionary.

    Returns:
    dict: Mapping from quantity names to draw arrays
    """
```

### File Operations

```python { .api }
def save_csvfiles(self, dir=None):
    """
    Save CSV output files to directory.

    Parameters:
    - dir (str or PathLike, optional): Target directory

    Returns:
    None
    """
```

## Properties

```python { .api }
# Chain information (inherited from original fit)
gq.chains        # int: Number of chains
gq.chain_ids     # List[int]: Chain identifiers
gq.column_names  # Tuple[str, ...]: Generated quantity names
gq.metadata      # InferenceMetadata: Run configuration and timing

# Original fit reference
gq.mcmc_sample   # CmdStanMCMC/CmdStanMLE/CmdStanVB: Original fit object
```

## Usage Examples

### Basic Generated Quantities

```python
# Original MCMC fit
fit = model.sample(data=data)

# Define generated quantities in Stan model
gq_model = CmdStanModel(stan_file="generated_quantities.stan")

# Generate additional quantities from MCMC draws
gq = gq_model.generate_quantities(
    data=data,
    previous_fit=fit
)

# Access generated quantities
y_rep = gq.stan_variable("y_rep")      # Posterior predictive samples
log_lik = gq.stan_variable("log_lik")  # Pointwise log-likelihood

print(f"Posterior predictive mean: {y_rep.mean()}")
print(f"Log-likelihood shape: {log_lik.shape}")
```

### Posterior Predictive Checking

```python
# Generate posterior predictive samples
gq = model.generate_quantities(
    data=data,
    previous_fit=mcmc_fit
)

# Extract predictive samples
y_rep = gq.stan_variable("y_rep")
y_obs = data["y"]

# Compute test statistics
def test_statistic(y):
    return np.mean(y)

T_rep = np.array([test_statistic(y_rep[i, j]) for i in range(y_rep.shape[0])
                  for j in range(y_rep.shape[1])])
T_obs = test_statistic(y_obs)

# Bayesian p-value
p_value = np.mean(T_rep >= T_obs)
print(f"Bayesian p-value: {p_value}")
```

### Model Comparison with LOO

```python
# Generate log-likelihood for LOO-CV
gq = model.generate_quantities(
    data=data,
    previous_fit=mcmc_fit
)

log_lik = gq.stan_variable("log_lik")

# Use with ArviZ for LOO computation
import arviz as az

inference_data = az.from_cmdstanpy(
    mcmc_fit,
    log_likelihood={"log_lik": log_lik}
)

loo = az.loo(inference_data, pointwise=True)
print(f"LOO estimate: {loo.loo}")
print(f"Number of problematic points: {(loo.pareto_k > 0.7).sum()}")
```

### Cross-Validation

```python
# K-fold cross-validation using generated quantities
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)
X, y = data["X"], data["y"]

cv_scores = []
for train_idx, test_idx in kf.split(X):
    # Fit on training data
    train_data = {"N": len(train_idx), "X": X[train_idx], "y": y[train_idx]}
    train_fit = model.sample(data=train_data)

    # Generate quantities on test data
    test_data = {"N": len(test_idx), "X": X[test_idx], "y": y[test_idx]}
    gq = model.generate_quantities(
        data=test_data,
        previous_fit=train_fit
    )

    # Compute predictive log-likelihood
    log_lik = gq.stan_variable("log_lik")
    cv_score = np.mean(log_lik)
    cv_scores.append(cv_score)

print(f"CV score: {np.mean(cv_scores)} ± {np.std(cv_scores)}")
```

### Out-of-Sample Prediction

```python
# Fit model on training data
train_fit = model.sample(data=train_data)

# Predict on new data
new_data = {"N": 50, "X": X_new, "y": np.zeros(50)}  # y is placeholder
pred_gq = prediction_model.generate_quantities(
    data=new_data,
    previous_fit=train_fit
)

# Extract predictions
y_pred = pred_gq.stan_variable("y_pred")
y_pred_mean = y_pred.mean(axis=(0, 1))
y_pred_ci = np.percentile(y_pred, [2.5, 97.5], axis=(0, 1))

print(f"Predicted values: {y_pred_mean}")
print(f"95% credible intervals: {y_pred_ci}")
```

### Hierarchical Model Post-Processing

```python
# For hierarchical model with group-level effects
gq = model.generate_quantities(
    data=data,
    previous_fit=hierarchical_fit
)

# Extract group-level quantities
group_effects = gq.stan_variable("group_effect")
group_predictions = gq.stan_variable("group_pred")

# Compute group-level summaries
for g in range(data["n_groups"]):
    effect_mean = group_effects[:, :, g].mean()
    pred_mean = group_predictions[:, :, g].mean()
    print(f"Group {g}: effect = {effect_mean:.3f}, prediction = {pred_mean:.3f}")
```

## Advanced Patterns

### Efficient Quantity Computation

```python
# For computationally expensive generated quantities,
# use subset of posterior draws
subset_fit = mcmc_fit
# Manually subset draws if needed
subset_draws = mcmc_fit.draws()[::10]  # Every 10th draw

# Generate quantities on subset for faster computation
gq = model.generate_quantities(
    data=data,
    previous_fit=subset_fit
)
```

### Combining Multiple Fits

```python
# Generate quantities from different types of fits
fits = {
    "mcmc": model.sample(data=data),
    "vb": model.variational(data=data),
    "pathfinder": model.pathfinder(data=data)
}

generated_quantities = {}
for fit_type, fit in fits.items():
    gq = model.generate_quantities(data=data, previous_fit=fit)
    generated_quantities[fit_type] = gq.stan_variable("quantity_of_interest")

# Compare quantities across inference methods
for fit_type, quantities in generated_quantities.items():
    print(f"{fit_type}: mean = {quantities.mean():.3f}, std = {quantities.std():.3f}")
```