0
# PyMC Model Building and Core Components
1
2
PyMC's modeling framework provides the foundational components for building probabilistic models. The core classes and functions enable model specification, transformation, and manipulation within a coherent probabilistic programming paradigm.
3
4
## Model Context and Management
5
6
### Core Model Class
7
8
```python { .api }
9
from pymc import Model
10
11
class Model:
12
"""
13
Main container for probabilistic models in PyMC.
14
15
Parameters:
16
- name (str, optional): Name of the model
17
- coords (dict, optional): Coordinate specifications for dimensions
18
- check_bounds (bool): Check parameter bounds during sampling (default: True)
19
20
Attributes:
21
- free_RVs (list): List of free random variables
22
- observed_RVs (list): List of observed random variables
23
- deterministics (list): List of deterministic variables
24
- potentials (list): List of potential terms
25
- named_vars (dict): Dictionary of named variables
26
"""
27
28
def __init__(self, name='', coords=None, check_bounds=True):
29
pass
30
31
def __enter__(self):
32
"""Enter model context."""
33
return self
34
35
def __exit__(self, *args):
36
"""Exit model context."""
37
pass
38
39
def compile_logp(self, vars=None, jacobian=True):
40
"""
41
Compile log-probability function.
42
43
Parameters:
44
- vars (list, optional): Variables to include
45
- jacobian (bool): Include Jacobian terms for transforms
46
47
Returns:
48
- logp_func: Compiled log-probability function
49
"""
50
pass
51
52
def compile_fn(self, outs, ins=None, **kwargs):
53
"""
54
Compile PyTensor function from model variables.
55
56
Parameters:
57
- outs: Output variables
58
- ins (list, optional): Input variables
59
- **kwargs: Additional compilation options
60
61
Returns:
62
- compiled_fn: Compiled PyTensor function
63
"""
64
pass
65
66
# Basic model definition
67
with pm.Model() as basic_model:
68
# Model components go here
69
alpha = pm.Normal('alpha', mu=0, sigma=1)
70
beta = pm.Normal('beta', mu=0, sigma=1)
71
sigma = pm.HalfNormal('sigma', sigma=1)
72
73
# Linear combination
74
mu = alpha + beta * x_data
75
76
# Likelihood
77
y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y_data)
78
79
# Model with coordinates and naming
80
coords = {
81
'regions': ['north', 'south', 'east', 'west'],
82
'time': range(2000, 2021)
83
}
84
85
with pm.Model(name='regional_model', coords=coords) as model:
86
# Region-specific parameters
87
alpha = pm.Normal('alpha', 0, 1, dims='regions')
88
89
# Time-varying effects
90
beta = pm.GaussianRandomWalk('beta', sigma=0.1, dims='time')
91
```
92
93
### Model Context Functions
94
95
```python { .api }
96
def modelcontext(model=None):
97
"""
98
Get the current model from context or return specified model.
99
100
Parameters:
101
- model (Model, optional): Explicit model to return
102
103
Returns:
104
- model: Current model in context
105
"""
106
107
# Get current model
108
current_model = pm.modelcontext()
109
110
# Access model properties
111
print(f"Free RVs: {len(current_model.free_RVs)}")
112
print(f"Observed RVs: {len(current_model.observed_RVs)}")
113
print(f"Deterministics: {len(current_model.deterministics)}")
114
```
115
116
### Data Management
117
118
```python { .api }
119
def set_data(new_data, model=None, coords=None):
120
"""
121
Update data in existing model.
122
123
Parameters:
124
- new_data (dict): Dictionary of new data values
125
- model (Model, optional): Target model
126
- coords (dict, optional): New coordinate specifications
127
"""
128
129
# Define model with data containers
130
with pm.Model() as dynamic_model:
131
# Data containers that can be updated
132
X_data = pm.Data('X_data', X_train)
133
y_data = pm.Data('y_data', y_train)
134
135
# Model using data
136
beta = pm.Normal('beta', 0, 1, shape=X_train.shape[1])
137
mu = pm.math.dot(X_data, beta)
138
y_pred = pm.Normal('y_pred', mu=mu, sigma=1, observed=y_data)
139
140
# Sample with training data
141
trace = pm.sample()
142
143
# Update for prediction on new data
144
pm.set_data({'X_data': X_test, 'y_data': None})
145
with dynamic_model:
146
post_pred = pm.sample_posterior_predictive(trace)
147
```
148
149
## Random Variables and Distributions
150
151
### Random Variable Creation
152
153
Random variables are created automatically when distributions are instantiated within a model context:
154
155
```python { .api }
156
with pm.Model() as model:
157
# Creates random variable named 'parameter'
158
parameter = pm.Normal('parameter', mu=0, sigma=1)
159
160
# Access random variable properties
161
print(f"Name: {parameter.name}")
162
print(f"Distribution: {parameter.distribution}")
163
print(f"Transform: {parameter.transform}")
164
print(f"Shape: {parameter.shape}")
165
```
166
167
## Deterministic Variables
168
169
### Deterministic Class
170
171
```python { .api }
172
from pymc import Deterministic
173
174
class Deterministic:
175
"""
176
Deterministic transformation of other variables.
177
178
Parameters:
179
- name (str): Variable name
180
- var: PyTensor expression
181
- dims (tuple, optional): Dimension names
182
- model (Model, optional): Model context
183
184
Attributes:
185
- name (str): Variable name
186
- owner: PyTensor variable
187
"""
188
189
def __init__(self, name, var, dims=None, model=None):
190
pass
191
192
# Deterministic transformations
193
with pm.Model() as model:
194
# Parameters
195
alpha = pm.Normal('alpha', 0, 1)
196
beta = pm.Normal('beta', 0, 1)
197
198
# Deterministic combinations
199
linear_pred = pm.Deterministic('linear_pred', alpha + beta * x_data)
200
201
# Transformed parameters
202
log_odds = pm.Deterministic('log_odds', pm.math.log(p / (1 - p)))
203
204
# Complex transformations
205
interaction = pm.Deterministic('interaction',
206
alpha * beta + pm.math.exp(alpha))
207
```
208
209
### Common Deterministic Patterns
210
211
```python { .api }
212
# Centered vs non-centered parameterization
213
with pm.Model() as hierarchical:
214
# Hyperparameters
215
mu_alpha = pm.Normal('mu_alpha', 0, 10)
216
sigma_alpha = pm.HalfNormal('sigma_alpha', 5)
217
218
# Non-centered parameterization (better for sampling)
219
alpha_raw = pm.Normal('alpha_raw', 0, 1, shape=n_groups)
220
alpha = pm.Deterministic('alpha', mu_alpha + sigma_alpha * alpha_raw)
221
222
# Soft constraints via deterministic
223
alpha_constrained = pm.Deterministic('alpha_constrained',
224
pm.math.clip(alpha, -10, 10))
225
```
226
227
## Potential Terms
228
229
### Potential Class
230
231
```python { .api }
232
from pymc import Potential
233
234
class Potential:
235
"""
236
Add arbitrary log-probability terms to the model.
237
238
Parameters:
239
- name (str): Name for the potential
240
- var: PyTensor expression for log-probability
241
- model (Model, optional): Model context
242
"""
243
244
def __init__(self, name, var, model=None):
245
pass
246
247
# Custom priors and constraints
248
with pm.Model() as constrained_model:
249
# Parameters
250
theta = pm.Uniform('theta', 0, 10, shape=3)
251
252
# Custom constraint: sum of theta must be less than 5
253
constraint = pm.Potential('sum_constraint',
254
pm.math.switch(pm.math.sum(theta) < 5, 0, -np.inf))
255
256
# Custom prior that's not available as distribution
257
custom_prior = pm.Potential('custom_prior',
258
-0.5 * pm.math.sum((theta - 2)**4))
259
260
# Soft constraints with penalty
261
soft_constraint = pm.Potential('soft_constraint',
262
-10 * pm.math.maximum(0, pm.math.sum(theta) - 8))
263
```
264
265
## Model Transformations and Operations
266
267
### Causal Interventions
268
269
```python { .api }
270
from pymc.model.transform.conditioning import do
271
272
def do(model, interventions):
273
"""
274
Apply causal interventions to model (do-operator).
275
276
Parameters:
277
- model (Model): Original model
278
- interventions (dict): Variable interventions {var_name: value}
279
280
Returns:
281
- intervened_model: Model with interventions applied
282
"""
283
284
# Causal inference with interventions
285
with pm.Model() as causal_model:
286
# Structural model
287
X = pm.Normal('X', 0, 1)
288
Y = pm.Normal('Y', mu=2*X + 1, sigma=0.5) # Y depends on X
289
Z = pm.Normal('Z', mu=X + Y, sigma=0.3) # Z depends on X and Y
290
291
# Observational model
292
obs_trace = pm.sample()
293
294
# Interventional model: do(X = 1.5)
295
intervened_model = pm.do(causal_model, {'X': 1.5})
296
with intervened_model:
297
# Sample from intervened distribution
298
intervention_trace = pm.sample()
299
```
300
301
### Conditional Models
302
303
```python { .api }
304
from pymc.model.transform.conditioning import observe
305
306
def observe(model, observations):
307
"""
308
Condition model on observed values.
309
310
Parameters:
311
- model (Model): Original model
312
- observations (dict): Observed values {var_name: value}
313
314
Returns:
315
- conditioned_model: Model conditioned on observations
316
"""
317
318
# Conditional inference
319
with pm.Model() as joint_model:
320
X = pm.Normal('X', 0, 1)
321
Y = pm.Normal('Y', mu=X, sigma=0.5)
322
323
# Condition on observed X
324
conditioned_model = pm.observe(joint_model, {'X': observed_x_values})
325
with conditioned_model:
326
# Sample conditional distribution P(Y|X=observed)
327
conditional_trace = pm.sample()
328
```
329
330
## Compilation and Functions
331
332
### Function Compilation
333
334
```python { .api }
335
def compile_fn(outs, ins=None, mode=None, **kwargs):
336
"""
337
Compile PyTensor function from model variables.
338
339
Parameters:
340
- outs: Output variables or expressions
341
- ins (list, optional): Input variables
342
- mode: Compilation mode ('FAST_RUN', 'FAST_COMPILE', etc.)
343
- **kwargs: Additional PyTensor compilation options
344
345
Returns:
346
- compiled_fn: Compiled function
347
"""
348
349
with pm.Model() as model:
350
# Parameters
351
alpha = pm.Normal('alpha', 0, 1)
352
beta = pm.Normal('beta', 0, 1)
353
x = pm.Data('x', x_data)
354
355
# Expression to compile
356
prediction = alpha + beta * x
357
358
# Compile prediction function
359
predict_fn = pm.compile_fn([prediction], [alpha, beta, x])
360
361
# Use compiled function
362
pred_values = predict_fn(alpha_val, beta_val, x_new)
363
364
# Compile log-probability function
365
with model:
366
logp_fn = model.compile_logp()
367
368
# Evaluate log-probability at point
369
test_point = {'alpha': 0.5, 'beta': -1.2}
370
log_prob = logp_fn(test_point)
371
```
372
373
## Advanced Model Patterns
374
375
### Model Factories
376
377
```python { .api }
378
def create_regression_model(X, y, priors=None):
379
"""Factory function for regression models."""
380
381
if priors is None:
382
priors = {'alpha': (0, 10), 'beta': (0, 5), 'sigma': 1}
383
384
with pm.Model() as regression_model:
385
# Priors
386
alpha = pm.Normal('alpha', *priors['alpha'])
387
beta = pm.Normal('beta', *priors['beta'], shape=X.shape[1])
388
sigma = pm.HalfNormal('sigma', priors['sigma'])
389
390
# Linear model
391
mu = alpha + pm.math.dot(X, beta)
392
393
# Likelihood
394
y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)
395
396
return regression_model
397
398
# Create multiple models with different priors
399
model1 = create_regression_model(X_data, y_data)
400
model2 = create_regression_model(X_data, y_data,
401
priors={'alpha': (0, 1), 'beta': (0, 1), 'sigma': 0.5})
402
```
403
404
### Model Composition
405
406
```python { .api }
407
# Combine submodels
408
def create_measurement_model():
409
with pm.Model() as measurement:
410
# Measurement error
411
sigma_m = pm.HalfNormal('sigma_m', 1)
412
return measurement, sigma_m
413
414
def create_process_model():
415
with pm.Model() as process:
416
# Process parameters
417
mu_p = pm.Normal('mu_p', 0, 5)
418
sigma_p = pm.HalfNormal('sigma_p', 2)
419
return process, (mu_p, sigma_p)
420
421
# Combine models
422
measurement_model, sigma_m = create_measurement_model()
423
process_model, (mu_p, sigma_p) = create_process_model()
424
425
with pm.Model() as combined_model:
426
# Include submodel variables
427
sigma_m_combined = pm.HalfNormal('sigma_m', 1)
428
mu_p_combined = pm.Normal('mu_p', 0, 5)
429
sigma_p_combined = pm.HalfNormal('sigma_p', 2)
430
431
# Latent process
432
latent = pm.Normal('latent', mu=mu_p_combined, sigma=sigma_p_combined)
433
434
# Observed measurements
435
observed = pm.Normal('observed', mu=latent, sigma=sigma_m_combined,
436
observed=measurements)
437
```
438
439
### Dynamic Models
440
441
```python { .api }
442
def create_dynamic_model(data_stream):
443
"""Create model that adapts to streaming data."""
444
445
with pm.Model() as dynamic:
446
# Time-varying parameters
447
n_timepoints = len(data_stream)
448
449
# Random walk parameters
450
innovation_sd = pm.HalfNormal('innovation_sd', 0.1)
451
initial_state = pm.Normal('initial_state', 0, 1)
452
453
# State evolution
454
states = [initial_state]
455
for t in range(1, n_timepoints):
456
state_t = pm.Normal(f'state_{t}',
457
mu=states[t-1],
458
sigma=innovation_sd)
459
states.append(state_t)
460
461
# Observations
462
obs_sd = pm.HalfNormal('obs_sd', 1)
463
for t, data_t in enumerate(data_stream):
464
pm.Normal(f'obs_{t}', mu=states[t], sigma=obs_sd,
465
observed=data_t)
466
467
return dynamic
468
```
469
470
## Model Utilities and Introspection
471
472
### Model Analysis
473
474
```python { .api }
475
# Analyze model structure
476
with pm.Model() as analysis_model:
477
# Complex model...
478
479
# Get all variables
480
free_vars = analysis_model.free_RVs
481
observed_vars = analysis_model.observed_RVs
482
deterministic_vars = analysis_model.deterministics
483
484
print("Free variables:")
485
for var in free_vars:
486
print(f" {var.name}: {var.distribution}")
487
488
print("Deterministic variables:")
489
for var in deterministic_vars:
490
print(f" {var.name}: shape {var.shape}")
491
492
# Get variable by name
493
specific_var = analysis_model['parameter_name']
494
495
# Check model dimensions
496
print(f"Model coordinates: {analysis_model.coords}")
497
print(f"Model RNG: {analysis_model.rng}")
498
```
499
500
### Model Validation
501
502
```python { .api }
503
def validate_model(model):
504
"""Validate model structure and parameters."""
505
506
with model:
507
# Check for common issues
508
try:
509
# Compile log-probability function
510
logp_fn = model.compile_logp()
511
512
# Test evaluation at random point
513
test_point = model.initial_point()
514
logp_val = logp_fn(test_point)
515
516
if np.isnan(logp_val) or np.isinf(logp_val):
517
print("Warning: Model has numerical issues")
518
519
print(f"Model log-probability: {logp_val}")
520
521
except Exception as e:
522
print(f"Model validation failed: {e}")
523
524
# Check parameter shapes
525
for var in model.free_RVs:
526
print(f"{var.name}: shape {var.shape}, dtype {var.dtype}")
527
528
# Usage
529
validate_model(my_model)
530
```
531
532
## Point Operations
533
534
### Point Type and Operations
535
536
```python { .api }
537
Point = Dict[str, np.ndarray] # Type alias for parameter points
538
539
# Working with points
540
with pm.Model() as model:
541
alpha = pm.Normal('alpha', 0, 1)
542
beta = pm.Normal('beta', 0, 1, shape=3)
543
544
# Get initial point
545
initial_point = model.initial_point()
546
print(f"Initial point: {initial_point}")
547
548
# Create custom point
549
custom_point = {'alpha': 1.5, 'beta': np.array([0.5, -0.2, 0.8])}
550
551
# Evaluate log-probability at point
552
logp_fn = model.compile_logp()
553
logp_value = logp_fn(custom_point)
554
```
555
556
PyMC's model framework provides a flexible foundation for building complex probabilistic models while maintaining clean separation between model specification, compilation, and inference.