or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-handling.md distributions.md gaussian-processes.md glm.md index.md math-functions.md modeling.md sampling.md stats-plots.md step-methods.md variational.md

docs/modeling.md

0

# Model Building and Core Constructs

1

2

PyMC3's model building capabilities provide a flexible and intuitive framework for constructing Bayesian models. The Model class serves as a container that manages random variables, transformations, and the computational graph, enabling automatic inference and efficient computation.

3

4

## Capabilities

5

6

### Model Container

7

8

The central Model class that manages all components of a Bayesian model including random variables, deterministic transformations, and observed data.

9

10

```python { .api }

11

class Model:

12

"""

13

Main model container class for PyMC3 Bayesian models.

14

15

The Model class serves as a context manager that collects random variables,

16

deterministic variables, and potential functions to define a complete

17

Bayesian model for inference.

18

"""

19

20

def __init__(self, name='', model=None):

21

"""

22

Create a new Model.

23

24

Parameters:

25

- name: str, model name for identification

26

- model: Model, parent model for nested models

27

"""

28

29

def Var(self, name, dist, data=None, total_size=None, dims=None):

30

"""

31

Create a random variable within the model.

32

33

Parameters:

34

- name: str, variable name

35

- dist: Distribution, probability distribution

36

- data: array-like, observed data (makes variable observed)

37

- total_size: int, total size for minibatch inference

38

- dims: tuple, dimension names for ArviZ

39

40

Returns:

41

- TensorVariable: model variable

42

"""

43

44

def set_data(self, new_data, model=None):

45

"""

46

Update shared data variables in the model.

47

48

Parameters:

49

- new_data: dict, mapping variable names to new data

50

- model: Model, model context (uses current if None)

51

"""

52

53

@property

54

def logp(self):

55

"""Combined log-probability of all random variables."""

56

57

@property

58

def logpt(self):

59

"""Theano tensor for log-probability computation."""

60

61

@property

62

def free_RVs(self):

63

"""List of free (unobserved) random variables."""

64

65

@property

66

def observed_RVs(self):

67

"""List of observed random variables."""

68

69

@property

70

def deterministics(self):

71

"""List of deterministic variables."""

72

73

@property

74

def potentials(self):

75

"""List of potential functions."""

76

77

def compile_logp(self, vars=None, jacobian=True):

78

"""

79

Compile log-probability function.

80

81

Parameters:

82

- vars: list, variables to include (all free RVs if None)

83

- jacobian: bool, include Jacobian of transformations

84

85

Returns:

86

- function: compiled log-probability function

87

"""

88

89

def compile_dlogp(self, vars=None, jacobian=True):

90

"""

91

Compile gradient of log-probability function.

92

93

Parameters:

94

- vars: list, variables to include (all free RVs if None)

95

- jacobian: bool, include Jacobian of transformations

96

97

Returns:

98

- function: compiled gradient function

99

"""

100

101

def check_test_point(self, test_point=None):

102

"""

103

Check that model can be evaluated at test point.

104

105

Parameters:

106

- test_point: dict, point to test (uses model.test_point if None)

107

108

Returns:

109

- dict: evaluation results and diagnostics

110

"""

111

```

112

113

### Model Context Management

114

115

Functions for working with model contexts and accessing the current model.

116

117

```python { .api }

118

def modelcontext(model=None):

119

"""

120

Get current model context.

121

122

Parameters:

123

- model: Model, specific model to return (current context if None)

124

125

Returns:

126

- Model: current or specified model

127

128

Raises:

129

- TypeError: if no model is found and none provided

130

"""

131

132

def set_data(new_data, model=None):

133

"""

134

Set new values for shared data variables.

135

136

Parameters:

137

- new_data: dict, mapping from variable names to new data arrays

138

- model: Model, model containing the variables (current context if None)

139

140

Example:

141

with pm.Model() as model:

142

x_data = pm.Data('x_data', np.array([1, 2, 3]))

143

y = pm.Normal('y', mu=x_data, sigma=1)

144

145

# Later update the data

146

pm.set_data({'x_data': np.array([4, 5, 6])}, model=model)

147

"""

148

149

def Point(*args, **kwargs):

150

"""

151

Create a point dictionary for model evaluation.

152

153

Parameters:

154

- args: values in order of model.free_RVs

155

- kwargs: mapping from variable names to values

156

157

Returns:

158

- dict: point dictionary mapping variable names to values

159

"""

160

```

161

162

### Random Variables

163

164

Core random variable classes that represent stochastic components in Bayesian models.

165

166

```python { .api }

167

class FreeRV:

168

"""

169

Free (unobserved) random variable with prior distribution.

170

171

Represents parameters or latent variables that will be inferred

172

from data through MCMC or variational inference.

173

"""

174

175

@property

176

def distribution(self):

177

"""Associated probability distribution."""

178

179

@property

180

def transformed(self):

181

"""Transformed version for unconstrained sampling."""

182

183

@property

184

def tag(self):

185

"""Variable metadata and properties."""

186

187

class ObservedRV:

188

"""

189

Observed random variable representing data or likelihood.

190

191

Links data to the model through a probability distribution,

192

defining the likelihood component of Bayesian inference.

193

"""

194

195

@property

196

def distribution(self):

197

"""Associated probability distribution."""

198

199

@property

200

def observations(self):

201

"""Observed data values."""

202

203

@property

204

def missing_values(self):

205

"""Locations of missing data (if any)."""

206

207

class MultiObservedRV:

208

"""

209

Container for multiple observed random variables.

210

211

Used when multiple related observations share parameters

212

or when vectorizing likelihood computations.

213

"""

214

```

215

216

### Deterministic Variables

217

218

Variables that are deterministic functions of other model variables.

219

220

```python { .api }

221

def Deterministic(name, var, model=None, dims=None):

222

"""

223

Create a deterministic variable from a Theano expression.

224

225

Deterministic variables are functions of other model variables

226

and are tracked for posterior analysis without being sampled directly.

227

228

Parameters:

229

- name: str, variable name

230

- var: TensorVariable, Theano expression defining the transformation

231

- model: Model, model to add variable to (current context if None)

232

- dims: tuple, dimension names for ArviZ integration

233

234

Returns:

235

- TensorVariable: deterministic variable

236

237

Example:

238

with pm.Model() as model:

239

mu = pm.Normal('mu', 0, 1)

240

sigma = pm.HalfNormal('sigma', 1)

241

242

# Deterministic transformation

243

precision = pm.Deterministic('precision', 1 / sigma**2)

244

245

y = pm.Normal('y', mu=mu, tau=precision, observed=data)

246

"""

247

```

248

249

### Potential Functions

250

251

Custom log-likelihood terms for incorporating external information or constraints.

252

253

```python { .api }

254

def Potential(name, var, model=None):

255

"""

256

Add a potential (log-likelihood) term to the model.

257

258

Potentials allow adding arbitrary log-probability terms that are not

259

associated with specific random variables, useful for custom likelihoods,

260

constraints, or incorporating external information.

261

262

Parameters:

263

- name: str, potential name for identification

264

- var: TensorVariable, log-probability expression to add to model

265

- model: Model, model to add potential to (current context if None)

266

267

Returns:

268

- TensorVariable: the potential term

269

270

Example:

271

with pm.Model() as model:

272

theta = pm.Uniform('theta', 0, 1, shape=3)

273

274

# Add constraint that parameters sum to 1

275

constraint = pm.Potential('constraint',

276

tt.switch(tt.abs_(theta.sum() - 1) < 1e-6,

277

0,

278

-np.inf))

279

"""

280

```

281

282

### Factor Variables

283

284

Base class for model components that contribute to log-probability.

285

286

```python { .api }

287

class Factor:

288

"""

289

Base class for model factors (components contributing to log-probability).

290

291

Factors include random variables, deterministic variables, and potentials.

292

They provide a unified interface for model introspection and manipulation.

293

"""

294

295

@property

296

def logp(self):

297

"""Log-probability contribution of this factor."""

298

299

@property

300

def logpt(self):

301

"""Theano tensor for log-probability computation."""

302

```

303

304

### Data Handling

305

306

Classes and functions for managing observed data and minibatch inference.

307

308

```python { .api }

309

class Data:

310

"""

311

Shared data container for observations that can be updated.

312

313

Data objects allow updating observed values without recompiling

314

the model, enabling out-of-sample prediction and minibatch inference.

315

"""

316

317

def __init__(self, name, value, dims=None, export_index_as_dims=False):

318

"""

319

Create a shared data variable.

320

321

Parameters:

322

- name: str, variable name

323

- value: array-like, initial data values

324

- dims: tuple, dimension names for ArviZ

325

- export_index_as_dims: bool, use integer indices as dimension names

326

327

Returns:

328

- TensorSharedVariable: shared data tensor

329

"""

330

331

def set_value(self, new_value):

332

"""Update data values."""

333

334

class Minibatch:

335

"""

336

Minibatch container for stochastic variational inference.

337

338

Automatically handles data subsampling and scaling for minibatch

339

training with variational inference methods.

340

"""

341

342

def __init__(self, data, batch_size=None, dtype=None, broadcastable=None,

343

name=None, random_seed=None, update_shared_f=None,

344

in_memory_slices=None):

345

"""

346

Create minibatch data container.

347

348

Parameters:

349

- data: array-like, full dataset

350

- batch_size: int, size of minibatches

351

- dtype: data type for batches

352

- broadcastable: tuple, broadcastable dimensions

353

- name: str, variable name

354

- random_seed: int, random seed for sampling

355

- update_shared_f: function, custom update function

356

- in_memory_slices: tuple, slices to keep in memory

357

"""

358

359

class GeneratorAdapter:

360

"""

361

Adapter for using Python generators as data sources.

362

363

Enables streaming data processing and infinite data generators

364

for online learning scenarios.

365

"""

366

367

def get_data(filename):

368

"""

369

Load data from file (supports various formats).

370

371

Parameters:

372

- filename: str, path to data file

373

374

Returns:

375

- array: loaded data

376

"""

377

378

def align_minibatches(*minibatch_tensors):

379

"""

380

Align multiple minibatch tensors for synchronized sampling.

381

382

Parameters:

383

- minibatch_tensors: Minibatch objects to align

384

385

Returns:

386

- list: aligned minibatch tensors

387

"""

388

```

389

390

### Model Compilation and Functions

391

392

Utilities for compiling efficient functions from model components.

393

394

```python { .api }

395

def fn(outs, ins, mode=None, **kwargs):

396

"""

397

Compile Theano function from model variables.

398

399

Parameters:

400

- outs: list, output variables

401

- ins: list, input variables

402

- mode: str, Theano compilation mode

403

- kwargs: additional arguments for theano.function

404

405

Returns:

406

- function: compiled Theano function

407

"""

408

409

def fastfn(outs, ins, mode=None, **kwargs):

410

"""

411

Compile fast Theano function with optimizations.

412

413

Parameters:

414

- outs: list, output variables

415

- ins: list, input variables

416

- mode: str, Theano compilation mode ('FAST_COMPILE' if None)

417

- kwargs: additional arguments for theano.function

418

419

Returns:

420

- function: compiled optimized function

421

"""

422

423

class FastPointFunc:

424

"""

425

Fast function wrapper for point evaluation.

426

427

Provides efficient evaluation of model functions at specific

428

parameter values without full Theano overhead.

429

"""

430

431

class LoosePointFunc:

432

"""

433

Loose function wrapper for flexible point evaluation.

434

435

More flexible than FastPointFunc but potentially slower,

436

handles shape mismatches and missing variables gracefully.

437

"""

438

439

class ValueGradFunction:

440

"""

441

Combined value and gradient function for optimization.

442

443

Efficiently computes both function values and gradients

444

in a single pass, useful for MAP estimation and optimization.

445

"""

446

```

447

448

### Model Visualization

449

450

Tools for visualizing model structure and dependencies.

451

452

```python { .api }

453

def model_to_graphviz(model, var_names=None, formatting='plain',

454

save=None, figsize=(5, 5), dpi=300):

455

"""

456

Convert PyMC3 model to GraphViz representation.

457

458

Creates a directed graph showing relationships between model variables,

459

useful for understanding model structure and debugging complex models.

460

461

Parameters:

462

- model: Model, PyMC3 model to visualize

463

- var_names: list, variables to include (all if None)

464

- formatting: str, node formatting style ('plain', 'fancy')

465

- save: str, filename to save graph (displays if None)

466

- figsize: tuple, figure size in inches

467

- dpi: int, resolution for saved figure

468

469

Returns:

470

- graphviz.Digraph: GraphViz graph object

471

"""

472

```

473

474

## Usage Examples

475

476

### Basic Model Structure

477

478

```python

479

import pymc3 as pm

480

import numpy as np

481

import theano.tensor as tt

482

483

# Basic linear regression model

484

with pm.Model() as basic_model:

485

# Priors

486

alpha = pm.Normal('alpha', mu=0, sigma=10)

487

beta = pm.Normal('beta', mu=0, sigma=10)

488

sigma = pm.HalfNormal('sigma', sigma=1)

489

490

# Linear predictor

491

mu = alpha + beta * x_data

492

493

# Likelihood

494

y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y_data)

495

496

# Deterministic quantities for posterior analysis

497

R2 = pm.Deterministic('R2', 1 - tt.var(y_data - mu) / tt.var(y_data))

498

```

499

500

### Hierarchical Model with Groups

501

502

```python

503

# Hierarchical model with group-level effects

504

with pm.Model() as hierarchical_model:

505

# Hyperpriors

506

mu_alpha = pm.Normal('mu_alpha', mu=0, sigma=10)

507

sigma_alpha = pm.HalfNormal('sigma_alpha', sigma=5)

508

509

mu_beta = pm.Normal('mu_beta', mu=0, sigma=10)

510

sigma_beta = pm.HalfNormal('sigma_beta', sigma=5)

511

512

# Group-level parameters

513

alpha = pm.Normal('alpha', mu=mu_alpha, sigma=sigma_alpha, shape=n_groups)

514

beta = pm.Normal('beta', mu=mu_beta, sigma=sigma_beta, shape=n_groups)

515

516

# Observation-level parameters

517

sigma = pm.HalfNormal('sigma', sigma=1)

518

519

# Expected values

520

mu = alpha[group_idx] + beta[group_idx] * x_data

521

522

# Likelihood

523

y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y_data)

524

```

525

526

### Model with Custom Likelihood

527

528

```python

529

# Model with custom potential for non-standard likelihood

530

with pm.Model() as custom_model:

531

# Parameters

532

theta = pm.Beta('theta', alpha=1, beta=1)

533

534

# Custom log-likelihood function

535

def logp_custom(obs, theta):

536

# Example: zero-truncated Poisson

537

return tt.sum(obs * tt.log(theta) - theta - tt.gammaln(obs + 1)

538

- tt.log(1 - tt.exp(-theta)))

539

540

# Add custom likelihood as potential

541

likelihood = pm.Potential('likelihood',

542

logp_custom(observed_data, theta))

543

```

544

545

### Model with Shared Data

546

547

```python

548

# Model using shared data for prediction

549

x_shared = pm.Data('x_shared', x_train)

550

y_shared = pm.Data('y_shared', y_train)

551

552

with pm.Model() as prediction_model:

553

# Model parameters

554

alpha = pm.Normal('alpha', mu=0, sigma=10)

555

beta = pm.Normal('beta', mu=0, sigma=10)

556

sigma = pm.HalfNormal('sigma', sigma=1)

557

558

# Linear predictor using shared data

559

mu = alpha + beta * x_shared

560

561

# Likelihood

562

y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y_shared)

563

564

# Sample posterior

565

trace = pm.sample(1000, tune=1000)

566

567

# Update data for out-of-sample prediction

568

pm.set_data({'x_shared': x_test}, model=prediction_model)

569

570

# Generate posterior predictions

571

with prediction_model:

572

post_pred = pm.sample_posterior_predictive(trace)

573

```

574

575

### Model with Deterministic Transformations

576

577

```python

578

# Model with multiple deterministic quantities

579

with pm.Model() as transform_model:

580

# Raw parameters

581

raw_effects = pm.Normal('raw_effects', mu=0, sigma=1, shape=5)

582

raw_scale = pm.Normal('raw_scale', mu=0, sigma=1)

583

584

# Transformed parameters

585

effects = pm.Deterministic('effects', raw_effects * 0.5)

586

scale = pm.Deterministic('scale', tt.exp(raw_scale))

587

588

# Derived quantities

589

total_effect = pm.Deterministic('total_effect', tt.sum(effects))

590

effect_variance = pm.Deterministic('effect_variance', tt.var(effects))

591

592

# Model prediction

593

prediction = pm.Deterministic('prediction',

594

tt.dot(x_data, effects))

595

596

# Likelihood

597

y_obs = pm.Normal('y_obs', mu=prediction, sigma=scale,

598

observed=y_data)

599

```

600

601

### Minibatch Model for Large Data

602

603

```python

604

# Model with minibatch training for large datasets

605

batch_size = 100

606

n_data = len(large_dataset)

607

608

# Create minibatch containers

609

x_minibatch = pm.Minibatch(x_large, batch_size=batch_size)

610

y_minibatch = pm.Minibatch(y_large, batch_size=batch_size)

611

612

with pm.Model() as minibatch_model:

613

# Model parameters

614

weights = pm.Normal('weights', mu=0, sigma=1, shape=n_features)

615

intercept = pm.Normal('intercept', mu=0, sigma=10)

616

617

# Scale factor for minibatch

618

scale_factor = n_data // batch_size

619

620

# Linear predictor

621

prediction = intercept + tt.dot(x_minibatch, weights)

622

623

# Scaled likelihood for minibatch

624

y_obs = pm.Normal('y_obs', mu=prediction, sigma=1,

625

observed=y_minibatch,

626

total_size=n_data)

627

```

628

629

### Model Diagnostics and Checking

630

631

```python

632

# Model checking and diagnostics

633

with pm.Model() as diagnostic_model:

634

# Model specification...

635

theta = pm.Beta('theta', alpha=2, beta=2)

636

y_obs = pm.Binomial('y_obs', n=10, p=theta, observed=data)

637

638

# Check model evaluation at test point

639

test_point = diagnostic_model.test_point

640

print("Test point:", test_point)

641

642

# Check log-probability evaluation

643

logp_check = diagnostic_model.check_test_point()

644

print("Log-probability check:", logp_check)

645

646

# Compile log-probability function

647

logp_fn = diagnostic_model.compile_logp()

648

649

# Compile gradient function

650

dlogp_fn = diagnostic_model.compile_dlogp()

651

652

# Test function evaluation

653

logp_val = logp_fn(test_point)

654

grad_val = dlogp_fn(test_point)

655

```