or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data.md · distributions.md · gp.md · index.md · math.md · model.md · ode.md · sampling.md · stats.md · variational.md

docs/sampling.md

0

# PyMC Sampling and Inference

1

2

PyMC provides a comprehensive suite of sampling and inference methods for Bayesian analysis. The library supports Markov Chain Monte Carlo (MCMC), variational inference, sequential Monte Carlo, and predictive sampling with automatic tuning and diagnostics.

3

4

## Main Sampling Interface

5

6

### Primary Sampling Function

7

8

```python { .api }

9

import pymc as pm

10

11

def sample(draws=1000, tune=1000, chains=None, cores=None,

12

random_seed=None, progressbar=True, step=None,

13

nuts_sampler='pymc', initvals=None, init='auto',

14

jitter_max_retries=10, n_init=200000, trace=None,

15

discard_tuned_samples=True, compute_convergence_checks=True,

16

keep_warning_stat=False, return_inferencedata=True,

17

idata_kwargs=None, callback=None, mp_ctx=None, **kwargs):

18

"""

19

Draw samples from the posterior distribution using MCMC.

20

21

Parameters:

22

- draws (int): Number of samples to draw (default: 1000)

23

- tune (int): Number of tuning samples (default: 1000)

24

- chains (int): Number of chains (default: auto)

25

- cores (int): Number of parallel processes (default: auto)

26

- random_seed (int): Random seed for reproducibility

27

- step (step method): Custom step method (default: auto-select)

28

- init (str): Initialization method ('auto', 'adapt_diag', 'jitter+adapt_diag')

29

- nuts_sampler (str): NUTS implementation ('pymc', 'nutpie', 'numpyro', 'blackjax'; default: 'pymc')

30

- return_inferencedata (bool): Return ArviZ InferenceData object

31

32

Returns:

33

- trace: MCMC samples as InferenceData or MultiTrace

34

"""

35

36

# Basic sampling

37

with pm.Model() as model:

38

# Define model...

39

trace = pm.sample()

40

41

# Custom sampling configuration

42

trace = pm.sample(

43

draws=2000, # More samples

44

tune=1500, # More tuning

45

chains=4, # Parallel chains

46

cores=4, # Use 4 cores

47

target_accept=0.9, # Higher acceptance rate

48

max_treedepth=12 # Deeper trees for complex models

49

)

50

```

51

52

### Initialization Methods

53

54

```python { .api }

55

def init_nuts(init='auto', chains=None, n_init=200000, model=None,

56

random_seed=None, progressbar=True, jitter_max_retries=10,

57

tune=None, initvals=None, **kwargs):

58

"""

59

Initialize NUTS sampler with optimal step size and mass matrix.

60

61

Parameters:

62

- init (str): Initialization strategy

63

- 'auto': Automatic initialization

64

- 'adapt_diag': Adapt diagonal mass matrix

65

- 'jitter+adapt_diag': Add jitter + adapt diagonal

66

- 'adapt_full': Adapt full mass matrix

67

- chains (int): Number of chains to initialize

68

- n_init (int): Number of initialization samples

69

- initvals (dict): Initial parameter values

70

71

Returns:

- initial_points: Starting points for each chain

- step: Initialized NUTS step method

"""

75

76

# Initialize NUTS with custom settings

77

initial_points, step = pm.init_nuts(

78

init='adapt_full', # Full mass matrix adaptation

79

n_init=500000, # More initialization samples

80

chains=4

81

)

82

83

# Use initialized sampler

84

trace = pm.sample(step=step)

85

```

86

87

## Step Methods

88

89

### NUTS (No-U-Turn Sampler)

90

91

The default and most efficient sampler for continuous variables:

92

93

```python { .api }

94

from pymc.step_methods import NUTS

95

96

class NUTS:

97

"""

98

No-U-Turn Sampler for continuous variables.

99

100

Parameters:

101

- vars (list): Variables to sample (default: all continuous)

102

- target_accept (float): Target acceptance probability (default: 0.8)

103

- max_treedepth (int): Maximum tree depth (default: 10)

104

- step_scale (float): Initial step size scaling

105

- is_cov (array): Covariance matrix for mass matrix

106

"""

107

108

def __init__(self, vars=None, target_accept=0.8, max_treedepth=10, **kwargs):

109

pass

110

111

# Custom NUTS configuration

112

nuts_step = pm.NUTS(

113

vars=[param1, param2], # Specific variables

114

target_accept=0.95, # High acceptance rate

115

max_treedepth=15 # Deep trees for complex geometry

116

)

117

118

trace = pm.sample(step=nuts_step)

119

```

120

121

### Hamiltonian Monte Carlo

122

123

```python { .api }

124

from pymc.step_methods import HamiltonianMC

125

126

class HamiltonianMC:

127

"""

128

Hamiltonian Monte Carlo sampler.

129

130

Parameters:

131

- vars (list): Variables to sample

132

- path_length (float): Length of Hamiltonian trajectory

133

- step_rand (function): Step size randomization function

134

- step_scale (float): Step size scaling factor

135

"""

136

137

# HMC with fixed trajectory length

138

hmc_step = pm.HamiltonianMC(vars=continuous_vars, path_length=2.0)

139

```

140

141

### Metropolis Samplers

142

143

```python { .api }

144

from pymc.step_methods import (Metropolis, BinaryMetropolis,

145

CategoricalGibbsMetropolis, DEMetropolis)

146

147

# Standard Metropolis-Hastings

148

metropolis = pm.Metropolis(vars=param, proposal_dist=pm.NormalProposal)

149

150

# For binary variables

151

binary_metropolis = pm.BinaryMetropolis(vars=binary_var)

152

153

# For categorical variables

154

categorical_gibbs = pm.CategoricalGibbsMetropolis(vars=categorical_var)

155

156

# Differential Evolution Metropolis

157

de_metropolis = pm.DEMetropolis(vars=param, scaling=1e-4)

158

```

159

160

### Slice Sampler

161

162

```python { .api }

163

from pymc.step_methods import Slice

164

165

class Slice:

166

"""

167

Slice sampler for univariate distributions.

168

169

Parameters:

170

- vars (list): Variables to sample

171

- w (float): Initial width of slice

172

- tune (bool): Tune width during sampling

173

"""

174

175

# Slice sampling for specific parameter

176

slice_step = pm.Slice(vars=[difficult_param], w=2.0)

177

```

178

179

### Specialized Step Methods

180

181

```python { .api }

182

from pymc.step_methods import (

183

BinaryGibbsMetropolis, BinaryMetropolis, CategoricalGibbsMetropolis,

184

DEMetropolis, DEMetropolisZ, CompoundStep, BlockedStep

185

)

186

187

# Binary Gibbs Metropolis for binary variables

188

binary_gibbs = pm.BinaryGibbsMetropolis('binary_var')

189

190

# Binary Metropolis with custom proposal

191

binary_metro = pm.BinaryMetropolis('binary_var')

192

193

# Categorical Gibbs for categorical variables

194

cat_gibbs = pm.CategoricalGibbsMetropolis('categorical_var')

195

196

# Differential Evolution Metropolis

197

de_metro = pm.DEMetropolis(vars=[var1, var2],

198

scaling=0.001, # Scaling factor

199

tune_scaling=True, # Auto-tune scaling

200

tune_interval=100) # Tuning frequency

201

202

# DE Metropolis with Z proposals

203

de_metro_z = pm.DEMetropolisZ(vars=[var1, var2])

204

```

205

206

### Proposal Distributions

207

208

```python { .api }

209

from pymc.step_methods.metropolis import (

210

CauchyProposal, LaplaceProposal, MultivariateNormalProposal,

211

NormalProposal, PoissonProposal, UniformProposal

212

)

213

214

# Custom Metropolis with different proposals

215

metro_cauchy = pm.Metropolis('var', proposal_dist=CauchyProposal)

216

metro_laplace = pm.Metropolis('var', proposal_dist=LaplaceProposal)

217

metro_uniform = pm.Metropolis('var', proposal_dist=UniformProposal)

218

219

# Multivariate normal proposal with custom covariance

220

mv_proposal = MultivariateNormalProposal(np.eye(3) * 0.1)

221

metro_mv = pm.Metropolis(vars=[var1, var2, var3], proposal_dist=mv_proposal)

222

223

# Poisson proposal for count data

224

metro_poisson = pm.Metropolis('count_var', proposal_dist=PoissonProposal)

225

```

226

227

### Compound and Blocked Steps

228

229

```python { .api }

230

# Compound step combining different samplers

231

step1 = pm.NUTS([continuous_var1, continuous_var2])

232

step2 = pm.BinaryGibbsMetropolis([binary_var])

233

step3 = pm.CategoricalGibbsMetropolis([categorical_var])

234

235

compound_step = pm.CompoundStep([step1, step2, step3])

236

237

# Use compound step in sampling

238

trace = pm.sample(step=compound_step)

239

240

# Note: BlockedStep is an abstract base class for implementing custom
# step methods; it is subclassed rather than instantiated directly
# (see the "Custom Step Methods" section below).

242

```

243

244

### Compound Step Methods

245

246

```python { .api }

247

from pymc.step_methods import CompoundStep

248

249

# Combine different step methods

250

compound = pm.CompoundStep([

251

pm.NUTS(vars=continuous_vars),

252

pm.BinaryMetropolis(vars=binary_vars),

253

pm.CategoricalGibbsMetropolis(vars=categorical_vars)

254

])

255

256

trace = pm.sample(step=compound)

257

```

258

259

## Proposal Distributions

260

261

### Standard Proposals

262

263

```python { .api }

264

from pymc.step_methods.metropolis import (

265

NormalProposal, UniformProposal, LaplaceProposal,

266

CauchyProposal, PoissonProposal, MultivariateNormalProposal

267

)

268

269

# Use the names imported above from pymc.step_methods.metropolis;
# these proposal classes are not re-exported at the top-level pm namespace.

# Normal proposal for Metropolis
normal_prop = NormalProposal(s=0.5)  # Standard deviation

# Uniform proposal
uniform_prop = UniformProposal(s=1.0)  # Half-width

# Multivariate normal proposal (covariance is passed positionally)
mv_prop = MultivariateNormalProposal(covariance_matrix)

277

```

278

279

## Sequential Monte Carlo

280

281

```python { .api }

282

def sample_smc(draws=1000, kernel='metropolis', n_steps=25, parallel=False,

283

start=None, cores=None, compute_convergence_checks=True,

284

model=None, random_seed=None, progressbar=True, **kernel_kwargs):

285

"""

286

Sequential Monte Carlo sampling.

287

288

Parameters:

289

- draws (int): Number of samples to draw

290

- kernel (str): SMC kernel ('metropolis', 'nuts')

291

- n_steps (int): Number of SMC steps

292

- parallel (bool): Parallel chain execution

293

- start (dict): Starting values

294

295

Returns:

296

- trace: SMC samples as InferenceData

297

"""

298

299

# Basic SMC sampling

300

with pm.Model() as model:

301

# Define model...

302

smc_trace = pm.sample_smc(draws=2000, kernel='metropolis')

303

304

# Advanced SMC configuration

305

smc_trace = pm.sample_smc(

306

draws=5000,

307

kernel='nuts',

308

n_steps=50,

309

parallel=True,

310

cores=4

311

)

312

```

313

314

## Predictive Sampling

315

316

### Prior Predictive Sampling

317

318

```python { .api }

319

def sample_prior_predictive(samples=500, var_names=None, model=None,

320

size=None, keep_size=False, random_seed=None,

321

return_inferencedata=True, extend_inferencedata=True,

322

predictions=False, **kwargs):

323

"""

324

Sample from prior distributions.

325

326

Parameters:

327

- samples (int): Number of prior samples

328

- var_names (list): Variables to sample (default: all)

329

- size (int): Size for each sample draw

330

- predictions (bool): Include deterministic variables

331

- return_inferencedata (bool): Return ArviZ InferenceData

332

333

Returns:

334

- prior_samples: Prior predictive samples

335

"""

336

337

# Sample from priors

338

with pm.Model() as model:

339

# Define priors and likelihood...

340

prior_pred = pm.sample_prior_predictive(samples=1000)

341

342

# Sample specific variables only

343

prior_pred = pm.sample_prior_predictive(

344

samples=2000,

345

var_names=['alpha', 'beta', 'sigma']

346

)

347

```

348

349

### Posterior Predictive Sampling

350

351

```python { .api }

352

def sample_posterior_predictive(trace, samples=None, var_names=None,

353

size=None, keep_size=False, model=None,

354

progressbar=True, random_seed=None,

355

extend_inferencedata=True, predictions=False,

356

return_inferencedata=True, **kwargs):

357

"""

358

Sample from posterior predictive distribution.

359

360

Parameters:

361

- trace: MCMC trace or InferenceData object

362

- samples (int): Number of predictive samples per posterior sample

363

- var_names (list): Variables to predict

364

- size (int): Size for each prediction

365

- predictions (bool): Include deterministic predictions

366

367

Returns:

368

- posterior_pred: Posterior predictive samples

369

"""

370

371

# Basic posterior predictive sampling

372

with pm.Model() as model:

373

# Define model and sample...

374

trace = pm.sample()

375

post_pred = pm.sample_posterior_predictive(trace)

376

377

# Out-of-sample predictions

378

pm.set_data({'X_new': X_test}) # Update data

379

post_pred = pm.sample_posterior_predictive(

380

trace,

381

var_names=['y_pred'],

382

predictions=True

383

)

384

```

385

386

### Direct Sampling

387

388

```python { .api }

389

def draw(vars, draws=1, random_seed=None, **kwargs):

390

"""

391

Draw samples directly from distributions.

392

393

Parameters:

394

- vars: Distribution(s) to sample from

395

- draws (int): Number of samples

396

- random_seed (int): Random seed

397

398

Returns:

399

- samples: Array of samples

400

"""

401

402

# Draw from distribution objects

403

normal_samples = pm.draw(pm.Normal.dist(0, 1), draws=1000)

404

405

# Draw from multiple distributions

406

samples = pm.draw([

407

pm.Normal.dist(0, 1),

408

pm.Beta.dist(2, 3)

409

], draws=500)

410

411

# Draw within model context

412

with pm.Model() as model:

413

mu = pm.Normal('mu', 0, 1)

414

sigma = pm.HalfNormal('sigma', 1)

415

416

# Draw from prior

417

mu_samples = pm.draw(mu, draws=100)

418

```

419

420

## Sampling Utilities

421

422

### Compilation Functions

423

424

```python { .api }

425

def compile_forward_sampling_function(outputs, inputs, **kwargs):

426

"""

427

Compile fast forward sampling function.

428

429

Parameters:

430

- outputs (list): Output variables to sample

431

- inputs (list): Input parameter variables

432

- **kwargs: Additional compilation options

433

434

Returns:

435

- compiled_fn: Compiled sampling function

436

"""

437

438

# Compile sampling function for predictions

439

with pm.Model() as model:

440

# Define model...

441

sampling_fn = pm.compile_forward_sampling_function(

442

outputs=[y_pred],

443

inputs=[X, theta]

444

)

445

446

# Use compiled function

447

predictions = sampling_fn(X_new, theta_samples)

448

```

449

450

### Vectorized Operations

451

452

```python { .api }

453

def vectorize_over_posterior(fn, trace, var_names=None, **kwargs):

454

"""

455

Vectorize function over posterior samples.

456

457

Parameters:

458

- fn (callable): Function to vectorize

459

- trace: Posterior samples

460

- var_names (list): Variables to pass to function

461

462

Returns:

463

- results: Vectorized function results

464

"""

465

466

# Define custom function

467

def custom_prediction(alpha, beta, X):

468

return alpha + beta * X

469

470

# Vectorize over posterior

471

results = pm.vectorize_over_posterior(

472

custom_prediction,

473

trace,

474

var_names=['alpha', 'beta'],

475

X=X_new

476

)

477

```

478

479

## Advanced Sampling Techniques

480

481

### Custom Step Methods

482

483

```python { .api }

484

from pymc.step_methods import BlockedStep

485

486

class CustomStep(BlockedStep):

487

"""

488

Template for custom step methods.

489

"""

490

491

def __init__(self, vars, model=None, **kwargs):

492

self.vars = vars

493

self.model = model

494

super().__init__(vars, [model.compile_logp(vars)])

495

496

def step(self, point):

497

"""Implement custom step logic."""

498

# Custom sampling logic here

499

return new_point, stats

500

```

501

502

### Parallel Chain Sampling

503

504

```python { .api }

505

# Parallel sampling with custom configuration

506

import multiprocessing as mp

507

508

# Choose the multiprocessing start method explicitly.
# mp_ctx accepts a multiprocessing context (or its name, e.g. 'spawn'
# or 'fork') — not a Pool object.
trace = pm.sample(
    chains=4,
    cores=4,
    mp_ctx=mp.get_context('spawn')
)

515

516

# Control parallelization

517

trace = pm.sample(

518

chains=8, # More chains than cores

519

cores=4, # Limit parallel processes

520

chain_method='sequential' # Sequential within process

521

)

522

```

523

524

### Memory-Efficient Sampling

525

526

```python { .api }

527

# Use Zarr backend for large traces

528

trace = pm.sample(

529

draws=100000,

530

trace=pm.backends.ZarrTrace('large_trace.zarr')

531

)

532

533

# Streaming inference for very large models

534

with pm.Model() as streaming_model:

535

# Large model definition...

536

537

# Sample in batches

538

for batch in range(10):

539

batch_trace = pm.sample(

540

draws=1000,

541

trace=pm.backends.NDArray(),

542

compute_convergence_checks=False

543

)

544

# Process batch_trace...

545

```

546

547

## Sampling Diagnostics

548

549

### Built-in Diagnostics

550

551

```python { .api }

552

# Sample with full diagnostics

553

trace = pm.sample(

554

compute_convergence_checks=True, # Compute R-hat, ESS

555

keep_warning_stat=True, # Keep warning statistics

556

progressbar=True # Show progress

557

)

558

559

# Access sampling statistics

560

print(trace.get_sampler_stats('diverging').sum()) # Divergences

561

print(trace.get_sampler_stats('energy')) # Energy statistics

562

```

563

564

### Custom Callbacks

565

566

```python { .api }

567

def custom_callback(trace, draw):

568

"""Custom callback function called during sampling."""

569

if draw % 100 == 0:

570

print(f"Completed draw {draw}")

571

# Custom diagnostics or early stopping logic

572

573

# Sample with callback

574

trace = pm.sample(callback=custom_callback)

575

```

576

577

## Usage Patterns

578

579

### Model Selection via Sampling

580

581

```python { .api }

582

# Compare models using sampling

583

models = [model1, model2, model3]

584

traces = []

585

586

for model in models:

587

with model:

588

trace = pm.sample()

589

traces.append(trace)

590

591

# Compare using information criteria

592

comparison = pm.compare({f'model_{i}': trace

593

for i, trace in enumerate(traces)})

594

```

595

596

### Robust Sampling Strategies

597

598

```python { .api }

599

# Robust sampling for difficult models

600

with pm.Model() as difficult_model:

601

# Complex model definition...

602

603

# Start with MAP estimate

604

map_estimate = pm.find_MAP()

605

606

# Initialize sampler carefully

607

step = pm.NUTS(

608

target_accept=0.99, # Very high acceptance

609

max_treedepth=15, # Deep trees

610

step_scale=0.1 # Small initial steps

611

)

612

613

# Sample with more tuning

614

trace = pm.sample(

615

draws=2000,

616

tune=3000, # Extra tuning

617

init='adapt_full', # Full mass matrix

618

initvals=map_estimate, # Start from MAP

619

step=step

620

)

621

```

622

623

PyMC's sampling system provides flexible and efficient inference for a wide range of Bayesian models, from simple linear regression to complex hierarchical models with custom likelihood functions.