or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-handling.md · distributions.md · gaussian-processes.md · glm.md · index.md · math-functions.md · modeling.md · sampling.md · stats-plots.md · step-methods.md · variational.md

docs/gaussian-processes.md

0

# Gaussian Processes

1

2

PyMC3's Gaussian process module provides flexible non-parametric modeling capabilities for regression, classification, and other machine learning tasks. The implementation supports various covariance functions, mean functions, and inference methods including exact, sparse, and Kronecker-structured GPs.

3

4

## Capabilities

5

6

### GP Classes

7

8

Core Gaussian process implementations with different inference methods and computational strategies.

9

10

```python { .api }

11

class Marginal:

12

"""

13

Marginal Gaussian Process for regression with exact inference.

14

15

Integrates out the GP function values analytically, providing exact

16

posterior inference suitable for moderate-sized datasets (< 5000 points).

17

"""

18

19

def __init__(self, cov_func, mean_func=None):

20

"""

21

Initialize marginal GP.

22

23

Parameters:

24

- cov_func: covariance function defining GP prior

25

- mean_func: mean function (zero mean if None)

26

27

Returns:

28

- Marginal: marginal GP object

29

"""

30

31

def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):

32

"""

33

Add marginal likelihood term to model.

34

35

Parameters:

36

- name: str, variable name

37

- X: array, input locations (n_samples, n_features)

38

- y: array, observed outputs (n_samples,)

39

- noise: float or array, observation noise variance

40

- is_observed: bool, whether y is observed data

41

42

Returns:

43

- TensorVariable: marginal likelihood contribution

44

"""

45

46

def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):

47

"""

48

Posterior predictive distribution at new locations.

49

50

Parameters:

51

- name: str, variable name for predictions

52

- Xnew: array, new input locations

53

- pred_noise: bool, include predictive noise

54

- given: dict, conditioning data (uses marginal_likelihood if None)

55

56

Returns:

57

- TensorVariable: predictive distribution

58

"""

59

60

def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None):

61

"""

62

Posterior predictive mean and covariance.

63

64

Parameters:

65

- Xnew: array, prediction inputs

66

- point: dict, parameter values for prediction

67

- diag: bool, return only diagonal of covariance

68

- pred_noise: bool, include predictive noise

69

- given: dict, conditioning data

70

71

Returns:

72

- tuple: (mean, variance) predictions

73

"""

74

75

class Latent:

76

"""

77

Latent Gaussian Process for non-Gaussian likelihoods.

78

79

Treats GP function values as latent variables for use with

80

non-Gaussian observation models like Poisson, binomial, etc.

81

"""

82

83

def __init__(self, cov_func, mean_func=None):

84

"""

85

Initialize latent GP.

86

87

Parameters:

88

- cov_func: covariance function

89

- mean_func: mean function (zero if None)

90

"""

91

92

def prior(self, name, X, reparameterize=True, **kwargs):

93

"""

94

GP prior distribution over function values.

95

96

Parameters:

97

- name: str, variable name

98

- X: array, input locations

99

- reparameterize: bool, use non-centered parameterization

100

101

Returns:

102

- TensorVariable: GP function values

103

"""

104

105

def conditional(self, name, Xnew, given=None, **kwargs):

106

"""

107

Conditional distribution at new inputs.

108

109

Parameters:

110

- name: str, variable name

111

- Xnew: array, new input locations

112

- given: dict, conditioning GP values

113

114

Returns:

115

- TensorVariable: conditional GP values

116

"""

117

118

class MarginalSparse:

119

"""

120

Sparse Gaussian Process using inducing points for scalability.

121

122

Approximates full GP using a smaller set of inducing points,

123

enabling GP inference for large datasets (> 10,000 points).

124

"""

125

126

def __init__(self, cov_func, approx='FITC', mean_func=None):

127

"""

128

Initialize sparse GP.

129

130

Parameters:

131

- cov_func: covariance function

132

- approx: str, sparse approximation method ('FITC', 'VFE', 'DTC')

133

- mean_func: mean function

134

"""

135

136

def marginal_likelihood(self, name, X, Xu, y, noise, is_observed=True, **kwargs):

137

"""

138

Sparse marginal likelihood.

139

140

Parameters:

141

- name: str, variable name

142

- X: array, training inputs

143

- Xu: array, inducing point locations

144

- y: array, training outputs

145

- noise: float, observation noise

146

- is_observed: bool, whether y is observed

147

148

Returns:

149

- TensorVariable: sparse marginal likelihood

150

"""

151

152

class MarginalKron:

153

"""

154

Kronecker-structured GP for multi-dimensional inputs.

155

156

Exploits Kronecker structure in covariance for efficient

157

computation with grid-structured or separable inputs.

158

"""

159

160

def __init__(self, cov_funcs, mean_func=None):

161

"""

162

Initialize Kronecker GP.

163

164

Parameters:

165

- cov_funcs: list, covariance functions for each dimension

166

- mean_func: mean function

167

"""

168

169

class LatentKron:

170

"""

171

Latent Kronecker GP for structured multi-dimensional problems.

172

173

Combines Kronecker structure with latent variable formulation

174

for non-Gaussian likelihoods on grid data.

175

"""

176

177

class TP:

178

"""

179

Student's T Process for robust non-parametric regression.

180

181

Heavy-tailed extension of GPs using Student's t distributions

182

for robustness to outliers and model misspecification.

183

"""

184

185

def __init__(self, cov_func, mean_func=None):

186

"""

187

Initialize T process.

188

189

Parameters:

190

- cov_func: covariance function

191

- mean_func: mean function

192

"""

193

```

194

195

### Covariance Functions

196

197

Covariance functions defining GP priors through the `pm.gp.cov` module.

198

199

```python { .api }

200

# Available as pm.gp.cov.ExpQuad, pm.gp.cov.Matern52, etc.

201

202

class ExpQuad:

203

"""

204

Exponentiated quadratic (RBF/squared exponential) covariance.

205

206

Smooth, infinitely differentiable covariance function suitable

207

for modeling smooth processes.

208

"""

209

210

def __init__(self, input_dim, ls=None, ls_inv=None, active_dims=None):

211

"""

212

Parameters:

213

- input_dim: int, number of input dimensions

214

- ls: float or array, length scale parameters

215

- ls_inv: float or array, inverse length scales (alternative to ls)

216

- active_dims: list, active input dimensions

217

"""

218

219

class Matern52:

220

"""

221

Matérn covariance function with ν = 5/2.

222

223

Twice differentiable covariance providing balance between

224

smoothness and flexibility.

225

"""

226

227

class Matern32:

228

"""

229

Matérn covariance function with ν = 3/2.

230

231

Once differentiable covariance for moderately rough functions.

232

"""

233

234

class RatQuad:

235

"""

236

Rational quadratic covariance function.

237

238

Infinite mixture of RBF kernels with different length scales,

239

providing scale-invariant modeling capability.

240

"""

241

242

def __init__(self, input_dim, alpha=None, ls=None, active_dims=None):

243

"""

244

Parameters:

245

- input_dim: int, input dimension

246

- alpha: float, shape parameter controlling tail behavior

247

- ls: float or array, length scale parameters

248

- active_dims: list, active dimensions

249

"""

250

251

class Linear:

252

"""

253

Linear covariance function.

254

255

Models linear relationships and polynomial trends

256

when combined with other covariance functions.

257

"""

258

259

def __init__(self, input_dim, c=None, active_dims=None):

260

"""

261

Parameters:

262

- input_dim: int, input dimension

263

- c: float or array, offset parameters

264

- active_dims: list, active dimensions

265

"""

266

267

class Polynomial:

268

"""

269

Polynomial covariance function.

270

271

Models polynomial relationships of specified degree.

272

"""

273

274

def __init__(self, input_dim, c=None, d=2, offset=None, active_dims=None):

275

"""

276

Parameters:

277

- input_dim: int, input dimension

278

- c: float, scaling parameter

279

- d: int, polynomial degree

280

- offset: float, offset parameter

281

- active_dims: list, active dimensions

282

"""

283

284

class Cosine:

285

"""

286

Cosine covariance function for periodic processes.

287

288

Models exactly periodic functions with known period.

289

"""

290

291

def __init__(self, input_dim, ls=None, active_dims=None):

292

"""

293

Parameters:

294

- input_dim: int, input dimension

295

- ls: float or array, period parameters

296

- active_dims: list, active dimensions

297

"""

298

299

class Periodic:

300

"""

301

Periodic covariance function.

302

303

Combines periodicity with additional smoothness control

304

for quasi-periodic and approximately periodic functions.

305

"""

306

307

def __init__(self, input_dim, period=None, ls=None, active_dims=None):

308

"""

309

Parameters:

310

- input_dim: int, input dimension

311

- period: float or array, period parameters

312

- ls: float or array, length scale parameters

313

- active_dims: list, active dimensions

314

"""

315

316

class WarpedInput:

317

"""

318

Warped input covariance function.

319

320

Applies input transformation before computing covariance,

321

enabling non-stationary modeling.

322

"""

323

324

def __init__(self, cov_func, warp_func, args):

325

"""

326

Parameters:

327

- cov_func: base covariance function

328

- warp_func: input warping function

329

- args: arguments for warping function

330

"""

331

332

class Gibbs:

333

"""

334

Gibbs non-stationary covariance function.

335

336

Length scale varies as function of input location,

337

enabling locally adaptive smoothness.

338

"""

339

340

def __init__(self, input_dim, lengthscale_func, args, active_dims=None):

341

"""

342

Parameters:

343

- input_dim: int, input dimension

344

- lengthscale_func: function defining spatially varying length scale

345

- args: arguments for length scale function

346

- active_dims: list, active dimensions

347

"""

348

349

# Covariance function operators

350

class Add:

351

"""

352

Additive covariance (sum of covariance functions).

353

354

Combines multiple covariance functions additively

355

for modeling multiple patterns simultaneously.

356

"""

357

358

class Prod:

359

"""

360

Multiplicative covariance (product of covariance functions).

361

362

Models interactions between different input dimensions

363

or combines different types of structure.

364

"""

365

366

class Kron:

367

"""

368

Kronecker product covariance for separable multi-dimensional inputs.

369

370

Assumes independence between input dimensions while

371

maintaining within-dimension correlations.

372

"""

373

```

374

375

### Mean Functions

376

377

Mean functions for GP priors through the `pm.gp.mean` module.

378

379

```python { .api }

380

# Available as pm.gp.mean.Zero, pm.gp.mean.Constant, etc.

381

382

class Zero:

383

"""

384

Zero mean function (default).

385

386

Assumes zero prior mean for GP, suitable when

387

data is centered or mean is captured by other model components.

388

"""

389

390

class Constant:

391

"""

392

Constant mean function.

393

394

Non-zero constant prior mean for GP.

395

"""

396

397

def __init__(self, c=None):

398

"""

399

Parameters:

400

- c: float, constant mean value

401

"""

402

403

class Linear:

404

"""

405

Linear mean function.

406

407

Linear trend in GP mean function.

408

"""

409

410

def __init__(self, coeffs=None, intercept=None):

411

"""

412

Parameters:

413

- coeffs: array, linear coefficients

414

- intercept: float, intercept term

415

"""

416

```

417

418

### GP Utilities

419

420

Utility functions for GP modeling and visualization.

421

422

```python { .api }

423

def plot_gp_dist(ax, samples=[], plot_samples=True, palette='Reds',
                 fill_alpha=0.8, samples_alpha=0.7, fill_kwargs=None,
                 samples_kwargs=None):

426

"""

427

Plot GP distribution with uncertainty bands.

428

429

Visualizes GP posterior with credible intervals and

430

optional sample trajectories from the posterior.

431

432

Parameters:

433

- ax: matplotlib axis for plotting

434

- samples: array, GP sample trajectories

435

- plot_samples: bool, whether to plot individual samples

436

- palette: str, color palette for visualization

437

- fill_alpha: float, transparency for uncertainty bands

438

- samples_alpha: float, transparency for sample lines

439

- fill_kwargs: dict, arguments for uncertainty band plotting

440

- samples_kwargs: dict, arguments for sample line plotting

441

442

Returns:

443

- matplotlib artists: plot elements

444

"""

445

446

# GP utilities available in pm.gp.util module

447

def conditional_cov(Kxx, Kxz, Kzz_inv):

448

"""

449

Compute conditional covariance for GP prediction.

450

451

Parameters:

452

- Kxx: array, covariance between prediction points

453

- Kxz: array, cross-covariance between prediction and conditioning points

454

- Kzz_inv: array, inverse covariance of conditioning points

455

456

Returns:

457

- array: conditional covariance matrix

458

"""

459

460

def conditional_mean(Kxz, Kzz_inv, y):

461

"""

462

Compute conditional mean for GP prediction.

463

464

Parameters:

465

- Kxz: array, cross-covariance matrix

466

- Kzz_inv: array, inverse conditioning covariance

467

- y: array, conditioning outputs

468

469

Returns:

470

- array: conditional mean

471

"""

472

473

def stabilize(K, jitter=1e-6):

474

"""

475

Numerically stabilize covariance matrix.

476

477

Parameters:

478

- K: array, covariance matrix

479

- jitter: float, diagonal noise for numerical stability

480

481

Returns:

482

- array: stabilized covariance matrix

483

"""

484

```

485

486

## Usage Examples

487

488

### Basic GP Regression

489

490

```python

491

import pymc3 as pm

492

import numpy as np

493

import matplotlib.pyplot as plt

494

495

# Generate synthetic data

496

np.random.seed(42)

497

n = 50

498

X = np.linspace(0, 10, n)[:, None]

499

true_func = lambda x: np.sin(x) + 0.1 * x**2

500

y = true_func(X.ravel()) + np.random.normal(0, 0.1, n)

501

502

# GP regression model

503

with pm.Model() as gp_model:

504

# Covariance function parameters

505

ls = pm.Gamma('ls', alpha=2, beta=1) # length scale

506

sigma_f = pm.HalfNormal('sigma_f', sigma=1) # signal variance

507

508

# Covariance function

509

cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)

510

511

# GP prior

512

gp = pm.gp.Marginal(cov_func=cov)

513

514

# Observation noise

515

sigma_y = pm.HalfNormal('sigma_y', sigma=0.5)

516

517

# Marginal likelihood

518

y_obs = gp.marginal_likelihood('y_obs', X=X, y=y, noise=sigma_y)

519

520

# Inference

521

trace = pm.sample(1000, tune=1000, chains=2)

522

523

# Prediction

524

X_new = np.linspace(-1, 11, 100)[:, None]

525

with gp_model:

526

f_pred = gp.conditional('f_pred', X_new)

527

pred_samples = pm.sample_posterior_predictive(trace, vars=[f_pred], samples=100)

528

```

529

530

### GP Classification

531

532

```python

533

# GP for binary classification

534

n_train = 100

535

X_train = np.random.randn(n_train, 2)

536

true_boundary = lambda x: x[:, 0] + 0.5 * x[:, 1]**2

537

y_train = (true_boundary(X_train) > 0).astype(int)

538

539

with pm.Model() as gp_classification:

540

# GP parameters

541

ls = pm.Gamma('ls', alpha=2, beta=1, shape=2)

542

543

# Covariance function

544

cov = pm.gp.cov.ExpQuad(2, ls)

545

546

# Latent GP

547

gp = pm.gp.Latent(cov_func=cov)

548

549

# GP prior over latent function

550

f = gp.prior('f', X=X_train)

551

552

# Probit likelihood

553

y_obs = pm.Bernoulli('y_obs', p=pm.math.sigmoid(f), observed=y_train)

554

555

# Sampling

556

trace = pm.sample(1000, tune=1000)

557

558

# Prediction for classification

559

X_test = np.random.randn(50, 2)

560

with gp_classification:

561

f_test = gp.conditional('f_test', X_test)

562

test_samples = pm.sample_posterior_predictive(trace, vars=[f_test])

563

564

# Classification probabilities

565

prob_samples = pm.math.sigmoid(test_samples['f_test'])

566

mean_probs = prob_samples.mean(axis=0)

567

```

568

569

### Sparse GP for Large Datasets

570

571

```python

572

# Large dataset with sparse GP

573

n_large = 5000

574

X_large = np.random.uniform(0, 20, (n_large, 1))

575

y_large = np.sin(X_large.ravel()) + 0.2 * np.random.randn(n_large)

576

577

# Inducing points

578

n_inducing = 50

579

Xu = np.linspace(0, 20, n_inducing)[:, None]

580

581

with pm.Model() as sparse_gp:

582

# GP parameters

583

ls = pm.Gamma('ls', alpha=2, beta=1)

584

sigma_f = pm.HalfNormal('sigma_f', sigma=1)

585

586

# Covariance function

587

cov = sigma_f**2 * pm.gp.cov.Matern52(1, ls)

588

589

# Sparse GP

590

gp = pm.gp.MarginalSparse(cov_func=cov, approx='FITC')

591

592

# Noise parameter

593

sigma_y = pm.HalfNormal('sigma_y', sigma=1)

594

595

# Sparse marginal likelihood

596

y_obs = gp.marginal_likelihood('y_obs',

597

X=X_large,

598

Xu=Xu,

599

y=y_large,

600

noise=sigma_y)

601

602

# Efficient sampling

603

trace = pm.sample(500, tune=500)

604

```

605

606

### Multi-Output GP

607

608

```python

609

# Multi-output GP regression

610

n_outputs = 3

611

X_multi = np.linspace(0, 5, 30)[:, None]

612

613

# Correlated outputs

614

true_funcs = [

615

lambda x: np.sin(2*x),

616

lambda x: np.cos(2*x),

617

lambda x: np.sin(2*x) + np.cos(2*x)

618

]

619

620

Y_multi = np.column_stack([f(X_multi.ravel()) for f in true_funcs])

621

Y_multi += 0.1 * np.random.randn(*Y_multi.shape)

622

623

with pm.Model() as multi_gp:

624

# Shared covariance parameters

625

ls = pm.Gamma('ls', alpha=2, beta=1)

626

627

# Output-specific parameters

628

sigma_f = pm.HalfNormal('sigma_f', sigma=1, shape=n_outputs)

629

sigma_y = pm.HalfNormal('sigma_y', sigma=1, shape=n_outputs)

630

631

# Independent GPs for each output

632

gps = []

633

for i in range(n_outputs):

634

cov_i = sigma_f[i]**2 * pm.gp.cov.ExpQuad(1, ls)

635

gp_i = pm.gp.Marginal(cov_func=cov_i)

636

637

y_obs_i = gp_i.marginal_likelihood(f'y_obs_{i}',

638

X=X_multi,

639

y=Y_multi[:, i],

640

noise=sigma_y[i])

641

gps.append(gp_i)

642

643

trace = pm.sample(1000, tune=1000)

644

```

645

646

### Periodic GP

647

648

```python

649

# Periodic GP for seasonal data

650

t = np.linspace(0, 4*np.pi, 100)

651

y_seasonal = np.sin(t) + 0.5*np.cos(2*t) + 0.1*t + 0.1*np.random.randn(len(t))

652

653

with pm.Model() as periodic_gp:

654

# Periodic covariance parameters

655

period = pm.Gamma('period', alpha=10, beta=1.5) # Expected period ~ 2π

656

ls_periodic = pm.Gamma('ls_periodic', alpha=2, beta=1)

657

658

# Trend parameters

659

ls_trend = pm.Gamma('ls_trend', alpha=2, beta=0.5)

660

661

# Signal variances

662

sigma_periodic = pm.HalfNormal('sigma_periodic', sigma=1)

663

sigma_trend = pm.HalfNormal('sigma_trend', sigma=1)

664

665

# Combined covariance: periodic + trend

666

cov_periodic = sigma_periodic**2 * pm.gp.cov.Periodic(1, period=period, ls=ls_periodic)

667

cov_trend = sigma_trend**2 * pm.gp.cov.ExpQuad(1, ls_trend)

668

cov_combined = cov_periodic + cov_trend

669

670

# GP model

671

gp = pm.gp.Marginal(cov_func=cov_combined)

672

673

# Observation noise

674

sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)

675

676

# Likelihood

677

X_time = t[:, None]

678

y_obs = gp.marginal_likelihood('y_obs', X=X_time, y=y_seasonal, noise=sigma_y)

679

680

trace = pm.sample(1000, tune=1000)

681

682

# Extrapolate to future

683

t_future = np.linspace(0, 6*np.pi, 150)[:, None]

684

with periodic_gp:

685

f_future = gp.conditional('f_future', t_future)

686

future_pred = pm.sample_posterior_predictive(trace, vars=[f_future])

687

```

688

689

### Non-Stationary GP

690

691

```python

692

# Non-stationary GP using input warping

693

X_nonstat = np.linspace(0, 10, 60)[:, None]

694

# Function with varying smoothness

695

y_nonstat = np.where(X_nonstat.ravel() < 5,

696

np.sin(5*X_nonstat.ravel()), # High frequency

697

np.sin(X_nonstat.ravel())) # Low frequency

698

y_nonstat += 0.1 * np.random.randn(len(y_nonstat))

699

700

with pm.Model() as nonstat_gp:

701

# Warping function parameters

702

alpha = pm.Normal('alpha', mu=0, sigma=1)

703

beta = pm.HalfNormal('beta', sigma=1)

704

705

# Input warping: stretch inputs differently in different regions

706

def warp_func(x, alpha, beta):

707

return x + alpha * pm.math.tanh(beta * (x - 5))

708

709

# Base covariance function

710

ls = pm.Gamma('ls', alpha=2, beta=1)

711

sigma_f = pm.HalfNormal('sigma_f', sigma=1)

712

base_cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)

713

714

# Warped covariance

715

cov_warped = pm.gp.cov.WarpedInput(base_cov, warp_func, (alpha, beta))

716

717

# GP with warped inputs

718

gp = pm.gp.Marginal(cov_func=cov_warped)

719

720

# Observation noise

721

sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)

722

723

# Likelihood

724

y_obs = gp.marginal_likelihood('y_obs', X=X_nonstat, y=y_nonstat, noise=sigma_y)

725

726

trace = pm.sample(1000, tune=1000)

727

```

728

729

### Hierarchical GP

730

731

```python

732

# Hierarchical GP with group-specific parameters

733

n_groups = 4

734

n_per_group = 25

735

736

# Generate group data with different characteristics

737

group_data = []

738

group_labels = []

739

for g in range(n_groups):

740

X_g = np.random.uniform(0, 10, n_per_group)[:, None]

741

# Different length scales per group

742

ls_true = 1 + g * 0.5

743

y_g = np.sin(X_g.ravel() / ls_true) + 0.1 * np.random.randn(n_per_group)

744

745

group_data.append((X_g, y_g))

746

group_labels.extend([g] * n_per_group)

747

748

# Combine data

749

X_all = np.vstack([X for X, y in group_data])

750

y_all = np.hstack([y for X, y in group_data])

751

group_idx = np.array(group_labels)

752

753

with pm.Model() as hierarchical_gp:

754

# Hierarchical priors for length scales

755

mu_ls = pm.Gamma('mu_ls', alpha=2, beta=1)

756

sigma_ls = pm.HalfNormal('sigma_ls', sigma=1)

757

ls_group = pm.Gamma('ls_group', alpha=2, beta=1/sigma_ls, shape=n_groups)

758

759

# Shared signal variance

760

sigma_f = pm.HalfNormal('sigma_f', sigma=1)

761

762

# Group-specific GPs

763

gps = []

764

for g in range(n_groups):

765

# Group mask

766

mask = group_idx == g

767

X_g = X_all[mask]

768

y_g = y_all[mask]

769

770

# Group covariance function

771

cov_g = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls_group[g])

772

gp_g = pm.gp.Marginal(cov_func=cov_g)

773

774

# Group likelihood

775

sigma_y = pm.HalfNormal(f'sigma_y_{g}', sigma=0.5)

776

y_obs_g = gp_g.marginal_likelihood(f'y_obs_{g}',

777

X=X_g,

778

y=y_g,

779

noise=sigma_y)

780

gps.append(gp_g)

781

782

trace = pm.sample(1000, tune=1000)

783

784

# Analyze group differences

785

ls_posterior = trace['ls_group']

786

print("Group length scales (posterior means):")

787

for g in range(n_groups):

788

print(f"Group {g}: {ls_posterior[:, g].mean():.3f}")

789

```