# Gaussian Processes

PyMC3's Gaussian process module provides flexible non-parametric modeling capabilities for regression, classification, and other machine learning tasks. The implementation supports various covariance functions, mean functions, and inference methods including exact, sparse, and Kronecker-structured GPs.

## Capabilities

### GP Classes

Core Gaussian process implementations with different inference methods and computational strategies.

```python { .api }
class Marginal:
    """
    Marginal Gaussian Process for regression with exact inference.

    Integrates out the GP function values analytically, providing exact
    posterior inference suitable for moderate-sized datasets (< 5000 points).
    """

    def __init__(self, cov_func, mean_func=None):
        """
        Initialize marginal GP.

        Parameters:
        - cov_func: covariance function defining GP prior
        - mean_func: mean function (zero mean if None)

        Returns:
        - Marginal: marginal GP object
        """

    def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
        """
        Add marginal likelihood term to model.

        Parameters:
        - name: str, variable name
        - X: array, input locations (n_samples, n_features)
        - y: array, observed outputs (n_samples,)
        - noise: float or array, observation noise variance
        - is_observed: bool, whether y is observed data

        Returns:
        - TensorVariable: marginal likelihood contribution
        """

    def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
        """
        Posterior predictive distribution at new locations.

        Parameters:
        - name: str, variable name for predictions
        - Xnew: array, new input locations
        - pred_noise: bool, include predictive noise
        - given: dict, conditioning data (uses marginal_likelihood if None)

        Returns:
        - TensorVariable: predictive distribution
        """

    def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None):
        """
        Posterior predictive mean and covariance.

        Parameters:
        - Xnew: array, prediction inputs
        - point: dict, parameter values for prediction
        - diag: bool, return only diagonal of covariance
        - pred_noise: bool, include predictive noise
        - given: dict, conditioning data

        Returns:
        - tuple: (mean, variance) predictions
        """

class Latent:
    """
    Latent Gaussian Process for non-Gaussian likelihoods.

    Treats GP function values as latent variables for use with
    non-Gaussian observation models like Poisson, binomial, etc.
    """

    def __init__(self, cov_func, mean_func=None):
        """
        Initialize latent GP.

        Parameters:
        - cov_func: covariance function
        - mean_func: mean function (zero if None)
        """

    def prior(self, name, X, reparameterize=True, **kwargs):
        """
        GP prior distribution over function values.

        Parameters:
        - name: str, variable name
        - X: array, input locations
        - reparameterize: bool, use non-centered parameterization

        Returns:
        - TensorVariable: GP function values
        """

    def conditional(self, name, Xnew, given=None, **kwargs):
        """
        Conditional distribution at new inputs.

        Parameters:
        - name: str, variable name
        - Xnew: array, new input locations
        - given: dict, conditioning GP values

        Returns:
        - TensorVariable: conditional GP values
        """

class MarginalSparse:
    """
    Sparse Gaussian Process using inducing points for scalability.

    Approximates full GP using a smaller set of inducing points,
    enabling GP inference for large datasets (> 10,000 points).
    """

    def __init__(self, cov_func, approx='FITC', mean_func=None):
        """
        Initialize sparse GP.

        Parameters:
        - cov_func: covariance function
        - approx: str, sparse approximation method ('FITC', 'VFE', 'DTC')
        - mean_func: mean function
        """

    def marginal_likelihood(self, name, X, Xu, y, noise, is_observed=True, **kwargs):
        """
        Sparse marginal likelihood.

        Parameters:
        - name: str, variable name
        - X: array, training inputs
        - Xu: array, inducing point locations
        - y: array, training outputs
        - noise: float, observation noise
        - is_observed: bool, whether y is observed

        Returns:
        - TensorVariable: sparse marginal likelihood
        """

class MarginalKron:
    """
    Kronecker-structured GP for multi-dimensional inputs.

    Exploits Kronecker structure in covariance for efficient
    computation with grid-structured or separable inputs.
    """

    def __init__(self, cov_funcs, mean_func=None):
        """
        Initialize Kronecker GP.

        Parameters:
        - cov_funcs: list, covariance functions for each dimension
        - mean_func: mean function
        """

class LatentKron:
    """
    Latent Kronecker GP for structured multi-dimensional problems.

    Combines Kronecker structure with latent variable formulation
    for non-Gaussian likelihoods on grid data.
    """

class TP:
    """
    Student's T Process for robust non-parametric regression.

    Heavy-tailed extension of GPs using Student's t distributions
    for robustness to outliers and model misspecification.
    """

    def __init__(self, cov_func, mean_func=None):
        """
        Initialize T process.

        Parameters:
        - cov_func: covariance function
        - mean_func: mean function
        """
```

### Covariance Functions

Covariance functions defining GP priors through the `pm.gp.cov` module.

```python { .api }
# Available as pm.gp.cov.ExpQuad, pm.gp.cov.Matern52, etc.

class ExpQuad:
    """
    Exponentiated quadratic (RBF/squared exponential) covariance.

    Smooth, infinitely differentiable covariance function suitable
    for modeling smooth processes.
    """

    def __init__(self, input_dim, ls=None, ls_inv=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, number of input dimensions
        - ls: float or array, length scale parameters
        - ls_inv: float or array, inverse length scales (alternative to ls)
        - active_dims: list, active input dimensions
        """

class Matern52:
    """
    Matérn covariance function with ν = 5/2.

    Twice differentiable covariance providing balance between
    smoothness and flexibility.
    """

class Matern32:
    """
    Matérn covariance function with ν = 3/2.

    Once differentiable covariance for moderately rough functions.
    """

class RatQuad:
    """
    Rational quadratic covariance function.

    Infinite mixture of RBF kernels with different length scales,
    providing scale-invariant modeling capability.
    """

    def __init__(self, input_dim, alpha=None, ls=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - alpha: float, shape parameter controlling tail behavior
        - ls: float or array, length scale parameters
        - active_dims: list, active dimensions
        """

class Linear:
    """
    Linear covariance function.

    Models linear relationships and polynomial trends
    when combined with other covariance functions.
    """

    def __init__(self, input_dim, c=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - c: float or array, offset parameters
        - active_dims: list, active dimensions
        """

class Polynomial:
    """
    Polynomial covariance function.

    Models polynomial relationships of specified degree.
    """

    def __init__(self, input_dim, c=None, d=2, offset=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - c: float, scaling parameter
        - d: int, polynomial degree
        - offset: float, offset parameter
        - active_dims: list, active dimensions
        """

class Cosine:
    """
    Cosine covariance function for periodic processes.

    Models exactly periodic functions with known period.
    """

    def __init__(self, input_dim, ls=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - ls: float or array, period parameters
        - active_dims: list, active dimensions
        """

class Periodic:
    """
    Periodic covariance function.

    Combines periodicity with additional smoothness control
    for quasi-periodic and approximately periodic functions.
    """

    def __init__(self, input_dim, period=None, ls=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - period: float or array, period parameters
        - ls: float or array, length scale parameters
        - active_dims: list, active dimensions
        """

class WarpedInput:
    """
    Warped input covariance function.

    Applies input transformation before computing covariance,
    enabling non-stationary modeling.
    """

    def __init__(self, cov_func, warp_func, args):
        """
        Parameters:
        - cov_func: base covariance function
        - warp_func: input warping function
        - args: arguments for warping function
        """

class Gibbs:
    """
    Gibbs non-stationary covariance function.

    Length scale varies as function of input location,
    enabling locally adaptive smoothness.
    """

    def __init__(self, input_dim, lengthscale_func, args, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - lengthscale_func: function defining spatially varying length scale
        - args: arguments for length scale function
        - active_dims: list, active dimensions
        """

# Covariance function operators
class Add:
    """
    Additive covariance (sum of covariance functions).

    Combines multiple covariance functions additively
    for modeling multiple patterns simultaneously.
    """

class Prod:
    """
    Multiplicative covariance (product of covariance functions).

    Models interactions between different input dimensions
    or combines different types of structure.
    """

class Kron:
    """
    Kronecker product covariance for separable multi-dimensional inputs.

    Assumes independence between input dimensions while
    maintaining within-dimension correlations.
    """
```

### Mean Functions

Mean functions for GP priors through the `pm.gp.mean` module.

```python { .api }
# Available as pm.gp.mean.Zero, pm.gp.mean.Constant, etc.

class Zero:
    """
    Zero mean function (default).

    Assumes zero prior mean for GP, suitable when
    data is centered or mean is captured by other model components.
    """

class Constant:
    """
    Constant mean function.

    Non-zero constant prior mean for GP.
    """

    def __init__(self, c=None):
        """
        Parameters:
        - c: float, constant mean value
        """

class Linear:
    """
    Linear mean function.

    Linear trend in GP mean function.
    """

    def __init__(self, coeffs=None, intercept=None):
        """
        Parameters:
        - coeffs: array, linear coefficients
        - intercept: float, intercept term
        """
```

### GP Utilities

Utility functions for GP modeling and visualization.

```python { .api }
def plot_gp_dist(ax, samples=[], plot_samples=True, palette='Reds',
                 fill_alpha=0.8, samples_alpha=0.7, fill_kwargs=None,
                 samples_kwargs=None):
    """
    Plot GP distribution with uncertainty bands.

    Visualizes GP posterior with credible intervals and
    optional sample trajectories from the posterior.

    Parameters:
    - ax: matplotlib axis for plotting
    - samples: array, GP sample trajectories
    - plot_samples: bool, whether to plot individual samples
    - palette: str, color palette for visualization
    - fill_alpha: float, transparency for uncertainty bands
    - samples_alpha: float, transparency for sample lines
    - fill_kwargs: dict, arguments for uncertainty band plotting
    - samples_kwargs: dict, arguments for sample line plotting

    Returns:
    - matplotlib artists: plot elements
    """

# GP utilities available in pm.gp.util module
def conditional_cov(Kxx, Kxz, Kzz_inv):
    """
    Compute conditional covariance for GP prediction.

    Parameters:
    - Kxx: array, covariance between prediction points
    - Kxz: array, cross-covariance between prediction and conditioning points
    - Kzz_inv: array, inverse covariance of conditioning points

    Returns:
    - array: conditional covariance matrix
    """

def conditional_mean(Kxz, Kzz_inv, y):
    """
    Compute conditional mean for GP prediction.

    Parameters:
    - Kxz: array, cross-covariance matrix
    - Kzz_inv: array, inverse conditioning covariance
    - y: array, conditioning outputs

    Returns:
    - array: conditional mean
    """

def stabilize(K, jitter=1e-6):
    """
    Numerically stabilize covariance matrix.

    Parameters:
    - K: array, covariance matrix
    - jitter: float, diagonal noise for numerical stability

    Returns:
    - array: stabilized covariance matrix
    """
```

## Usage Examples

### Basic GP Regression

```python
import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt

# Generate synthetic data
np.random.seed(42)
n = 50
X = np.linspace(0, 10, n)[:, None]
true_func = lambda x: np.sin(x) + 0.1 * x**2
y = true_func(X.ravel()) + np.random.normal(0, 0.1, n)

# GP regression model
with pm.Model() as gp_model:
    # Covariance function parameters
    ls = pm.Gamma('ls', alpha=2, beta=1)          # length scale
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)   # signal variance

    # Covariance function
    cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)

    # GP prior
    gp = pm.gp.Marginal(cov_func=cov)

    # Observation noise
    sigma_y = pm.HalfNormal('sigma_y', sigma=0.5)

    # Marginal likelihood
    y_obs = gp.marginal_likelihood('y_obs', X=X, y=y, noise=sigma_y)

    # Inference
    trace = pm.sample(1000, tune=1000, chains=2)

# Prediction
X_new = np.linspace(-1, 11, 100)[:, None]
with gp_model:
    f_pred = gp.conditional('f_pred', X_new)
    pred_samples = pm.sample_posterior_predictive(trace, vars=[f_pred], samples=100)
```

### GP Classification

```python
# GP for binary classification
n_train = 100
X_train = np.random.randn(n_train, 2)
true_boundary = lambda x: x[:, 0] + 0.5 * x[:, 1]**2
y_train = (true_boundary(X_train) > 0).astype(int)

with pm.Model() as gp_classification:
    # GP parameters
    ls = pm.Gamma('ls', alpha=2, beta=1, shape=2)

    # Covariance function
    cov = pm.gp.cov.ExpQuad(2, ls)

    # Latent GP
    gp = pm.gp.Latent(cov_func=cov)

    # GP prior over latent function
    f = gp.prior('f', X=X_train)

    # Bernoulli likelihood with logistic (sigmoid) link
    y_obs = pm.Bernoulli('y_obs', p=pm.math.sigmoid(f), observed=y_train)

    # Sampling
    trace = pm.sample(1000, tune=1000)

# Prediction for classification
X_test = np.random.randn(50, 2)
with gp_classification:
    f_test = gp.conditional('f_test', X_test)
    test_samples = pm.sample_posterior_predictive(trace, vars=[f_test])

# Classification probabilities
prob_samples = pm.math.sigmoid(test_samples['f_test'])
mean_probs = prob_samples.mean(axis=0)
```

### Sparse GP for Large Datasets

```python
# Large dataset with sparse GP
n_large = 5000
X_large = np.random.uniform(0, 20, (n_large, 1))
y_large = np.sin(X_large.ravel()) + 0.2 * np.random.randn(n_large)

# Inducing points
n_inducing = 50
Xu = np.linspace(0, 20, n_inducing)[:, None]

with pm.Model() as sparse_gp:
    # GP parameters
    ls = pm.Gamma('ls', alpha=2, beta=1)
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)

    # Covariance function
    cov = sigma_f**2 * pm.gp.cov.Matern52(1, ls)

    # Sparse GP
    gp = pm.gp.MarginalSparse(cov_func=cov, approx='FITC')

    # Noise parameter
    sigma_y = pm.HalfNormal('sigma_y', sigma=1)

    # Sparse marginal likelihood
    y_obs = gp.marginal_likelihood('y_obs',
                                   X=X_large,
                                   Xu=Xu,
                                   y=y_large,
                                   noise=sigma_y)

    # Efficient sampling
    trace = pm.sample(500, tune=500)
```

### Multi-Output GP

```python
# Multi-output GP regression
n_outputs = 3
X_multi = np.linspace(0, 5, 30)[:, None]

# Correlated outputs
true_funcs = [
    lambda x: np.sin(2*x),
    lambda x: np.cos(2*x),
    lambda x: np.sin(2*x) + np.cos(2*x)
]

Y_multi = np.column_stack([f(X_multi.ravel()) for f in true_funcs])
Y_multi += 0.1 * np.random.randn(*Y_multi.shape)

with pm.Model() as multi_gp:
    # Shared covariance parameters
    ls = pm.Gamma('ls', alpha=2, beta=1)

    # Output-specific parameters
    sigma_f = pm.HalfNormal('sigma_f', sigma=1, shape=n_outputs)
    sigma_y = pm.HalfNormal('sigma_y', sigma=1, shape=n_outputs)

    # Independent GPs for each output
    gps = []
    for i in range(n_outputs):
        cov_i = sigma_f[i]**2 * pm.gp.cov.ExpQuad(1, ls)
        gp_i = pm.gp.Marginal(cov_func=cov_i)

        y_obs_i = gp_i.marginal_likelihood(f'y_obs_{i}',
                                           X=X_multi,
                                           y=Y_multi[:, i],
                                           noise=sigma_y[i])
        gps.append(gp_i)

    trace = pm.sample(1000, tune=1000)
```

### Periodic GP

```python
# Periodic GP for seasonal data
t = np.linspace(0, 4*np.pi, 100)
y_seasonal = np.sin(t) + 0.5*np.cos(2*t) + 0.1*t + 0.1*np.random.randn(len(t))

with pm.Model() as periodic_gp:
    # Periodic covariance parameters
    period = pm.Gamma('period', alpha=10, beta=1.5)  # Expected period ~ 2π
    ls_periodic = pm.Gamma('ls_periodic', alpha=2, beta=1)

    # Trend parameters
    ls_trend = pm.Gamma('ls_trend', alpha=2, beta=0.5)

    # Signal variances
    sigma_periodic = pm.HalfNormal('sigma_periodic', sigma=1)
    sigma_trend = pm.HalfNormal('sigma_trend', sigma=1)

    # Combined covariance: periodic + trend
    cov_periodic = sigma_periodic**2 * pm.gp.cov.Periodic(1, period=period, ls=ls_periodic)
    cov_trend = sigma_trend**2 * pm.gp.cov.ExpQuad(1, ls_trend)
    cov_combined = cov_periodic + cov_trend

    # GP model
    gp = pm.gp.Marginal(cov_func=cov_combined)

    # Observation noise
    sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)

    # Likelihood
    X_time = t[:, None]
    y_obs = gp.marginal_likelihood('y_obs', X=X_time, y=y_seasonal, noise=sigma_y)

    trace = pm.sample(1000, tune=1000)

# Extrapolate to future
t_future = np.linspace(0, 6*np.pi, 150)[:, None]
with periodic_gp:
    f_future = gp.conditional('f_future', t_future)
    future_pred = pm.sample_posterior_predictive(trace, vars=[f_future])
```

### Non-Stationary GP

```python
# Non-stationary GP using input warping
X_nonstat = np.linspace(0, 10, 60)[:, None]
# Function with varying smoothness
y_nonstat = np.where(X_nonstat.ravel() < 5,
                     np.sin(5*X_nonstat.ravel()),  # High frequency
                     np.sin(X_nonstat.ravel()))    # Low frequency
y_nonstat += 0.1 * np.random.randn(len(y_nonstat))

with pm.Model() as nonstat_gp:
    # Warping function parameters
    alpha = pm.Normal('alpha', mu=0, sigma=1)
    beta = pm.HalfNormal('beta', sigma=1)

    # Input warping: stretch inputs differently in different regions
    def warp_func(x, alpha, beta):
        return x + alpha * pm.math.tanh(beta * (x - 5))

    # Base covariance function
    ls = pm.Gamma('ls', alpha=2, beta=1)
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)
    base_cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)

    # Warped covariance
    cov_warped = pm.gp.cov.WarpedInput(base_cov, warp_func, (alpha, beta))

    # GP with warped inputs
    gp = pm.gp.Marginal(cov_func=cov_warped)

    # Observation noise
    sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)

    # Likelihood
    y_obs = gp.marginal_likelihood('y_obs', X=X_nonstat, y=y_nonstat, noise=sigma_y)

    trace = pm.sample(1000, tune=1000)
```

### Hierarchical GP

```python
# Hierarchical GP with group-specific parameters
n_groups = 4
n_per_group = 25

# Generate group data with different characteristics
group_data = []
group_labels = []
for g in range(n_groups):
    X_g = np.random.uniform(0, 10, n_per_group)[:, None]
    # Different length scales per group
    ls_true = 1 + g * 0.5
    y_g = np.sin(X_g.ravel() / ls_true) + 0.1 * np.random.randn(n_per_group)

    group_data.append((X_g, y_g))
    group_labels.extend([g] * n_per_group)

# Combine data
X_all = np.vstack([X for X, y in group_data])
y_all = np.hstack([y for X, y in group_data])
group_idx = np.array(group_labels)

with pm.Model() as hierarchical_gp:
    # Hierarchical priors for length scales
    mu_ls = pm.Gamma('mu_ls', alpha=2, beta=1)
    sigma_ls = pm.HalfNormal('sigma_ls', sigma=1)
    ls_group = pm.Gamma('ls_group', alpha=2, beta=1/sigma_ls, shape=n_groups)

    # Shared signal variance
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)

    # Group-specific GPs
    gps = []
    for g in range(n_groups):
        # Group mask
        mask = group_idx == g
        X_g = X_all[mask]
        y_g = y_all[mask]

        # Group covariance function
        cov_g = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls_group[g])
        gp_g = pm.gp.Marginal(cov_func=cov_g)

        # Group likelihood
        sigma_y = pm.HalfNormal(f'sigma_y_{g}', sigma=0.5)
        y_obs_g = gp_g.marginal_likelihood(f'y_obs_{g}',
                                           X=X_g,
                                           y=y_g,
                                           noise=sigma_y)
        gps.append(gp_g)

    trace = pm.sample(1000, tune=1000)

# Analyze group differences
ls_posterior = trace['ls_group']
print("Group length scales (posterior means):")
for g in range(n_groups):
    print(f"Group {g}: {ls_posterior[:, g].mean():.3f}")
```