0
# Gaussian Processes
1
2
Gaussian process models for non-parametric Bayesian modeling, providing flexible function approximation with uncertainty quantification through kernels, likelihoods, and efficient inference algorithms.
3
4
## Capabilities
5
6
### Kernel Functions
7
8
Kernel functions that define the covariance structure and prior assumptions about function smoothness and behavior in Gaussian process models.
9
10
```python { .api }
11
class Kernel:
12
"""
13
Base class for Gaussian process kernel functions.
14
15
Kernels define the covariance structure of Gaussian processes by
16
specifying how similar function values should be at different inputs.
17
"""
18
19
def forward(self, X: torch.Tensor, Z: torch.Tensor = None, diag: bool = False) -> torch.Tensor:
20
"""
21
Compute kernel matrix or diagonal.
22
23
Parameters:
24
- X (Tensor): First set of inputs of shape (n, input_dim)
25
- Z (Tensor, optional): Second set of inputs of shape (m, input_dim).
26
If None, uses X for both arguments (computes K(X, X))
27
- diag (bool): If True, return only diagonal elements as vector
28
29
Returns:
30
Tensor: Kernel matrix of shape (n, m) (or (n, n) when Z is None); if diag=True, only the diagonal as a vector of shape (n,)
31
32
Examples:
33
>>> kernel = RBF(input_dim=2)
34
>>> X = torch.randn(10, 2)
35
>>> K = kernel.forward(X) # Shape: (10, 10)
36
>>> diag_K = kernel.forward(X, diag=True) # Shape: (10,)
37
"""
38
39
class RBF(Kernel):
40
"""
41
Radial Basis Function (RBF) kernel, also known as Gaussian or squared exponential kernel.
42
43
k(x, x') = variance * exp(-0.5 * ||x - x'||^2 / lengthscale^2)
44
45
Encodes the assumption of smooth (infinitely differentiable) functions that vary over a characteristic lengthscale.
46
"""
47
48
def __init__(self, input_dim: int, variance: torch.Tensor = None,
49
lengthscale: torch.Tensor = None, active_dims: List[int] = None):
50
"""
51
Parameters:
52
- input_dim (int): Input dimensionality
53
- variance (Tensor, optional): Kernel variance/amplitude parameter
54
- lengthscale (Tensor, optional): Characteristic lengthscale parameter(s)
55
- active_dims (List[int], optional): Dimensions to apply kernel to
56
57
Examples:
58
>>> # Isotropic RBF (same lengthscale for all dimensions)
59
>>> rbf = RBF(input_dim=3, variance=2.0, lengthscale=1.5)
60
>>>
61
>>> # Anisotropic RBF (different lengthscale per dimension)
62
>>> rbf = RBF(input_dim=3, lengthscale=torch.tensor([1.0, 2.0, 0.5]))
63
>>>
64
>>> # Apply to subset of dimensions
65
>>> rbf = RBF(input_dim=3, active_dims=[0, 2, 4])  # input_dim must equal len(active_dims)
66
"""
67
68
class Matern32(Kernel):
69
"""
70
Matérn kernel with smoothness parameter ν = 3/2.
71
72
k(x, x') = variance * (1 + sqrt(3) * r) * exp(-sqrt(3) * r)
73
where r = ||x - x'|| / lengthscale
74
75
Less smooth than RBF (sample functions are only once differentiable), so it can capture rougher functions.
76
"""
77
78
def __init__(self, input_dim: int, variance: torch.Tensor = None,
79
lengthscale: torch.Tensor = None, active_dims: List[int] = None):
80
"""
81
Parameters:
82
- input_dim (int): Input dimensionality
83
- variance (Tensor, optional): Kernel variance parameter
84
- lengthscale (Tensor, optional): Characteristic lengthscale
85
- active_dims (List[int], optional): Active input dimensions
86
"""
87
88
class Matern52(Kernel):
89
"""
90
Matérn kernel with smoothness parameter ν = 5/2.
91
92
k(x, x') = variance * (1 + sqrt(5) * r + 5/3 * r^2) * exp(-sqrt(5) * r)
93
where r = ||x - x'|| / lengthscale
94
95
Smoother than Matern32 but less smooth than RBF.
96
"""
97
98
def __init__(self, input_dim: int, variance: torch.Tensor = None,
99
lengthscale: torch.Tensor = None, active_dims: List[int] = None):
100
"""Parameters same as Matern32."""
101
102
class Exponential(Kernel):
103
"""
104
Exponential kernel (Matérn with ν = 1/2).
105
106
k(x, x') = variance * exp(-r)
107
where r = ||x - x'|| / lengthscale
108
109
Generates rough, non-differentiable functions.
110
"""
111
112
def __init__(self, input_dim: int, variance: torch.Tensor = None,
113
lengthscale: torch.Tensor = None, active_dims: List[int] = None):
114
"""Parameters same as RBF."""
115
116
class Linear(Kernel):
117
"""
118
Linear kernel for linear function relationships.
119
120
k(x, x') = variance * x^T * x'
121
122
Encodes linear function assumptions.
123
"""
124
125
def __init__(self, input_dim: int, variance: torch.Tensor = None,
126
active_dims: List[int] = None):
127
"""
128
Parameters:
129
- input_dim (int): Input dimensionality
130
- variance (Tensor, optional): Kernel variance parameter
131
- active_dims (List[int], optional): Active input dimensions
132
"""
133
134
class Polynomial(Kernel):
135
"""
136
Polynomial kernel for polynomial function relationships.
137
138
k(x, x') = variance * (bias + x^T * x')^degree
139
"""
140
141
def __init__(self, input_dim: int, degree: int = 2, variance: torch.Tensor = None,
142
bias: torch.Tensor = None, active_dims: List[int] = None):
143
"""
144
Parameters:
145
- input_dim (int): Input dimensionality
146
- degree (int): Polynomial degree
147
- variance (Tensor, optional): Kernel variance
148
- bias (Tensor, optional): Bias term
149
- active_dims (List[int], optional): Active dimensions
150
"""
151
152
class Periodic(Kernel):
153
"""
154
Periodic kernel for periodic function patterns.
155
156
k(x, x') = variance * exp(-2 * sin^2(π * ||x - x'|| / period) / lengthscale^2)
157
158
Encodes periodic structure with specified period.
159
"""
160
161
def __init__(self, input_dim: int, period: torch.Tensor, variance: torch.Tensor = None,
162
lengthscale: torch.Tensor = None, active_dims: List[int] = None):
163
"""
164
Parameters:
165
- input_dim (int): Input dimensionality
166
- period (Tensor): Period parameter for periodic structure
167
- variance (Tensor, optional): Kernel variance
168
- lengthscale (Tensor, optional): Lengthscale within periods
169
- active_dims (List[int], optional): Active dimensions
170
171
Examples:
172
>>> # Daily periodic pattern
173
>>> periodic = Periodic(input_dim=1, period=24.0)
174
>>>
175
>>> # Seasonal pattern
176
>>> periodic = Periodic(input_dim=1, period=365.25)
177
"""
178
179
class WhiteNoise(Kernel):
180
"""
181
White noise kernel for independent noise.
182
183
k(x, x') = variance * δ(x, x')
184
where δ is the Kronecker delta (1 if x == x', 0 otherwise)
185
186
Models independent noise at each point.
187
"""
188
189
def __init__(self, input_dim: int, variance: torch.Tensor = None,
190
active_dims: List[int] = None):
191
"""
192
Parameters:
193
- input_dim (int): Input dimensionality
194
- variance (Tensor, optional): Noise variance
195
- active_dims (List[int], optional): Active dimensions
196
"""
197
198
class Constant(Kernel):
199
"""
200
Constant kernel that returns constant covariance.
201
202
k(x, x') = variance
203
204
Encodes the assumption that the function takes the same value at every input.
205
"""
206
207
def __init__(self, input_dim: int, variance: torch.Tensor = None,
208
active_dims: List[int] = None):
209
"""
210
Parameters:
211
- input_dim (int): Input dimensionality
212
- variance (Tensor, optional): Constant variance value
213
- active_dims (List[int], optional): Active dimensions
214
"""
215
```
216
217
### Kernel Operations
218
219
Operations for combining and modifying kernels to create complex covariance structures.
220
221
```python { .api }
222
class Sum(Kernel):
223
"""
224
Sum of two kernels: k(x, x') = k1(x, x') + k2(x, x'). Nest Sum instances to add more than two kernels.
225
226
Combines different kernel behaviors additively.
227
"""
228
229
def __init__(self, kern1: Kernel, kern2: Kernel):
230
"""
231
Parameters:
232
- kern1 (Kernel): First kernel
233
- kern2 (Kernel): Second kernel
234
235
Examples:
236
>>> # Combine RBF and periodic components
237
>>> rbf = RBF(input_dim=1, lengthscale=1.0)
238
>>> periodic = Periodic(input_dim=1, period=12.0)
239
>>> combined = Sum(rbf, periodic)
240
"""
241
242
class Product(Kernel):
243
"""
244
Product of two kernels: k(x, x') = k1(x, x') * k2(x, x'). Nest Product instances to multiply more than two kernels.
245
246
Combines kernel behaviors multiplicatively.
247
"""
248
249
def __init__(self, kern1: Kernel, kern2: Kernel):
250
"""
251
Parameters:
252
- kern1 (Kernel): First kernel
253
- kern2 (Kernel): Second kernel
254
255
Examples:
256
>>> # Modulate RBF with periodic structure
257
>>> rbf = RBF(input_dim=1, lengthscale=2.0)
258
>>> periodic = Periodic(input_dim=1, period=7.0)
259
>>> modulated = Product(rbf, periodic)
260
"""
261
262
class Exponent(Kernel):
263
"""
264
Exponentiated kernel: k(x, x') = k_base(x, x')^exponent
265
266
Raises kernel values to a power.
267
"""
268
269
def __init__(self, kernel: Kernel, exponent: float):
270
"""
271
Parameters:
272
- kernel (Kernel): Base kernel
273
- exponent (float): Exponent value
274
"""
275
276
class VerticalScaling(Kernel):
277
"""
278
Vertically scale kernel: k(x, x') = scale * k_base(x, x')
279
280
Multiplies kernel by a scaling factor.
281
"""
282
283
def __init__(self, kernel: Kernel, scale: torch.Tensor):
284
"""
285
Parameters:
286
- kernel (Kernel): Base kernel to scale
287
- scale (Tensor): Scaling factor
288
"""
289
290
class Warping(Kernel):
291
"""
292
Apply input warping to kernel: k(x, x') = k_base(f(x), f(x'))
293
294
Transforms inputs before applying base kernel.
295
"""
296
297
def __init__(self, kernel: Kernel, iwarping_fn: callable):
298
"""
299
Parameters:
300
- kernel (Kernel): Base kernel
301
- iwarping_fn (callable): Input warping function
302
"""
303
```
304
305
### Likelihood Functions
306
307
Likelihood functions that define the observation model relating GP function values to observed data.
308
309
```python { .api }
310
class Likelihood:
311
"""
312
Base class for GP likelihood functions.
313
314
Defines how GP function values relate to observed data,
315
including noise models and observation transformations.
316
"""
317
318
def forward(self, function_dist: dist.Distribution, y: torch.Tensor = None) -> dist.Distribution:
319
"""
320
Forward pass through likelihood.
321
322
Parameters:
323
- function_dist (Distribution): GP function value distribution
324
- y (Tensor, optional): Observed data
325
326
Returns:
327
Distribution: Observation distribution
328
"""
329
330
class Gaussian(Likelihood):
331
"""
332
Gaussian likelihood for continuous observations with additive noise.
333
334
y = f(x) + ε, where ε ~ N(0, noise_variance)
335
336
Most common likelihood for regression problems.
337
"""
338
339
def __init__(self, noise: torch.Tensor = None, name: str = "Gaussian"):
340
"""
341
Parameters:
342
- noise (Tensor, optional): Noise variance parameter
343
- name (str): Likelihood name for parameter scoping
344
345
Examples:
346
>>> # Fixed noise variance
347
>>> likelihood = Gaussian(noise=0.1)
348
>>>
349
>>> # Learnable noise variance
350
>>> likelihood = Gaussian() # noise will be learned
351
"""
352
353
class Bernoulli(Likelihood):
354
"""
355
Bernoulli likelihood for binary classification.
356
357
p(y = 1 | f) = σ(f), where σ is the logistic sigmoid function
358
359
Maps GP function values to class probabilities.
360
"""
361
362
def __init__(self, name: str = "Bernoulli"):
363
"""
364
Parameters:
365
- name (str): Likelihood name for parameter scoping
366
"""
367
368
class Poisson(Likelihood):
369
"""
370
Poisson likelihood for count data.
371
372
p(y | f) = Poisson(exp(f))
373
374
Uses a log link (rate = exp(f)) to ensure a positive rate parameter.
375
"""
376
377
def __init__(self, name: str = "Poisson"):
378
"""
379
Parameters:
380
- name (str): Likelihood name for parameter scoping
381
"""
382
383
class Beta(Likelihood):
384
"""
385
Beta likelihood for data on the unit interval (0, 1).
386
387
Useful for modeling proportions, rates, or probabilities.
388
"""
389
390
def __init__(self, name: str = "Beta"):
391
"""
392
Parameters:
393
- name (str): Likelihood name for parameter scoping
394
"""
395
396
class Gamma(Likelihood):
397
"""
398
Gamma likelihood for positive continuous data.
399
400
Useful for modeling positive quantities like waiting times.
401
"""
402
403
def __init__(self, name: str = "Gamma"):
404
"""
405
Parameters:
406
- name (str): Likelihood name for parameter scoping
407
"""
408
```
409
410
### GP Models
411
412
Complete Gaussian process models combining kernels and likelihoods for different modeling scenarios.
413
414
```python { .api }
415
class GPModel:
416
"""
417
Base Gaussian process model class.
418
419
Combines kernel functions and likelihood models to create
420
complete GP models for regression and classification.
421
"""
422
423
def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
424
likelihood: Likelihood, name: str = "GPModel"):
425
"""
426
Parameters:
427
- X (Tensor): Training inputs of shape (n, input_dim)
428
- y (Tensor): Training outputs of shape (n,) or (n, output_dim)
429
- kernel (Kernel): Covariance kernel function
430
- likelihood (Likelihood): Observation likelihood model
431
- name (str): Model name for parameter scoping
432
433
Examples:
434
>>> X_train = torch.randn(100, 2)
435
>>> y_train = torch.randn(100)
436
>>> kernel = RBF(input_dim=2)
437
>>> likelihood = Gaussian()
438
>>> gp = GPModel(X_train, y_train, kernel, likelihood)
439
"""
440
441
def model(self):
442
"""Define the GP generative model."""
443
444
def guide(self):
445
"""Define variational guide for approximate inference."""
446
447
def forward(self, X_new: torch.Tensor, full_cov: bool = False, noiseless: bool = True) -> dist.Distribution:
448
"""
449
Make predictions at new input locations.
450
451
Parameters:
452
- X_new (Tensor): New input locations of shape (m, input_dim)
453
- full_cov (bool): Whether to return full covariance matrix
454
- noiseless (bool): Whether to exclude observation noise from predictions
455
456
Returns:
457
Distribution: Predictive distribution at new locations
458
459
Examples:
460
>>> X_test = torch.randn(20, 2)
461
>>> pred_dist = gp.forward(X_test)
462
>>> pred_mean = pred_dist.mean
463
>>> pred_var = pred_dist.variance
464
"""
465
466
class VariationalGP(GPModel):
467
"""
468
Variational Gaussian process for scalable inference.
469
470
Uses sparse GP approximations with inducing points for
471
efficient inference on large datasets.
472
"""
473
474
def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
475
likelihood: Likelihood, X_u: torch.Tensor = None,
476
num_inducing: int = None, name: str = "VariationalGP"):
477
"""
478
Parameters:
479
- X (Tensor): Training inputs
480
- y (Tensor): Training outputs
481
- kernel (Kernel): Covariance kernel
482
- likelihood (Likelihood): Observation likelihood
483
- X_u (Tensor, optional): Inducing input locations
484
- num_inducing (int, optional): Number of inducing points (if X_u not provided)
485
- name (str): Model name
486
487
Examples:
488
>>> # Large dataset GP with 50 inducing points
489
>>> X_train = torch.randn(10000, 3)
490
>>> y_train = torch.randn(10000)
491
>>> vgp = VariationalGP(X_train, y_train, RBF(3), Gaussian(), num_inducing=50)
492
"""
493
494
class SparseGPRegression(VariationalGP):
495
"""
496
Sparse GP regression model using variational inference.
497
498
Optimized for regression tasks with Gaussian likelihoods
499
and large datasets.
500
"""
501
pass
502
503
class VariationalSparseGP(VariationalGP):
504
"""
505
General variational sparse GP with flexible likelihoods.
506
507
Supports non-Gaussian likelihoods through variational inference.
508
"""
509
pass
510
```
511
512
### Multi-Output GPs
513
514
Models for handling multiple outputs and structured output spaces.
515
516
```python { .api }
517
class MultiOutputGP(GPModel):
518
"""
519
Multi-output Gaussian process for vector-valued functions.
520
521
Models correlations between different output dimensions
522
using appropriate kernel structures.
523
"""
524
525
def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
526
likelihood: Likelihood, num_outputs: int, name: str = "MultiOutputGP"):
527
"""
528
Parameters:
529
- X (Tensor): Training inputs
530
- y (Tensor): Training outputs of shape (n, num_outputs)
531
- kernel (Kernel): Base kernel (will be extended for multiple outputs)
532
- likelihood (Likelihood): Output likelihood
533
- num_outputs (int): Number of output dimensions
534
- name (str): Model name
535
"""
536
537
class VariationalMultiOutputGP(MultiOutputGP):
538
"""
539
Variational multi-output GP for scalable multi-output modeling.
540
541
Combines multi-output structure with sparse GP approximations.
542
"""
543
pass
544
```
545
546
### Utility Functions
547
548
Helper functions and utilities for GP modeling and inference.
549
550
```python { .api }
551
def conditional(X_new: torch.Tensor, X_train: torch.Tensor, kernel: Kernel,
552
f_loc: torch.Tensor, f_scale_tril: torch.Tensor = None,
553
full_cov: bool = False, whiten: bool = False,
554
jitter: float = 1e-6) -> dist.Distribution:
555
"""
556
Compute conditional GP distribution p(f* | f, X*, X).
557
558
Parameters:
559
- X_new (Tensor): Test input locations
560
- X_train (Tensor): Training input locations
561
- kernel (Kernel): Covariance kernel
562
- f_loc (Tensor): Mean of training function values
563
- f_scale_tril (Tensor, optional): Cholesky factor of training covariance
564
- full_cov (bool): Whether to return full covariance
565
- whiten (bool): Whether to use whitened parameterization
566
- jitter (float): Jitter for numerical stability
567
568
Returns:
569
Distribution: Conditional GP distribution
570
"""
571
572
def util_gp_prior(X: torch.Tensor, kernel: Kernel, jitter: float = 1e-6) -> dist.Distribution:
573
"""
574
Compute GP prior distribution at given input locations.
575
576
Parameters:
577
- X (Tensor): Input locations
578
- kernel (Kernel): Covariance kernel
579
- jitter (float): Diagonal jitter for numerical stability
580
581
Returns:
582
Distribution: GP prior distribution
583
"""
584
585
def train_gp(gp_model: GPModel, optimizer, num_steps: int = 1000,
586
retain_graph: bool = False) -> List[float]:
587
"""
588
Train GP model using optimization.
589
590
Parameters:
591
- gp_model (GPModel): GP model to train
592
- optimizer: PyTorch optimizer
593
- num_steps (int): Number of optimization steps
594
- retain_graph (bool): Whether to retain computation graph
595
596
Returns:
597
List[float]: Training loss history
598
599
Examples:
600
>>> optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)
601
>>> losses = train_gp(gp, optimizer, num_steps=500)
602
"""
603
```
604
605
## Examples
606
607
### Basic GP Regression
608
609
```python
610
import torch
611
import pyro
612
import pyro.distributions as dist
613
from pyro.contrib.gp import GPModel
614
from pyro.contrib.gp.kernels import RBF
615
from pyro.contrib.gp.likelihoods import Gaussian
616
617
# Generate training data
618
X_train = torch.linspace(0, 10, 50).unsqueeze(-1)
619
y_train = torch.sin(X_train.squeeze()) + 0.1 * torch.randn(50)
620
621
# Define GP model
622
kernel = RBF(input_dim=1, lengthscale=1.0, variance=1.0)
623
likelihood = Gaussian(noise=0.1)
624
gp = GPModel(X_train, y_train, kernel, likelihood)
625
626
# Training
627
optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)
628
for i in range(1000):
629
optimizer.zero_grad()
630
loss = -gp.model().log_prob(y_train)
631
loss.backward()
632
optimizer.step()
633
634
# Prediction
635
X_test = torch.linspace(0, 12, 100).unsqueeze(-1)
636
with torch.no_grad():
637
pred_dist = gp.forward(X_test)
638
pred_mean = pred_dist.mean
639
pred_std = pred_dist.stddev
640
```
641
642
### Sparse GP for Large Datasets
643
644
```python
645
from pyro.contrib.gp import VariationalGP
646
647
# Large dataset
648
X_train = torch.randn(10000, 3)
649
y_train = torch.sin(X_train.sum(dim=1)) + 0.1 * torch.randn(10000)
650
651
# Sparse GP with inducing points
652
kernel = RBF(input_dim=3)
653
likelihood = Gaussian()
654
sparse_gp = VariationalGP(X_train, y_train, kernel, likelihood, num_inducing=100)
655
656
# Training with SVI
657
from pyro.infer import SVI, Trace_ELBO
658
from pyro.optim import Adam
659
660
svi = SVI(sparse_gp.model, sparse_gp.guide, Adam({"lr": 0.01}), Trace_ELBO())
661
662
for step in range(2000):
663
loss = svi.step()
664
if step % 200 == 0:
665
print(f"Step {step}, Loss: {loss}")
666
```
667
668
### Multi-Output GP
669
670
```python
671
from pyro.contrib.gp import MultiOutputGP
672
673
# Multi-dimensional output data
674
X_train = torch.randn(200, 2)
675
y_train = torch.randn(200, 3) # 3-dimensional output
676
677
# Multi-output GP
678
kernel = RBF(input_dim=2)
679
likelihood = Gaussian()
680
multi_gp = MultiOutputGP(X_train, y_train, kernel, likelihood, num_outputs=3)
681
682
# Training and prediction follow the same steps as in the basic GP regression example
683
```
684
685
### Custom Kernel Combination
686
687
```python
688
from pyro.contrib.gp.kernels import RBF, Periodic, WhiteNoise, Sum, Product
689
690
# Complex kernel combining multiple components
691
base_kernel = RBF(input_dim=1, lengthscale=2.0, variance=1.0)
692
periodic_kernel = Periodic(input_dim=1, period=7.0, lengthscale=1.0)
693
noise_kernel = WhiteNoise(input_dim=1, variance=0.1)
694
695
# Combine kernels: RBF + Periodic pattern + independent noise
696
combined_kernel = Sum(Sum(base_kernel, periodic_kernel), noise_kernel)
697
698
# Use in GP model
699
gp = GPModel(X_train, y_train, combined_kernel, Gaussian())
700
```
701
702
### GP Classification
703
704
```python
705
from pyro.contrib.gp.likelihoods import Bernoulli
706
707
# Binary classification data
708
X_train = torch.randn(100, 2)
709
y_train = torch.randint(0, 2, (100,)).float()
710
711
# GP classifier
712
kernel = RBF(input_dim=2)
713
likelihood = Bernoulli()
714
gp_classifier = GPModel(X_train, y_train, kernel, likelihood)
715
716
# Training requires variational inference: with a non-Gaussian likelihood the posterior has no closed form
717
from pyro.infer import SVI, Trace_ELBO
718
from pyro.optim import Adam
719
720
svi = SVI(gp_classifier.model, gp_classifier.guide, Adam({"lr": 0.01}), Trace_ELBO())
721
722
for step in range(1000):
723
loss = svi.step()
724
```