# A flexible, scalable deep probabilistic programming library built on PyTorch
# for universal probabilistic modeling and inference.
#
# Gaussian process models for non-parametric Bayesian modeling, providing
# flexible function approximation with uncertainty quantification through
# kernels, likelihoods, and efficient inference algorithms.
#
# Kernel functions define the covariance structure and encode prior assumptions
# about function smoothness and behavior in Gaussian process models.
class Kernel:
    """
    Base class for Gaussian process kernel functions.

    Kernels define the covariance structure of Gaussian processes by
    specifying how similar function values should be at different inputs.
    """
    def forward(self, X: torch.Tensor, Z: torch.Tensor = None, diag: bool = False) -> torch.Tensor:
        """
        Compute the kernel matrix or its diagonal.

        Parameters:
        - X (Tensor): First set of inputs of shape (n, input_dim).
        - Z (Tensor, optional): Second set of inputs of shape (m, input_dim).
          If None, uses X for both arguments (computes K(X, X)).
        - diag (bool): If True, return only the diagonal elements as a vector.

        Returns:
            Tensor: Kernel matrix of shape (n, m), or diagonal vector of shape (n,).

        Examples:
            >>> kernel = RBF(input_dim=2)
            >>> X = torch.randn(10, 2)
            >>> K = kernel.forward(X)  # Shape: (10, 10)
            >>> diag_K = kernel.forward(X, diag=True)  # Shape: (10,)
        """
class RBF(Kernel):
    """
    Radial Basis Function (RBF) kernel, also known as Gaussian or squared
    exponential kernel.

    k(x, x') = variance * exp(-0.5 * ||x - x'||^2 / lengthscale^2)

    Encodes smooth function assumptions with a characteristic lengthscale.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Kernel variance/amplitude parameter.
        - lengthscale (Tensor, optional): Characteristic lengthscale parameter(s).
        - active_dims (List[int], optional): Dimensions to apply the kernel to.

        Examples:
            >>> # Isotropic RBF (same lengthscale for all dimensions)
            >>> rbf = RBF(input_dim=3, variance=2.0, lengthscale=1.5)
            >>>
            >>> # Anisotropic RBF (different lengthscale per dimension)
            >>> rbf = RBF(input_dim=3, lengthscale=torch.tensor([1.0, 2.0, 0.5]))
            >>>
            >>> # Apply to a subset of dimensions
            >>> rbf = RBF(input_dim=5, active_dims=[0, 2, 4])
        """
class Matern32(Kernel):
    """
    Matérn kernel with smoothness parameter ν = 3/2.

    k(x, x') = variance * (1 + sqrt(3) * r) * exp(-sqrt(3) * r)
    where r = ||x - x'|| / lengthscale

    Less smooth than RBF, allowing more flexible function shapes.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Kernel variance parameter.
        - lengthscale (Tensor, optional): Characteristic lengthscale.
        - active_dims (List[int], optional): Active input dimensions.
        """
class Matern52(Kernel):
    """
    Matérn kernel with smoothness parameter ν = 5/2.

    k(x, x') = variance * (1 + sqrt(5) * r + 5/3 * r^2) * exp(-sqrt(5) * r)
    where r = ||x - x'|| / lengthscale

    Smoother than Matern32 but less smooth than RBF.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """Parameters same as Matern32."""
class Exponential(Kernel):
    """
    Exponential kernel (Matérn with ν = 1/2).

    k(x, x') = variance * exp(-r)
    where r = ||x - x'|| / lengthscale

    Generates rough, non-differentiable functions.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """Parameters same as RBF."""
class Linear(Kernel):
    """
    Linear kernel for linear function relationships.

    k(x, x') = variance * x^T * x'

    Encodes linear function assumptions.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Kernel variance parameter.
        - active_dims (List[int], optional): Active input dimensions.
        """
class Polynomial(Kernel):
    """
    Polynomial kernel for polynomial function relationships.

    k(x, x') = (variance * x^T * x' + bias)^degree
    """
    def __init__(self, input_dim: int, degree: int = 2, variance: torch.Tensor = None,
                 bias: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - degree (int): Polynomial degree.
        - variance (Tensor, optional): Kernel variance.
        - bias (Tensor, optional): Bias term.
        - active_dims (List[int], optional): Active dimensions.
        """
class Periodic(Kernel):
    """
    Periodic kernel for periodic function patterns.

    k(x, x') = variance * exp(-2 * sin^2(π * ||x - x'|| / period) / lengthscale^2)

    Encodes periodic structure with a specified period.
    """
    def __init__(self, input_dim: int, period: torch.Tensor, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - period (Tensor): Period parameter for the periodic structure.
        - variance (Tensor, optional): Kernel variance.
        - lengthscale (Tensor, optional): Lengthscale within periods.
        - active_dims (List[int], optional): Active dimensions.

        Examples:
            >>> # Daily periodic pattern
            >>> periodic = Periodic(input_dim=1, period=24.0)
            >>>
            >>> # Seasonal pattern
            >>> periodic = Periodic(input_dim=1, period=365.25)
        """
class WhiteNoise(Kernel):
    """
    White noise kernel for independent noise.

    k(x, x') = variance * δ(x, x')
    where δ is the Kronecker delta (1 if x == x', 0 otherwise).

    Models independent noise at each point.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Noise variance.
        - active_dims (List[int], optional): Active dimensions.
        """
class Constant(Kernel):
    """
    Constant kernel that returns a constant covariance.

    k(x, x') = variance

    Models constant function assumptions.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Constant variance value.
        - active_dims (List[int], optional): Active dimensions.
        """

# Operations for combining and modifying kernels to create complex covariance structures.
class Sum(Kernel):
    """
    Sum of multiple kernels: k(x, x') = k1(x, x') + k2(x, x') + ...

    Combines different kernel behaviors additively.
    """
    def __init__(self, kern1: Kernel, kern2: Kernel):
        """
        Parameters:
        - kern1 (Kernel): First kernel.
        - kern2 (Kernel): Second kernel.

        Examples:
            >>> # Combine RBF and periodic components
            >>> rbf = RBF(input_dim=1, lengthscale=1.0)
            >>> periodic = Periodic(input_dim=1, period=12.0)
            >>> combined = Sum(rbf, periodic)
        """
class Product(Kernel):
    """
    Product of multiple kernels: k(x, x') = k1(x, x') * k2(x, x') * ...

    Combines kernel behaviors multiplicatively.
    """
    def __init__(self, kern1: Kernel, kern2: Kernel):
        """
        Parameters:
        - kern1 (Kernel): First kernel.
        - kern2 (Kernel): Second kernel.

        Examples:
            >>> # Modulate RBF with periodic structure
            >>> rbf = RBF(input_dim=1, lengthscale=2.0)
            >>> periodic = Periodic(input_dim=1, period=7.0)
            >>> modulated = Product(rbf, periodic)
        """
class Exponent(Kernel):
    """
    Exponentiated kernel: k(x, x') = k_base(x, x')^exponent

    Raises kernel values to a power.
    """
    def __init__(self, kernel: Kernel, exponent: float):
        """
        Parameters:
        - kernel (Kernel): Base kernel.
        - exponent (float): Exponent value.
        """
class VerticalScaling(Kernel):
    """
    Vertically scaled kernel: k(x, x') = scale * k_base(x, x')

    Multiplies kernel values by a scaling factor.
    """
    def __init__(self, kernel: Kernel, scale: torch.Tensor):
        """
        Parameters:
        - kernel (Kernel): Base kernel to scale.
        - scale (Tensor): Scaling factor.
        """
class Warping(Kernel):
    """
    Apply input warping to a kernel: k(x, x') = k_base(f(x), f(x'))

    Transforms inputs before applying the base kernel.
    """
    def __init__(self, kernel: Kernel, iwarping_fn: callable):
        """
        Parameters:
        - kernel (Kernel): Base kernel.
        - iwarping_fn (callable): Input warping function.
        """

# Likelihood functions that define the observation model relating GP function values to observed data.
class Likelihood:
    """
    Base class for GP likelihood functions.

    Defines how GP function values relate to observed data, including
    noise models and observation transformations.
    """
    def forward(self, function_dist: dist.Distribution, y: torch.Tensor = None) -> dist.Distribution:
        """
        Forward pass through the likelihood.

        Parameters:
        - function_dist (Distribution): GP function value distribution.
        - y (Tensor, optional): Observed data.

        Returns:
            Distribution: Observation distribution.
        """
class Gaussian(Likelihood):
    """
    Gaussian likelihood for continuous observations with additive noise.

    y = f(x) + ε, where ε ~ N(0, noise_variance)

    Most common likelihood for regression problems.
    """
    def __init__(self, noise: torch.Tensor = None, name: str = "Gaussian"):
        """
        Parameters:
        - noise (Tensor, optional): Noise variance parameter.
        - name (str): Likelihood name for parameter scoping.

        Examples:
            >>> # Fixed noise variance
            >>> likelihood = Gaussian(noise=0.1)
            >>>
            >>> # Learnable noise variance
            >>> likelihood = Gaussian()  # noise will be learned
        """
class Bernoulli(Likelihood):
    """
    Bernoulli likelihood for binary classification.

    p(y = 1 | f) = σ(f), where σ is the sigmoid function.

    Maps GP function values to class probabilities.
    """
    def __init__(self, name: str = "Bernoulli"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """
class Poisson(Likelihood):
    """
    Poisson likelihood for count data.

    p(y | f) = Poisson(exp(f))

    Uses a log-link to ensure a positive rate parameter.
    """
    def __init__(self, name: str = "Poisson"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """
class Beta(Likelihood):
    """
    Beta likelihood for data on the unit interval.

    Useful for modeling proportions, rates, or probabilities.
    """
    def __init__(self, name: str = "Beta"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """
class Gamma(Likelihood):
    """
    Gamma likelihood for positive continuous data.

    Useful for modeling positive quantities like waiting times.
    """
    def __init__(self, name: str = "Gamma"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """

# Complete Gaussian process models combining kernels and likelihoods for different modeling scenarios.
class GPModel:
    """
    Base Gaussian process model class.

    Combines a covariance kernel with an observation likelihood to form a
    complete GP model for regression or classification.
    """
    def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
                 likelihood: Likelihood, name: str = "GPModel"):
        """
        Parameters:
        - X (Tensor): Training inputs of shape (n, input_dim).
        - y (Tensor): Training outputs of shape (n,) or (n, output_dim).
        - kernel (Kernel): Covariance kernel function.
        - likelihood (Likelihood): Observation likelihood model.
        - name (str): Model name for parameter scoping.

        Examples:
            >>> X_train = torch.randn(100, 2)
            >>> y_train = torch.randn(100)
            >>> kernel = RBF(input_dim=2)
            >>> likelihood = Gaussian()
            >>> gp = GPModel(X_train, y_train, kernel, likelihood)
        """
    def model(self):
        """Define the GP generative model."""
    def guide(self):
        """Define the variational guide for approximate inference."""
    def forward(self, X_new: torch.Tensor, full_cov: bool = False, noiseless: bool = True) -> dist.Distribution:
        """
        Make predictions at new input locations.

        Parameters:
        - X_new (Tensor): New input locations of shape (m, input_dim).
        - full_cov (bool): Whether to return the full covariance matrix.
        - noiseless (bool): Whether to exclude observation noise from predictions.

        Returns:
            Distribution: Predictive distribution at the new locations.

        Examples:
            >>> X_test = torch.randn(20, 2)
            >>> pred_dist = gp.forward(X_test)
            >>> pred_mean = pred_dist.mean
            >>> pred_var = pred_dist.variance
        """
class VariationalGP(GPModel):
    """
    Variational Gaussian process for scalable inference.

    Uses sparse GP approximations with inducing points for efficient
    inference on large datasets.
    """
    def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
                 likelihood: Likelihood, X_u: torch.Tensor = None,
                 num_inducing: int = None, name: str = "VariationalGP"):
        """
        Parameters:
        - X (Tensor): Training inputs.
        - y (Tensor): Training outputs.
        - kernel (Kernel): Covariance kernel.
        - likelihood (Likelihood): Observation likelihood.
        - X_u (Tensor, optional): Inducing input locations.
        - num_inducing (int, optional): Number of inducing points (if X_u not provided).
        - name (str): Model name.

        Examples:
            >>> # Large dataset GP with 50 inducing points
            >>> X_train = torch.randn(10000, 3)
            >>> y_train = torch.randn(10000)
            >>> vgp = VariationalGP(X_train, y_train, RBF(3), Gaussian(), num_inducing=50)
        """
class SparseGPRegression(VariationalGP):
    """
    Sparse GP regression model using variational inference.

    Optimized for regression tasks with Gaussian likelihoods and
    large datasets.
    """
    pass
class VariationalSparseGP(VariationalGP):
    """
    General variational sparse GP with flexible likelihoods.

    Supports non-Gaussian likelihoods through variational inference.
    """
    pass

# Models for handling multiple outputs and structured output spaces.
class MultiOutputGP(GPModel):
    """
    Multi-output Gaussian process for vector-valued functions.

    Models correlations between different output dimensions using
    appropriate kernel structures.
    """
    def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
                 likelihood: Likelihood, num_outputs: int, name: str = "MultiOutputGP"):
        """
        Parameters:
        - X (Tensor): Training inputs.
        - y (Tensor): Training outputs of shape (n, num_outputs).
        - kernel (Kernel): Base kernel (will be extended for multiple outputs).
        - likelihood (Likelihood): Output likelihood.
        - num_outputs (int): Number of output dimensions.
        - name (str): Model name.
        """
class VariationalMultiOutputGP(MultiOutputGP):
    """
    Variational multi-output GP for scalable multi-output modeling.

    Combines multi-output structure with sparse GP approximations.
    """
    pass

# Helper functions and utilities for GP modeling and inference.
def conditional(X_new: torch.Tensor, X_train: torch.Tensor, kernel: Kernel,
                f_loc: torch.Tensor, f_scale_tril: torch.Tensor = None,
                full_cov: bool = False, whiten: bool = False,
                jitter: float = 1e-6) -> dist.Distribution:
    """
    Compute the conditional GP distribution p(f* | f, X*, X).

    Parameters:
    - X_new (Tensor): Test input locations.
    - X_train (Tensor): Training input locations.
    - kernel (Kernel): Covariance kernel.
    - f_loc (Tensor): Mean of training function values.
    - f_scale_tril (Tensor, optional): Cholesky factor of the training covariance.
    - full_cov (bool): Whether to return the full covariance.
    - whiten (bool): Whether to use the whitened parameterization.
    - jitter (float): Diagonal jitter for numerical stability.

    Returns:
        Distribution: Conditional GP distribution.
    """
def util_gp_prior(X: torch.Tensor, kernel: Kernel, jitter: float = 1e-6) -> dist.Distribution:
    """
    Compute the GP prior distribution at the given input locations.

    Parameters:
    - X (Tensor): Input locations.
    - kernel (Kernel): Covariance kernel.
    - jitter (float): Diagonal jitter for numerical stability.

    Returns:
        Distribution: GP prior distribution.
    """
def train_gp(gp_model: GPModel, optimizer, num_steps: int = 1000,
             retain_graph: bool = False) -> List[float]:
    """
    Train a GP model using optimization.

    Parameters:
    - gp_model (GPModel): GP model to train.
    - optimizer: PyTorch optimizer over the model's parameters.
    - num_steps (int): Number of optimization steps.
    - retain_graph (bool): Whether to retain the computation graph
      between backward passes.

    Returns:
        List[float]: Training loss history.

    Examples:
        >>> optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)
        >>> losses = train_gp(gp, optimizer, num_steps=500)
    """

import torch
import pyro
import pyro.distributions as dist
from pyro.contrib.gp import GPModel
from pyro.contrib.gp.kernels import RBF
from pyro.contrib.gp.likelihoods import Gaussian

# Generate noisy observations of a sine function.
X_train = torch.linspace(0, 10, 50).unsqueeze(-1)
y_train = torch.sin(X_train.squeeze()) + 0.1 * torch.randn(50)

# Define the GP model: RBF covariance with Gaussian observation noise.
kernel = RBF(input_dim=1, lengthscale=1.0, variance=1.0)
likelihood = Gaussian(noise=0.1)
gp = GPModel(X_train, y_train, kernel, likelihood)

# Training: maximize the marginal likelihood with Adam.
optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)
for i in range(1000):
    optimizer.zero_grad()
    loss = -gp.model().log_prob(y_train)
    loss.backward()
    optimizer.step()

# Prediction at new inputs (extrapolating beyond the training range).
X_test = torch.linspace(0, 12, 100).unsqueeze(-1)
with torch.no_grad():
    pred_dist = gp.forward(X_test)
    pred_mean = pred_dist.mean
    pred_std = pred_dist.stddev

from pyro.contrib.gp import VariationalGP
from pyro.contrib.gp import VariationalGP

# Large dataset: 10,000 points in 3 dimensions.
X_train = torch.randn(10000, 3)
y_train = torch.sin(X_train.sum(dim=1)) + 0.1 * torch.randn(10000)

# Sparse GP approximation with 100 inducing points.
kernel = RBF(input_dim=3)
likelihood = Gaussian()
sparse_gp = VariationalGP(X_train, y_train, kernel, likelihood, num_inducing=100)

# Training with stochastic variational inference (SVI).
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

svi = SVI(sparse_gp.model, sparse_gp.guide, Adam({"lr": 0.01}), Trace_ELBO())
for step in range(2000):
    loss = svi.step()
    if step % 200 == 0:
        print(f"Step {step}, Loss: {loss}")

from pyro.contrib.gp import MultiOutputGP
from pyro.contrib.gp import MultiOutputGP

# Multi-dimensional output data.
X_train = torch.randn(200, 2)
y_train = torch.randn(200, 3)  # 3-dimensional output

# Multi-output GP sharing a base kernel across outputs.
kernel = RBF(input_dim=2)
likelihood = Gaussian()
multi_gp = MultiOutputGP(X_train, y_train, kernel, likelihood, num_outputs=3)

# Training and prediction proceed as for the basic GP.

from pyro.contrib.gp.kernels import RBF, Periodic, WhiteNoise, Sum, Product
from pyro.contrib.gp.kernels import RBF, Periodic, WhiteNoise, Sum, Product

# Complex kernel combining multiple components.
base_kernel = RBF(input_dim=1, lengthscale=2.0, variance=1.0)
periodic_kernel = Periodic(input_dim=1, period=7.0, lengthscale=1.0)
noise_kernel = WhiteNoise(input_dim=1, variance=0.1)

# Combine kernels: RBF + periodic pattern + independent noise.
combined_kernel = Sum(Sum(base_kernel, periodic_kernel), noise_kernel)

# Use the composite kernel in a GP model.
gp = GPModel(X_train, y_train, combined_kernel, Gaussian())

from pyro.contrib.gp.likelihoods import Bernoulli
from pyro.contrib.gp.likelihoods import Bernoulli

# Binary classification data.
X_train = torch.randn(100, 2)
y_train = torch.randint(0, 2, (100,)).float()

# GP classifier: RBF kernel with Bernoulli likelihood.
kernel = RBF(input_dim=2)
likelihood = Bernoulli()
gp_classifier = GPModel(X_train, y_train, kernel, likelihood)

# Training requires variational inference for the non-Gaussian likelihood.
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

svi = SVI(gp_classifier.model, gp_classifier.guide, Adam({"lr": 0.01}), Trace_ELBO())
for step in range(1000):
    loss = svi.step()

# Install with Tessl CLI
# npx tessl i tessl/pypi-pyro-ppl