# A flexible, scalable deep probabilistic programming library built on PyTorch
# for universal probabilistic modeling and inference.
#
# Gaussian process models for non-parametric Bayesian modeling, providing
# flexible function approximation with uncertainty quantification through
# kernels, likelihoods, and efficient inference algorithms.
#
# Kernel functions define the covariance structure and encode prior assumptions
# about function smoothness and behavior in Gaussian process models.
class Kernel:
    """
    Base class for Gaussian process kernel functions.

    Kernels define the covariance structure of Gaussian processes by
    specifying how similar function values should be at different inputs.
    """
    def forward(self, X: torch.Tensor, Z: torch.Tensor = None, diag: bool = False) -> torch.Tensor:
        """
        Compute the kernel matrix or its diagonal.

        Parameters:
        - X (Tensor): First set of inputs of shape (n, input_dim).
        - Z (Tensor, optional): Second set of inputs of shape (m, input_dim).
          If None, uses X for both arguments (computes K(X, X)).
        - diag (bool): If True, return only the diagonal elements as a vector.

        Returns:
            Tensor: Kernel matrix of shape (n, m), or diagonal vector of shape (n,).

        Examples:
            >>> kernel = RBF(input_dim=2)
            >>> X = torch.randn(10, 2)
            >>> K = kernel.forward(X)  # Shape: (10, 10)
            >>> diag_K = kernel.forward(X, diag=True)  # Shape: (10,)
        """
class RBF(Kernel):
    """
    Radial Basis Function (RBF) kernel, also known as Gaussian or squared
    exponential kernel.

    k(x, x') = variance * exp(-0.5 * ||x - x'||^2 / lengthscale^2)

    Encodes smooth function assumptions with a characteristic lengthscale.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Kernel variance/amplitude parameter.
        - lengthscale (Tensor, optional): Characteristic lengthscale parameter(s).
        - active_dims (List[int], optional): Dimensions to apply the kernel to.

        Examples:
            >>> # Isotropic RBF (same lengthscale for all dimensions)
            >>> rbf = RBF(input_dim=3, variance=2.0, lengthscale=1.5)
            >>>
            >>> # Anisotropic RBF (different lengthscale per dimension)
            >>> rbf = RBF(input_dim=3, lengthscale=torch.tensor([1.0, 2.0, 0.5]))
            >>>
            >>> # Apply to a subset of dimensions
            >>> rbf = RBF(input_dim=5, active_dims=[0, 2, 4])
        """
class Matern32(Kernel):
    """
    Matérn kernel with smoothness parameter ν = 3/2.

    k(x, x') = variance * (1 + sqrt(3) * r) * exp(-sqrt(3) * r)
    where r = ||x - x'|| / lengthscale

    Less smooth than RBF, allowing more flexible function shapes.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Kernel variance parameter.
        - lengthscale (Tensor, optional): Characteristic lengthscale.
        - active_dims (List[int], optional): Active input dimensions.
        """
class Matern52(Kernel):
    """
    Matérn kernel with smoothness parameter ν = 5/2.

    k(x, x') = variance * (1 + sqrt(5) * r + 5/3 * r^2) * exp(-sqrt(5) * r)
    where r = ||x - x'|| / lengthscale

    Smoother than Matern32 but less smooth than RBF.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """Parameters same as Matern32."""
class Exponential(Kernel):
    """
    Exponential kernel (Matérn with ν = 1/2).

    k(x, x') = variance * exp(-r)
    where r = ||x - x'|| / lengthscale

    Generates rough, non-differentiable functions.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """Parameters same as RBF."""
class Linear(Kernel):
    """
    Linear kernel for linear function relationships.

    k(x, x') = variance * x^T * x'

    Encodes linear function assumptions.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Kernel variance parameter.
        - active_dims (List[int], optional): Active input dimensions.
        """
class Polynomial(Kernel):
    """
    Polynomial kernel for polynomial function relationships.

    k(x, x') = (variance * x^T * x' + bias)^degree
    """
    def __init__(self, input_dim: int, degree: int = 2, variance: torch.Tensor = None,
                 bias: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - degree (int): Polynomial degree.
        - variance (Tensor, optional): Kernel variance.
        - bias (Tensor, optional): Bias term.
        - active_dims (List[int], optional): Active dimensions.
        """
class Periodic(Kernel):
    """
    Periodic kernel for periodic function patterns.

    k(x, x') = variance * exp(-2 * sin^2(π * ||x - x'|| / period) / lengthscale^2)

    Encodes periodic structure with a specified period.
    """
    def __init__(self, input_dim: int, period: torch.Tensor, variance: torch.Tensor = None,
                 lengthscale: torch.Tensor = None, active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - period (Tensor): Period parameter for the periodic structure.
        - variance (Tensor, optional): Kernel variance.
        - lengthscale (Tensor, optional): Lengthscale within periods.
        - active_dims (List[int], optional): Active dimensions.

        Examples:
            >>> # Daily periodic pattern
            >>> periodic = Periodic(input_dim=1, period=24.0)
            >>>
            >>> # Seasonal pattern
            >>> periodic = Periodic(input_dim=1, period=365.25)
        """
class WhiteNoise(Kernel):
    """
    White noise kernel for independent noise.

    k(x, x') = variance * δ(x, x')
    where δ is the Kronecker delta (1 if x == x', 0 otherwise).

    Models independent noise at each point.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Noise variance.
        - active_dims (List[int], optional): Active dimensions.
        """
class Constant(Kernel):
    """
    Constant kernel that returns a constant covariance.

    k(x, x') = variance

    Models constant function assumptions.
    """
    def __init__(self, input_dim: int, variance: torch.Tensor = None,
                 active_dims: List[int] = None):
        """
        Parameters:
        - input_dim (int): Input dimensionality.
        - variance (Tensor, optional): Constant variance value.
        - active_dims (List[int], optional): Active dimensions.
        """

# Operations for combining and modifying kernels to create complex covariance structures.
class Sum(Kernel):
    """
    Sum of multiple kernels: k(x, x') = k1(x, x') + k2(x, x') + ...

    Combines different kernel behaviors additively.
    """
    def __init__(self, kern1: Kernel, kern2: Kernel):
        """
        Parameters:
        - kern1 (Kernel): First kernel.
        - kern2 (Kernel): Second kernel.

        Examples:
            >>> # Combine RBF and periodic components
            >>> rbf = RBF(input_dim=1, lengthscale=1.0)
            >>> periodic = Periodic(input_dim=1, period=12.0)
            >>> combined = Sum(rbf, periodic)
        """
class Product(Kernel):
    """
    Product of multiple kernels: k(x, x') = k1(x, x') * k2(x, x') * ...

    Combines kernel behaviors multiplicatively.
    """
    def __init__(self, kern1: Kernel, kern2: Kernel):
        """
        Parameters:
        - kern1 (Kernel): First kernel.
        - kern2 (Kernel): Second kernel.

        Examples:
            >>> # Modulate RBF with periodic structure
            >>> rbf = RBF(input_dim=1, lengthscale=2.0)
            >>> periodic = Periodic(input_dim=1, period=7.0)
            >>> modulated = Product(rbf, periodic)
        """
class Exponent(Kernel):
    """
    Exponentiated kernel: k(x, x') = k_base(x, x')^exponent

    Raises kernel values to a power.
    """
    def __init__(self, kernel: Kernel, exponent: float):
        """
        Parameters:
        - kernel (Kernel): Base kernel.
        - exponent (float): Exponent value.
        """
class VerticalScaling(Kernel):
    """
    Vertically scaled kernel: k(x, x') = scale * k_base(x, x')

    Multiplies kernel values by a scaling factor.
    """
    def __init__(self, kernel: Kernel, scale: torch.Tensor):
        """
        Parameters:
        - kernel (Kernel): Base kernel to scale.
        - scale (Tensor): Scaling factor.
        """
class Warping(Kernel):
    """
    Apply input warping to a kernel: k(x, x') = k_base(f(x), f(x'))

    Transforms inputs before applying the base kernel.
    """
    def __init__(self, kernel: Kernel, iwarping_fn: callable):
        """
        Parameters:
        - kernel (Kernel): Base kernel.
        - iwarping_fn (callable): Input warping function.
        """

# Likelihood functions that define the observation model relating GP function values to observed data.
class Likelihood:
    """
    Base class for GP likelihood functions.

    Defines how GP function values relate to observed data, including
    noise models and observation transformations.
    """
    def forward(self, function_dist: dist.Distribution, y: torch.Tensor = None) -> dist.Distribution:
        """
        Forward pass through the likelihood.

        Parameters:
        - function_dist (Distribution): GP function value distribution.
        - y (Tensor, optional): Observed data.

        Returns:
            Distribution: Observation distribution.
        """
class Gaussian(Likelihood):
    """
    Gaussian likelihood for continuous observations with additive noise.

    y = f(x) + ε, where ε ~ N(0, noise_variance)

    Most common likelihood for regression problems.
    """
    def __init__(self, noise: torch.Tensor = None, name: str = "Gaussian"):
        """
        Parameters:
        - noise (Tensor, optional): Noise variance parameter.
        - name (str): Likelihood name for parameter scoping.

        Examples:
            >>> # Fixed noise variance
            >>> likelihood = Gaussian(noise=0.1)
            >>>
            >>> # Learnable noise variance
            >>> likelihood = Gaussian()  # noise will be learned
        """
class Bernoulli(Likelihood):
    """
    Bernoulli likelihood for binary classification.

    p(y = 1 | f) = σ(f), where σ is the sigmoid function.

    Maps GP function values to class probabilities.
    """
    def __init__(self, name: str = "Bernoulli"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """
class Poisson(Likelihood):
    """
    Poisson likelihood for count data.

    p(y | f) = Poisson(exp(f))

    Uses a log-link to ensure a positive rate parameter.
    """
    def __init__(self, name: str = "Poisson"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """
class Beta(Likelihood):
    """
    Beta likelihood for data on the unit interval.

    Useful for modeling proportions, rates, or probabilities.
    """
    def __init__(self, name: str = "Beta"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """
class Gamma(Likelihood):
    """
    Gamma likelihood for positive continuous data.

    Useful for modeling positive quantities like waiting times.
    """
    def __init__(self, name: str = "Gamma"):
        """
        Parameters:
        - name (str): Likelihood name for parameter scoping.
        """

# Complete Gaussian process models combining kernels and likelihoods for different modeling scenarios.
class GPModel:
    """
    Base Gaussian process model class.

    Combines a covariance kernel with an observation likelihood to form a
    complete GP model for regression or classification.
    """
    def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
                 likelihood: Likelihood, name: str = "GPModel"):
        """
        Parameters:
        - X (Tensor): Training inputs of shape (n, input_dim).
        - y (Tensor): Training outputs of shape (n,) or (n, output_dim).
        - kernel (Kernel): Covariance kernel function.
        - likelihood (Likelihood): Observation likelihood model.
        - name (str): Model name for parameter scoping.

        Examples:
            >>> X_train = torch.randn(100, 2)
            >>> y_train = torch.randn(100)
            >>> kernel = RBF(input_dim=2)
            >>> likelihood = Gaussian()
            >>> gp = GPModel(X_train, y_train, kernel, likelihood)
        """
    def model(self):
        """Define the GP generative model."""
    def guide(self):
        """Define the variational guide for approximate inference."""
    def forward(self, X_new: torch.Tensor, full_cov: bool = False, noiseless: bool = True) -> dist.Distribution:
        """
        Make predictions at new input locations.

        Parameters:
        - X_new (Tensor): New input locations of shape (m, input_dim).
        - full_cov (bool): Whether to return the full covariance matrix.
        - noiseless (bool): Whether to exclude observation noise from predictions.

        Returns:
            Distribution: Predictive distribution at the new locations.

        Examples:
            >>> X_test = torch.randn(20, 2)
            >>> pred_dist = gp.forward(X_test)
            >>> pred_mean = pred_dist.mean
            >>> pred_var = pred_dist.variance
        """
class VariationalGP(GPModel):
    """
    Variational Gaussian process for scalable inference.

    Uses sparse GP approximations with inducing points for efficient
    inference on large datasets.
    """
    def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
                 likelihood: Likelihood, X_u: torch.Tensor = None,
                 num_inducing: int = None, name: str = "VariationalGP"):
        """
        Parameters:
        - X (Tensor): Training inputs.
        - y (Tensor): Training outputs.
        - kernel (Kernel): Covariance kernel.
        - likelihood (Likelihood): Observation likelihood.
        - X_u (Tensor, optional): Inducing input locations.
        - num_inducing (int, optional): Number of inducing points (if X_u not provided).
        - name (str): Model name.

        Examples:
            >>> # Large dataset GP with 50 inducing points
            >>> X_train = torch.randn(10000, 3)
            >>> y_train = torch.randn(10000)
            >>> vgp = VariationalGP(X_train, y_train, RBF(3), Gaussian(), num_inducing=50)
        """
class SparseGPRegression(VariationalGP):
    """
    Sparse GP regression model using variational inference.

    Optimized for regression tasks with Gaussian likelihoods and
    large datasets.
    """
    pass
class VariationalSparseGP(VariationalGP):
    """
    General variational sparse GP with flexible likelihoods.

    Supports non-Gaussian likelihoods through variational inference.
    """
    pass

# Models for handling multiple outputs and structured output spaces.
class MultiOutputGP(GPModel):
    """
    Multi-output Gaussian process for vector-valued functions.

    Models correlations between different output dimensions using
    appropriate kernel structures.
    """
    def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,
                 likelihood: Likelihood, num_outputs: int, name: str = "MultiOutputGP"):
        """
        Parameters:
        - X (Tensor): Training inputs.
        - y (Tensor): Training outputs of shape (n, num_outputs).
        - kernel (Kernel): Base kernel (will be extended for multiple outputs).
        - likelihood (Likelihood): Output likelihood.
        - num_outputs (int): Number of output dimensions.
        - name (str): Model name.
        """
class VariationalMultiOutputGP(MultiOutputGP):
    """
    Variational multi-output GP for scalable multi-output modeling.

    Combines multi-output structure with sparse GP approximations.
    """
    pass

# Helper functions and utilities for GP modeling and inference.
def conditional(X_new: torch.Tensor, X_train: torch.Tensor, kernel: Kernel,
                f_loc: torch.Tensor, f_scale_tril: torch.Tensor = None,
                full_cov: bool = False, whiten: bool = False,
                jitter: float = 1e-6) -> dist.Distribution:
    """
    Compute the conditional GP distribution p(f* | f, X*, X).

    Parameters:
    - X_new (Tensor): Test input locations.
    - X_train (Tensor): Training input locations.
    - kernel (Kernel): Covariance kernel.
    - f_loc (Tensor): Mean of training function values.
    - f_scale_tril (Tensor, optional): Cholesky factor of the training covariance.
    - full_cov (bool): Whether to return the full covariance.
    - whiten (bool): Whether to use the whitened parameterization.
    - jitter (float): Diagonal jitter for numerical stability.

    Returns:
        Distribution: Conditional GP distribution.
    """
def util_gp_prior(X: torch.Tensor, kernel: Kernel, jitter: float = 1e-6) -> dist.Distribution:
    """
    Compute the GP prior distribution at the given input locations.

    Parameters:
    - X (Tensor): Input locations.
    - kernel (Kernel): Covariance kernel.
    - jitter (float): Diagonal jitter for numerical stability.

    Returns:
        Distribution: GP prior distribution.
    """
def train_gp(gp_model: GPModel, optimizer, num_steps: int = 1000,
             retain_graph: bool = False) -> List[float]:
    """
    Train a GP model using optimization.

    Parameters:
    - gp_model (GPModel): GP model to train.
    - optimizer: PyTorch optimizer over the model's parameters.
    - num_steps (int): Number of optimization steps.
    - retain_graph (bool): Whether to retain the computation graph
      between backward passes.

    Returns:
        List[float]: Training loss history.

    Examples:
        >>> optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)
        >>> losses = train_gp(gp, optimizer, num_steps=500)
    """

import torch
import pyro
import pyro.distributions as dist
from pyro.contrib.gp import GPModel
from pyro.contrib.gp.kernels import RBF
from pyro.contrib.gp.likelihoods import Gaussian

# Generate noisy observations of a sine function.
X_train = torch.linspace(0, 10, 50).unsqueeze(-1)
y_train = torch.sin(X_train.squeeze()) + 0.1 * torch.randn(50)

# Define the GP model: RBF covariance with Gaussian observation noise.
kernel = RBF(input_dim=1, lengthscale=1.0, variance=1.0)
likelihood = Gaussian(noise=0.1)
gp = GPModel(X_train, y_train, kernel, likelihood)

# Training: maximize the marginal likelihood with Adam.
optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)
for i in range(1000):
    optimizer.zero_grad()
    loss = -gp.model().log_prob(y_train)
    loss.backward()
    optimizer.step()

# Prediction at new inputs (extrapolating beyond the training range).
X_test = torch.linspace(0, 12, 100).unsqueeze(-1)
with torch.no_grad():
    pred_dist = gp.forward(X_test)
    pred_mean = pred_dist.mean
    pred_std = pred_dist.stddev

from pyro.contrib.gp import VariationalGP
from pyro.contrib.gp import VariationalGP

# Large dataset: 10,000 points in 3 dimensions.
X_train = torch.randn(10000, 3)
y_train = torch.sin(X_train.sum(dim=1)) + 0.1 * torch.randn(10000)

# Sparse GP approximation with 100 inducing points.
kernel = RBF(input_dim=3)
likelihood = Gaussian()
sparse_gp = VariationalGP(X_train, y_train, kernel, likelihood, num_inducing=100)

# Training with stochastic variational inference (SVI).
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

svi = SVI(sparse_gp.model, sparse_gp.guide, Adam({"lr": 0.01}), Trace_ELBO())
for step in range(2000):
    loss = svi.step()
    if step % 200 == 0:
        print(f"Step {step}, Loss: {loss}")

from pyro.contrib.gp import MultiOutputGP
from pyro.contrib.gp import MultiOutputGP

# Multi-dimensional output data.
X_train = torch.randn(200, 2)
y_train = torch.randn(200, 3)  # 3-dimensional output

# Multi-output GP sharing a base kernel across outputs.
kernel = RBF(input_dim=2)
likelihood = Gaussian()
multi_gp = MultiOutputGP(X_train, y_train, kernel, likelihood, num_outputs=3)

# Training and prediction proceed as for the basic GP.

from pyro.contrib.gp.kernels import RBF, Periodic, WhiteNoise, Sum, Product
from pyro.contrib.gp.kernels import RBF, Periodic, WhiteNoise, Sum, Product

# Complex kernel combining multiple components.
base_kernel = RBF(input_dim=1, lengthscale=2.0, variance=1.0)
periodic_kernel = Periodic(input_dim=1, period=7.0, lengthscale=1.0)
noise_kernel = WhiteNoise(input_dim=1, variance=0.1)

# Combine kernels: RBF + periodic pattern + independent noise.
combined_kernel = Sum(Sum(base_kernel, periodic_kernel), noise_kernel)

# Use the composite kernel in a GP model.
gp = GPModel(X_train, y_train, combined_kernel, Gaussian())

from pyro.contrib.gp.likelihoods import Bernoulli
from pyro.contrib.gp.likelihoods import Bernoulli

# Binary classification data.
X_train = torch.randn(100, 2)
y_train = torch.randint(0, 2, (100,)).float()

# GP classifier: RBF kernel with Bernoulli likelihood.
kernel = RBF(input_dim=2)
likelihood = Bernoulli()
gp_classifier = GPModel(X_train, y_train, kernel, likelihood)

# Training requires variational inference for the non-Gaussian likelihood.
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

svi = SVI(gp_classifier.model, gp_classifier.guide, Adam({"lr": 0.01}), Trace_ELBO())
for step in range(1000):
    loss = svi.step()

# Install with Tessl CLI
# npx tessl i tessl/pypi-pyro-ppl