Gaussian Processes

PyMC3's Gaussian process module provides flexible non-parametric modeling capabilities for regression, classification, and other machine learning tasks. The implementation supports various covariance functions, mean functions, and inference methods including exact, sparse, and Kronecker-structured GPs.

Capabilities

GP Classes

Core Gaussian process implementations with different inference methods and computational strategies.

class Marginal:
    """
    Marginal Gaussian Process for regression with exact inference.
    
    Integrates out the GP function values analytically, providing exact
    posterior inference suitable for moderate-sized datasets (< 5000 points).
    """
    
    def __init__(self, cov_func, mean_func=None):
        """
        Initialize marginal GP.
        
        Parameters:
        - cov_func: covariance function defining GP prior
        - mean_func: mean function (zero mean if None)
        
        Returns:
        - Marginal: marginal GP object
        """
    
    def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
        """
        Add marginal likelihood term to model.
        
        Parameters:
        - name: str, variable name
        - X: array, input locations (n_samples, n_features)
        - y: array, observed outputs (n_samples,)
        - noise: float or array, standard deviation of the Gaussian observation noise
        - is_observed: bool, whether y is observed data
        
        Returns:
        - TensorVariable: marginal likelihood contribution
        """
    
    def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
        """
        Posterior predictive distribution at new locations.
        
        Parameters:
        - name: str, variable name for predictions
        - Xnew: array, new input locations
        - pred_noise: bool, include predictive noise
        - given: dict, conditioning data (uses marginal_likelihood if None)
        
        Returns:
        - TensorVariable: predictive distribution
        """
    
    def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None):
        """
        Posterior predictive mean and covariance.
        
        Parameters:
        - Xnew: array, prediction inputs
        - point: dict, parameter values for prediction
        - diag: bool, return only diagonal of covariance
        - pred_noise: bool, include predictive noise
        - given: dict, conditioning data
        
        Returns:
        - tuple: (mean, variance) predictions
        """

class Latent:
    """
    Latent Gaussian Process for non-Gaussian likelihoods.
    
    Treats GP function values as latent variables for use with
    non-Gaussian observation models like Poisson, binomial, etc.
    """
    
    def __init__(self, cov_func, mean_func=None):
        """
        Initialize latent GP.
        
        Parameters:
        - cov_func: covariance function
        - mean_func: mean function (zero if None)
        """
    
    def prior(self, name, X, reparameterize=True, **kwargs):
        """
        GP prior distribution over function values.
        
        Parameters:
        - name: str, variable name
        - X: array, input locations
        - reparameterize: bool, use non-centered parameterization
        
        Returns:
        - TensorVariable: GP function values
        """
    
    def conditional(self, name, Xnew, given=None, **kwargs):
        """
        Conditional distribution at new inputs.
        
        Parameters:
        - name: str, variable name
        - Xnew: array, new input locations  
        - given: dict, conditioning GP values
        
        Returns:
        - TensorVariable: conditional GP values
        """

class MarginalSparse:
    """
    Sparse Gaussian Process using inducing points for scalability.
    
    Approximates full GP using a smaller set of inducing points,
    enabling GP inference for large datasets (> 10,000 points).
    """
    
    def __init__(self, cov_func, approx='FITC', mean_func=None):
        """
        Initialize sparse GP.
        
        Parameters:
        - cov_func: covariance function
        - approx: str, sparse approximation method ('FITC', 'VFE', 'DTC')
        - mean_func: mean function
        """
    
    def marginal_likelihood(self, name, X, Xu, y, noise, is_observed=True, **kwargs):
        """
        Sparse marginal likelihood.
        
        Parameters:
        - name: str, variable name
        - X: array, training inputs
        - Xu: array, inducing point locations
        - y: array, training outputs
        - noise: float, standard deviation of the observation noise
        - is_observed: bool, whether y is observed
        
        Returns:
        - TensorVariable: sparse marginal likelihood
        """

class MarginalKron:
    """
    Kronecker-structured GP for multi-dimensional inputs.
    
    Exploits Kronecker structure in covariance for efficient
    computation with grid-structured or separable inputs.
    """
    
    def __init__(self, cov_funcs, mean_func=None):
        """
        Initialize Kronecker GP.
        
        Parameters:
        - cov_funcs: list, covariance functions for each dimension
        - mean_func: mean function
        """

class LatentKron:
    """
    Latent Kronecker GP for structured multi-dimensional problems.
    
    Combines Kronecker structure with latent variable formulation
    for non-Gaussian likelihoods on grid data.
    """

class TP:
    """
    Student's T Process for robust non-parametric regression.
    
    Heavy-tailed extension of GPs using Student's t distributions
    for robustness to outliers and model misspecification.
    """
    
    def __init__(self, cov_func, mean_func=None, nu=None):
        """
        Initialize T process.
        
        Parameters:
        - cov_func: covariance function
        - mean_func: mean function
        - nu: float, degrees of freedom of the Student's t distribution
        """

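Because TP mirrors the Latent interface (prior and conditional methods), a robust-regression model can be sketched as follows; the nu value and the synthetic data are assumptions for illustration.

import numpy as np
import pymc3 as pm

X = np.linspace(0, 10, 40)[:, None]
y = np.sin(X.ravel()) + 0.1 * np.random.randn(40)
y[::10] += 2.0  # inject a few outliers

with pm.Model():
    ls = pm.Gamma('ls', alpha=2, beta=1)
    cov = pm.gp.cov.ExpQuad(1, ls)
    tp = pm.gp.TP(cov_func=cov, nu=3)  # nu: degrees of freedom (assumed value)
    f = tp.prior('f', X=X)             # heavy-tailed latent function values
    sigma = pm.HalfNormal('sigma', sigma=0.5)
    y_ = pm.Normal('y', mu=f, sigma=sigma, observed=y)
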
Covariance Functions

Covariance functions defining GP priors through the pm.gp.cov module.

# Available as pm.gp.cov.ExpQuad, pm.gp.cov.Matern52, etc.

class ExpQuad:
    """
    Exponentiated quadratic (RBF/squared exponential) covariance.
    
    Smooth, infinitely differentiable covariance function suitable
    for modeling smooth processes.
    """
    
    def __init__(self, input_dim, ls=None, ls_inv=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, number of input dimensions
        - ls: float or array, length scale parameters
        - ls_inv: float or array, inverse length scales (alternative to ls)
        - active_dims: list, active input dimensions
        """

class Matern52:
    """
    Matérn covariance function with ν = 5/2.
    
    Twice differentiable covariance providing balance between
    smoothness and flexibility.
    """

class Matern32:
    """
    Matérn covariance function with ν = 3/2.
    
    Once differentiable covariance for moderately rough functions.
    """

class RatQuad:
    """
    Rational quadratic covariance function.
    
    Infinite mixture of RBF kernels with different length scales,
    allowing functions to vary over multiple length scales at once.
    """
    
    def __init__(self, input_dim, alpha=None, ls=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - alpha: float, shape parameter controlling tail behavior
        - ls: float or array, length scale parameters
        - active_dims: list, active dimensions
        """

class Linear:
    """
    Linear covariance function.
    
    Models linear relationships and polynomial trends
    when combined with other covariance functions.
    """
    
    def __init__(self, input_dim, c=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - c: float or array, offset parameters
        - active_dims: list, active dimensions
        """

class Polynomial:
    """
    Polynomial covariance function.
    
    Models polynomial relationships of specified degree.
    """
    
    def __init__(self, input_dim, c=None, d=2, offset=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - c: float or array, input centering parameter
        - d: int, polynomial degree
        - offset: float, offset parameter
        - active_dims: list, active dimensions
        """

class Cosine:
    """
    Cosine covariance function for periodic processes.
    
    Models exactly periodic functions with known period.
    """
    
    def __init__(self, input_dim, ls=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - ls: float or array, period parameters
        - active_dims: list, active dimensions
        """

class Periodic:
    """
    Periodic covariance function.
    
    Combines periodicity with additional smoothness control
    for quasi-periodic and approximately periodic functions.
    """
    
    def __init__(self, input_dim, period=None, ls=None, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - period: float or array, period parameters
        - ls: float or array, length scale parameters
        - active_dims: list, active dimensions
        """

class WarpedInput:
    """
    Warped input covariance function.
    
    Applies input transformation before computing covariance,
    enabling non-stationary modeling.
    """
    
    def __init__(self, input_dim, cov_func, warp_func, args=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - cov_func: base covariance function
        - warp_func: function applied to inputs before computing covariance
        - args: additional arguments passed to the warping function
        """

class Gibbs:
    """
    Gibbs non-stationary covariance function.
    
    Length scale varies as function of input location,
    enabling locally adaptive smoothness.
    """
    
    def __init__(self, input_dim, lengthscale_func, args, active_dims=None):
        """
        Parameters:
        - input_dim: int, input dimension
        - lengthscale_func: function defining spatially varying length scale
        - args: arguments for length scale function
        - active_dims: list, active dimensions
        """

# Covariance function operators
class Add:
    """
    Additive covariance (sum of covariance functions).
    
    Combines multiple covariance functions additively
    for modeling multiple patterns simultaneously.
    """

class Prod:
    """
    Multiplicative covariance (product of covariance functions).
    
    Models interactions between different input dimensions
    or combines different types of structure.
    """

class Kron:
    """
    Kronecker product covariance for separable multi-dimensional inputs.
    
    Assumes independence between input dimensions while
    maintaining within-dimension correlations.
    """

Mean Functions

Mean functions for GP priors through the pm.gp.mean module.

# Available as pm.gp.mean.Zero, pm.gp.mean.Constant, etc.

class Zero:
    """
    Zero mean function (default).
    
    Assumes zero prior mean for GP, suitable when
    data is centered or mean is captured by other model components.
    """

class Constant:
    """
    Constant mean function.
    
    Non-zero constant prior mean for GP.
    """
    
    def __init__(self, c=None):
        """
        Parameters:
        - c: float, constant mean value
        """

class Linear:
    """
    Linear mean function.
    
    Linear trend in GP mean function.
    """
    
    def __init__(self, coeffs=None, intercept=None):
        """
        Parameters:
        - coeffs: array, linear coefficients
        - intercept: float, intercept term
        """

GP Utilities

Utility functions for GP modeling and visualization.

def plot_gp_dist(ax, samples, x, plot_samples=True, palette='Reds',
                 fill_alpha=0.8, samples_alpha=0.1, fill_kwargs=None,
                 samples_kwargs=None):
    """
    Plot GP distribution with uncertainty bands.
    
    Visualizes GP posterior with credible intervals and
    optional sample trajectories from the posterior.
    
    Parameters:
    - ax: matplotlib axis for plotting
    - samples: array, GP sample trajectories, shape (n_samples, n_points)
    - x: array, input locations corresponding to the sampled function values
    - plot_samples: bool, whether to plot individual samples
    - palette: str, color palette for visualization
    - fill_alpha: float, transparency for uncertainty bands
    - samples_alpha: float, transparency for sample lines
    - fill_kwargs: dict, arguments for uncertainty band plotting
    - samples_kwargs: dict, arguments for sample line plotting
    
    Returns:
    - matplotlib artists: plot elements
    """

# GP utilities available in pm.gp.util module
def conditional_cov(Kxx, Kxz, Kzz_inv):
    """
    Compute conditional covariance for GP prediction.
    
    Parameters:
    - Kxx: array, covariance between prediction points
    - Kxz: array, cross-covariance between prediction and conditioning points
    - Kzz_inv: array, inverse covariance of conditioning points
    
    Returns:
    - array: conditional covariance matrix
    """

def conditional_mean(Kxz, Kzz_inv, y):
    """
    Compute conditional mean for GP prediction.
    
    Parameters:
    - Kxz: array, cross-covariance matrix
    - Kzz_inv: array, inverse conditioning covariance
    - y: array, conditioning outputs
    
    Returns:
    - array: conditional mean
    """

def stabilize(K, jitter=1e-6):
    """
    Numerically stabilize covariance matrix.
    
    Parameters:
    - K: array, covariance matrix
    - jitter: float, diagonal noise for numerical stability
    
    Returns:
    - array: stabilized covariance matrix
    """

Usage Examples

Basic GP Regression

import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt

# Generate synthetic data
np.random.seed(42)
n = 50
X = np.linspace(0, 10, n)[:, None]
true_func = lambda x: np.sin(x) + 0.1 * x**2
y = true_func(X.ravel()) + np.random.normal(0, 0.1, n)

# GP regression model
with pm.Model() as gp_model:
    # Covariance function parameters
    ls = pm.Gamma('ls', alpha=2, beta=1)  # length scale
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)  # signal variance
    
    # Covariance function
    cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)
    
    # GP prior
    gp = pm.gp.Marginal(cov_func=cov)
    
    # Observation noise
    sigma_y = pm.HalfNormal('sigma_y', sigma=0.5)
    
    # Marginal likelihood
    y_obs = gp.marginal_likelihood('y_obs', X=X, y=y, noise=sigma_y)
    
    # Inference
    trace = pm.sample(1000, tune=1000, chains=2)

# Prediction
X_new = np.linspace(-1, 11, 100)[:, None]
with gp_model:
    f_pred = gp.conditional('f_pred', X_new)
    pred_samples = pm.sample_posterior_predictive(trace, var_names=['f_pred'], samples=100)

GP Classification

# GP for binary classification
n_train = 100
X_train = np.random.randn(n_train, 2)
true_boundary = lambda x: x[:, 0] + 0.5 * x[:, 1]**2
y_train = (true_boundary(X_train) > 0).astype(int)

with pm.Model() as gp_classification:
    # GP parameters
    ls = pm.Gamma('ls', alpha=2, beta=1, shape=2)
    
    # Covariance function  
    cov = pm.gp.cov.ExpQuad(2, ls)
    
    # Latent GP
    gp = pm.gp.Latent(cov_func=cov)
    
    # GP prior over latent function
    f = gp.prior('f', X=X_train)
    
    # Bernoulli likelihood with a logistic (sigmoid) link
    y_obs = pm.Bernoulli('y_obs', p=pm.math.sigmoid(f), observed=y_train)
    
    # Sampling
    trace = pm.sample(1000, tune=1000)
    
# Prediction for classification
X_test = np.random.randn(50, 2)
with gp_classification:
    f_test = gp.conditional('f_test', X_test)
    test_samples = pm.sample_posterior_predictive(trace, var_names=['f_test'], samples=100)
    
# Classification probabilities: apply the logistic function with SciPy,
# since the sampled latent values are NumPy arrays at this point
from scipy.special import expit
prob_samples = expit(test_samples['f_test'])
mean_probs = prob_samples.mean(axis=0)

Sparse GP for Large Datasets

# Large dataset with sparse GP
n_large = 5000
X_large = np.random.uniform(0, 20, (n_large, 1))
y_large = np.sin(X_large.ravel()) + 0.2 * np.random.randn(n_large)

# Inducing points
n_inducing = 50
Xu = np.linspace(0, 20, n_inducing)[:, None]

with pm.Model() as sparse_gp:
    # GP parameters
    ls = pm.Gamma('ls', alpha=2, beta=1)
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)
    
    # Covariance function
    cov = sigma_f**2 * pm.gp.cov.Matern52(1, ls)
    
    # Sparse GP
    gp = pm.gp.MarginalSparse(cov_func=cov, approx='FITC')
    
    # Noise parameter
    sigma_y = pm.HalfNormal('sigma_y', sigma=1)
    
    # Sparse marginal likelihood
    y_obs = gp.marginal_likelihood('y_obs', 
                                  X=X_large, 
                                  Xu=Xu, 
                                  y=y_large, 
                                  noise=sigma_y)
    
    # Efficient sampling
    trace = pm.sample(500, tune=500)
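
Rather than an evenly spaced grid, inducing point locations can also be initialized from the data with PyMC3's k-means helper:

Xu_init = pm.gp.util.kmeans_inducing_points(n_inducing, X_large)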

Multi-Output GP

# Multi-output GP regression
n_outputs = 3
X_multi = np.linspace(0, 5, 30)[:, None]

# Correlated outputs
true_funcs = [
    lambda x: np.sin(2*x),
    lambda x: np.cos(2*x),  
    lambda x: np.sin(2*x) + np.cos(2*x)
]

Y_multi = np.column_stack([f(X_multi.ravel()) for f in true_funcs])
Y_multi += 0.1 * np.random.randn(*Y_multi.shape)

with pm.Model() as multi_gp:
    # Shared covariance parameters
    ls = pm.Gamma('ls', alpha=2, beta=1)
    
    # Output-specific parameters
    sigma_f = pm.HalfNormal('sigma_f', sigma=1, shape=n_outputs)
    sigma_y = pm.HalfNormal('sigma_y', sigma=1, shape=n_outputs)
    
    # Independent GPs for each output
    gps = []
    for i in range(n_outputs):
        cov_i = sigma_f[i]**2 * pm.gp.cov.ExpQuad(1, ls)
        gp_i = pm.gp.Marginal(cov_func=cov_i)
        
        y_obs_i = gp_i.marginal_likelihood(f'y_obs_{i}', 
                                          X=X_multi, 
                                          y=Y_multi[:, i], 
                                          noise=sigma_y[i])
        gps.append(gp_i)
    
    trace = pm.sample(1000, tune=1000)

Periodic GP

# Periodic GP for seasonal data
t = np.linspace(0, 4*np.pi, 100)
y_seasonal = np.sin(t) + 0.5*np.cos(2*t) + 0.1*t + 0.1*np.random.randn(len(t))

with pm.Model() as periodic_gp:
    # Periodic covariance parameters
    period = pm.Gamma('period', alpha=10, beta=1.5)  # Expected period ~ 2π
    ls_periodic = pm.Gamma('ls_periodic', alpha=2, beta=1)
    
    # Trend parameters  
    ls_trend = pm.Gamma('ls_trend', alpha=2, beta=0.5)
    
    # Signal variances
    sigma_periodic = pm.HalfNormal('sigma_periodic', sigma=1)
    sigma_trend = pm.HalfNormal('sigma_trend', sigma=1)
    
    # Combined covariance: periodic + trend
    cov_periodic = sigma_periodic**2 * pm.gp.cov.Periodic(1, period=period, ls=ls_periodic)
    cov_trend = sigma_trend**2 * pm.gp.cov.ExpQuad(1, ls_trend)
    cov_combined = cov_periodic + cov_trend
    
    # GP model
    gp = pm.gp.Marginal(cov_func=cov_combined)
    
    # Observation noise
    sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)
    
    # Likelihood
    X_time = t[:, None]
    y_obs = gp.marginal_likelihood('y_obs', X=X_time, y=y_seasonal, noise=sigma_y)
    
    trace = pm.sample(1000, tune=1000)

# Extrapolate to future
t_future = np.linspace(0, 6*np.pi, 150)[:, None]
with periodic_gp:
    f_future = gp.conditional('f_future', t_future)
    future_pred = pm.sample_posterior_predictive(trace, var_names=['f_future'], samples=100)

Non-Stationary GP

# Non-stationary GP using input warping
X_nonstat = np.linspace(0, 10, 60)[:, None]
# Function with varying smoothness
y_nonstat = np.where(X_nonstat.ravel() < 5, 
                    np.sin(5*X_nonstat.ravel()),  # High frequency
                    np.sin(X_nonstat.ravel()))    # Low frequency  
y_nonstat += 0.1 * np.random.randn(len(y_nonstat))

with pm.Model() as nonstat_gp:
    # Warping function parameters
    alpha = pm.Normal('alpha', mu=0, sigma=1)
    beta = pm.HalfNormal('beta', sigma=1)
    
    # Input warping: stretch inputs differently in different regions
    def warp_func(x, alpha, beta):
        return x + alpha * pm.math.tanh(beta * (x - 5))
    
    # Base covariance function
    ls = pm.Gamma('ls', alpha=2, beta=1)
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)
    base_cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)
    
    # Warped covariance (WarpedInput takes the input dimension first)
    cov_warped = pm.gp.cov.WarpedInput(1, cov_func=base_cov,
                                       warp_func=warp_func, args=(alpha, beta))
    
    # GP with warped inputs
    gp = pm.gp.Marginal(cov_func=cov_warped)
    
    # Observation noise
    sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)
    
    # Likelihood
    y_obs = gp.marginal_likelihood('y_obs', X=X_nonstat, y=y_nonstat, noise=sigma_y)
    
    trace = pm.sample(1000, tune=1000)

Hierarchical GP

# Hierarchical GP with group-specific parameters
n_groups = 4
n_per_group = 25

# Generate group data with different characteristics
group_data = []
group_labels = []
for g in range(n_groups):
    X_g = np.random.uniform(0, 10, n_per_group)[:, None]
    # Different length scales per group
    ls_true = 1 + g * 0.5
    y_g = np.sin(X_g.ravel() / ls_true) + 0.1 * np.random.randn(n_per_group)
    
    group_data.append((X_g, y_g))
    group_labels.extend([g] * n_per_group)

# Combine data
X_all = np.vstack([X for X, y in group_data])  
y_all = np.hstack([y for X, y in group_data])
group_idx = np.array(group_labels)

with pm.Model() as hierarchical_gp:
    # Hierarchical priors for length scales
    mu_ls = pm.Gamma('mu_ls', alpha=2, beta=1)
    sigma_ls = pm.HalfNormal('sigma_ls', sigma=1)
    ls_group = pm.Gamma('ls_group', mu=mu_ls, sigma=sigma_ls, shape=n_groups)
    
    # Shared signal variance
    sigma_f = pm.HalfNormal('sigma_f', sigma=1)
    
    # Group-specific GPs
    gps = []
    for g in range(n_groups):
        # Group mask
        mask = group_idx == g
        X_g = X_all[mask]
        y_g = y_all[mask]
        
        # Group covariance function
        cov_g = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls_group[g])
        gp_g = pm.gp.Marginal(cov_func=cov_g)
        
        # Group likelihood
        sigma_y = pm.HalfNormal(f'sigma_y_{g}', sigma=0.5)
        y_obs_g = gp_g.marginal_likelihood(f'y_obs_{g}', 
                                          X=X_g, 
                                          y=y_g, 
                                          noise=sigma_y)
        gps.append(gp_g)
    
    trace = pm.sample(1000, tune=1000)
    
# Analyze group differences
ls_posterior = trace['ls_group']
print("Group length scales (posterior means):")
for g in range(n_groups):
    print(f"Group {g}: {ls_posterior[:, g].mean():.3f}")
