Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Theano
PyMC3's Gaussian process module provides flexible non-parametric modeling capabilities for regression, classification, and other machine learning tasks. The implementation supports various covariance functions, mean functions, and inference methods including exact, sparse, and Kronecker-structured GPs.
Core Gaussian process implementations with different inference methods and computational strategies.
class Marginal:
"""
Marginal Gaussian Process for regression with exact inference.
Integrates out the GP function values analytically, providing exact
posterior inference suitable for moderate-sized datasets (< 5000 points).
"""
def __init__(self, cov_func, mean_func=None):
"""
Initialize marginal GP.
Parameters:
- cov_func: covariance function defining GP prior
- mean_func: mean function (zero mean if None)
Returns:
- Marginal: marginal GP object
"""
def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
"""
Add marginal likelihood term to model.
Parameters:
- name: str, variable name
- X: array, input locations (n_samples, n_features)
- y: array, observed outputs (n_samples,)
- noise: float or array, standard deviation of the observation noise
- is_observed: bool, whether y is observed data
Returns:
- TensorVariable: marginal likelihood contribution
"""
def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs):
"""
Posterior predictive distribution at new locations.
Parameters:
- name: str, variable name for predictions
- Xnew: array, new input locations
- pred_noise: bool, include predictive noise
- given: dict, conditioning data (uses marginal_likelihood if None)
Returns:
- TensorVariable: predictive distribution
"""
def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None):
"""
Posterior predictive mean and covariance.
Parameters:
- Xnew: array, prediction inputs
- point: dict, parameter values for prediction
- diag: bool, return only diagonal of covariance
- pred_noise: bool, include predictive noise
- given: dict, conditioning data
Returns:
- tuple: (mean, variance) predictions
"""
class Latent:
"""
Latent Gaussian Process for non-Gaussian likelihoods.
Treats GP function values as latent variables for use with
non-Gaussian observation models like Poisson, binomial, etc.
"""
def __init__(self, cov_func, mean_func=None):
"""
Initialize latent GP.
Parameters:
- cov_func: covariance function
- mean_func: mean function (zero if None)
"""
def prior(self, name, X, reparameterize=True, **kwargs):
"""
GP prior distribution over function values.
Parameters:
- name: str, variable name
- X: array, input locations
- reparameterize: bool, use non-centered parameterization
Returns:
- TensorVariable: GP function values
"""
def conditional(self, name, Xnew, given=None, **kwargs):
"""
Conditional distribution at new inputs.
Parameters:
- name: str, variable name
- Xnew: array, new input locations
- given: dict, conditioning GP values
Returns:
- TensorVariable: conditional GP values
"""
class MarginalSparse:
"""
Sparse Gaussian Process using inducing points for scalability.
Approximates full GP using a smaller set of inducing points,
enabling GP inference for large datasets (> 10,000 points).
"""
def __init__(self, cov_func, approx='FITC', mean_func=None):
"""
Initialize sparse GP.
Parameters:
- cov_func: covariance function
- approx: str, sparse approximation method ('FITC', 'VFE', 'DTC')
- mean_func: mean function
"""
def marginal_likelihood(self, name, X, Xu, y, noise, is_observed=True, **kwargs):
"""
Sparse marginal likelihood.
Parameters:
- name: str, variable name
- X: array, training inputs
- Xu: array, inducing point locations
- y: array, training outputs
- noise: float, observation noise
- is_observed: bool, whether y is observed
Returns:
- TensorVariable: sparse marginal likelihood
"""
class MarginalKron:
"""
Kronecker-structured GP for multi-dimensional inputs.
Exploits Kronecker structure in covariance for efficient
computation with grid-structured or separable inputs.
"""
def __init__(self, cov_funcs, mean_func=None):
"""
Initialize Kronecker GP.
Parameters:
- cov_funcs: list, covariance functions for each dimension
- mean_func: mean function
"""
class LatentKron:
"""
Latent Kronecker GP for structured multi-dimensional problems.
Combines Kronecker structure with latent variable formulation
for non-Gaussian likelihoods on grid data.
"""
class TP:
"""
Student's T Process for robust non-parametric regression.
Heavy-tailed extension of GPs using Student's t distributions
for robustness to outliers and model misspecification.
"""
def __init__(self, cov_func, nu, mean_func=None):
"""
Initialize T process.
Parameters:
- cov_func: covariance function
- nu: degrees-of-freedom parameter controlling tail heaviness
- mean_func: mean function
"""

Covariance functions defining GP priors through the pm.gp.cov module.
# Available as pm.gp.cov.ExpQuad, pm.gp.cov.Matern52, etc.
class ExpQuad:
"""
Exponentiated quadratic (RBF/squared exponential) covariance.
Smooth, infinitely differentiable covariance function suitable
for modeling smooth processes.
"""
def __init__(self, input_dim, ls=None, ls_inv=None, active_dims=None):
"""
Parameters:
- input_dim: int, number of input dimensions
- ls: float or array, length scale parameters
- ls_inv: float or array, inverse length scales (alternative to ls)
- active_dims: list, active input dimensions
"""
class Matern52:
"""
Matérn covariance function with ν = 5/2.
Twice differentiable covariance providing balance between
smoothness and flexibility.
"""
class Matern32:
"""
Matérn covariance function with ν = 3/2.
Once differentiable covariance for moderately rough functions.
"""
class RatQuad:
"""
Rational quadratic covariance function.
Infinite mixture of RBF kernels with different length scales,
providing scale-invariant modeling capability.
"""
def __init__(self, input_dim, alpha=None, ls=None, active_dims=None):
"""
Parameters:
- input_dim: int, input dimension
- alpha: float, shape parameter controlling tail behavior
- ls: float or array, length scale parameters
- active_dims: list, active dimensions
"""
class Linear:
"""
Linear covariance function.
Models linear relationships and polynomial trends
when combined with other covariance functions.
"""
def __init__(self, input_dim, c=None, active_dims=None):
"""
Parameters:
- input_dim: int, input dimension
- c: float or array, offset parameters
- active_dims: list, active dimensions
"""
class Polynomial:
"""
Polynomial covariance function.
Models polynomial relationships of specified degree.
"""
def __init__(self, input_dim, c=None, d=2, offset=None, active_dims=None):
"""
Parameters:
- input_dim: int, input dimension
- c: float, scaling parameter
- d: int, polynomial degree
- offset: float, offset parameter
- active_dims: list, active dimensions
"""
class Cosine:
"""
Cosine covariance function for periodic processes.
Models exactly periodic functions with known period.
"""
def __init__(self, input_dim, ls=None, active_dims=None):
"""
Parameters:
- input_dim: int, input dimension
- ls: float or array, period parameters
- active_dims: list, active dimensions
"""
class Periodic:
"""
Periodic covariance function.
Combines periodicity with additional smoothness control
for quasi-periodic and approximately periodic functions.
"""
def __init__(self, input_dim, period=None, ls=None, active_dims=None):
"""
Parameters:
- input_dim: int, input dimension
- period: float or array, period parameters
- ls: float or array, length scale parameters
- active_dims: list, active dimensions
"""
class WarpedInput:
"""
Warped input covariance function.
Applies input transformation before computing covariance,
enabling non-stationary modeling.
"""
def __init__(self, input_dim, cov_func, warp_func, args=None):
"""
Parameters:
- input_dim: int, input dimension
- cov_func: base covariance function
- warp_func: input warping function
- args: additional arguments passed to the warping function
"""
class Gibbs:
"""
Gibbs non-stationary covariance function.
Length scale varies as function of input location,
enabling locally adaptive smoothness.
"""
def __init__(self, input_dim, lengthscale_func, args, active_dims=None):
"""
Parameters:
- input_dim: int, input dimension
- lengthscale_func: function defining spatially varying length scale
- args: arguments for length scale function
- active_dims: list, active dimensions
"""
# Covariance function operators
class Add:
"""
Additive covariance (sum of covariance functions).
Combines multiple covariance functions additively
for modeling multiple patterns simultaneously.
"""
class Prod:
"""
Multiplicative covariance (product of covariance functions).
Models interactions between different input dimensions
or combines different types of structure.
"""
class Kron:
"""
Kronecker product covariance for separable multi-dimensional inputs.
Assumes independence between input dimensions while
maintaining within-dimension correlations.
"""Mean functions for GP priors through the pm.gp.mean module.
# Available as pm.gp.mean.Zero, pm.gp.mean.Constant, etc.
class Zero:
"""
Zero mean function (default).
Assumes zero prior mean for GP, suitable when
data is centered or mean is captured by other model components.
"""
class Constant:
"""
Constant mean function.
Non-zero constant prior mean for GP.
"""
def __init__(self, c=None):
"""
Parameters:
- c: float, constant mean value
"""
class Linear:
"""
Linear mean function.
Linear trend in GP mean function.
"""
def __init__(self, coeffs=None, intercept=None):
"""
Parameters:
- coeffs: array, linear coefficients
- intercept: float, intercept term
"""Utility functions for GP modeling and visualization.
def plot_gp_dist(ax, samples, x, plot_samples=True, palette='Reds',
fill_alpha=0.8, samples_alpha=0.7, fill_kwargs=None,
samples_kwargs=None):
"""
Plot GP distribution with uncertainty bands.
Visualizes GP posterior with credible intervals and
optional sample trajectories from the posterior.
Parameters:
- ax: matplotlib axis for plotting
- samples: array, GP sample trajectories (n_samples, n_points)
- x: array, input locations corresponding to the sampled points
- plot_samples: bool, whether to plot individual samples
- palette: str, color palette for visualization
- fill_alpha: float, transparency for uncertainty bands
- samples_alpha: float, transparency for sample lines
- fill_kwargs: dict, arguments for uncertainty band plotting
- samples_kwargs: dict, arguments for sample line plotting
Returns:
- matplotlib artists: plot elements
"""
# GP utilities available in pm.gp.util module
def conditional_cov(Kxx, Kxz, Kzz_inv):
"""
Compute conditional covariance for GP prediction.
Parameters:
- Kxx: array, covariance between prediction points
- Kxz: array, cross-covariance between prediction and conditioning points
- Kzz_inv: array, inverse covariance of conditioning points
Returns:
- array: conditional covariance matrix
"""
def conditional_mean(Kxz, Kzz_inv, y):
"""
Compute conditional mean for GP prediction.
Parameters:
- Kxz: array, cross-covariance matrix
- Kzz_inv: array, inverse conditioning covariance
- y: array, conditioning outputs
Returns:
- array: conditional mean
"""
def stabilize(K, jitter=1e-6):
"""
Numerically stabilize covariance matrix.
Parameters:
- K: array, covariance matrix
- jitter: float, diagonal noise for numerical stability
Returns:
- array: stabilized covariance matrix
"""import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt
# Generate synthetic data
np.random.seed(42)
n = 50
X = np.linspace(0, 10, n)[:, None]
true_func = lambda x: np.sin(x) + 0.1 * x**2
y = true_func(X.ravel()) + np.random.normal(0, 0.1, n)
# GP regression model
with pm.Model() as gp_model:
# Covariance function parameters
ls = pm.Gamma('ls', alpha=2, beta=1) # length scale
sigma_f = pm.HalfNormal('sigma_f', sigma=1) # signal variance
# Covariance function
cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)
# GP prior
gp = pm.gp.Marginal(cov_func=cov)
# Observation noise
sigma_y = pm.HalfNormal('sigma_y', sigma=0.5)
# Marginal likelihood
y_obs = gp.marginal_likelihood('y_obs', X=X, y=y, noise=sigma_y)
# Inference
trace = pm.sample(1000, tune=1000, chains=2)
# Prediction
X_new = np.linspace(-1, 11, 100)[:, None]
with gp_model:
f_pred = gp.conditional('f_pred', X_new)
pred_samples = pm.sample_posterior_predictive(trace, vars=[f_pred], samples=100)

# GP for binary classification
n_train = 100
X_train = np.random.randn(n_train, 2)
true_boundary = lambda x: x[:, 0] + 0.5 * x[:, 1]**2
y_train = (true_boundary(X_train) > 0).astype(int)
with pm.Model() as gp_classification:
# GP parameters
ls = pm.Gamma('ls', alpha=2, beta=1, shape=2)
# Covariance function
cov = pm.gp.cov.ExpQuad(2, ls)
# Latent GP
gp = pm.gp.Latent(cov_func=cov)
# GP prior over latent function
f = gp.prior('f', X=X_train)
# Bernoulli likelihood with a logistic (sigmoid) link
y_obs = pm.Bernoulli('y_obs', p=pm.math.sigmoid(f), observed=y_train)
# Sampling
trace = pm.sample(1000, tune=1000)
# Prediction for classification
X_test = np.random.randn(50, 2)
with gp_classification:
f_test = gp.conditional('f_test', X_test)
test_samples = pm.sample_posterior_predictive(trace, vars=[f_test])
# Classification probabilities (apply the logistic function in NumPy;
# the posterior predictive samples are plain arrays, not tensors)
prob_samples = 1.0 / (1.0 + np.exp(-test_samples['f_test']))
mean_probs = prob_samples.mean(axis=0)

# Large dataset with sparse GP
n_large = 5000
X_large = np.random.uniform(0, 20, (n_large, 1))
y_large = np.sin(X_large.ravel()) + 0.2 * np.random.randn(n_large)
# Inducing points
n_inducing = 50
Xu = np.linspace(0, 20, n_inducing)[:, None]
with pm.Model() as sparse_gp:
# GP parameters
ls = pm.Gamma('ls', alpha=2, beta=1)
sigma_f = pm.HalfNormal('sigma_f', sigma=1)
# Covariance function
cov = sigma_f**2 * pm.gp.cov.Matern52(1, ls)
# Sparse GP
gp = pm.gp.MarginalSparse(cov_func=cov, approx='FITC')
# Noise parameter
sigma_y = pm.HalfNormal('sigma_y', sigma=1)
# Sparse marginal likelihood
y_obs = gp.marginal_likelihood('y_obs',
X=X_large,
Xu=Xu,
y=y_large,
noise=sigma_y)
# Efficient sampling
trace = pm.sample(500, tune=500)

# Multi-output GP regression
n_outputs = 3
X_multi = np.linspace(0, 5, 30)[:, None]
# Correlated outputs
true_funcs = [
lambda x: np.sin(2*x),
lambda x: np.cos(2*x),
lambda x: np.sin(2*x) + np.cos(2*x)
]
Y_multi = np.column_stack([f(X_multi.ravel()) for f in true_funcs])
Y_multi += 0.1 * np.random.randn(*Y_multi.shape)
with pm.Model() as multi_gp:
# Shared covariance parameters
ls = pm.Gamma('ls', alpha=2, beta=1)
# Output-specific parameters
sigma_f = pm.HalfNormal('sigma_f', sigma=1, shape=n_outputs)
sigma_y = pm.HalfNormal('sigma_y', sigma=1, shape=n_outputs)
# Independent GPs for each output
gps = []
for i in range(n_outputs):
cov_i = sigma_f[i]**2 * pm.gp.cov.ExpQuad(1, ls)
gp_i = pm.gp.Marginal(cov_func=cov_i)
y_obs_i = gp_i.marginal_likelihood(f'y_obs_{i}',
X=X_multi,
y=Y_multi[:, i],
noise=sigma_y[i])
gps.append(gp_i)
trace = pm.sample(1000, tune=1000)

# Periodic GP for seasonal data
t = np.linspace(0, 4*np.pi, 100)
y_seasonal = np.sin(t) + 0.5*np.cos(2*t) + 0.1*t + 0.1*np.random.randn(len(t))
with pm.Model() as periodic_gp:
# Periodic covariance parameters
period = pm.Gamma('period', alpha=10, beta=1.5) # Expected period ~ 2π
ls_periodic = pm.Gamma('ls_periodic', alpha=2, beta=1)
# Trend parameters
ls_trend = pm.Gamma('ls_trend', alpha=2, beta=0.5)
# Signal variances
sigma_periodic = pm.HalfNormal('sigma_periodic', sigma=1)
sigma_trend = pm.HalfNormal('sigma_trend', sigma=1)
# Combined covariance: periodic + trend
cov_periodic = sigma_periodic**2 * pm.gp.cov.Periodic(1, period=period, ls=ls_periodic)
cov_trend = sigma_trend**2 * pm.gp.cov.ExpQuad(1, ls_trend)
cov_combined = cov_periodic + cov_trend
# GP model
gp = pm.gp.Marginal(cov_func=cov_combined)
# Observation noise
sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)
# Likelihood
X_time = t[:, None]
y_obs = gp.marginal_likelihood('y_obs', X=X_time, y=y_seasonal, noise=sigma_y)
trace = pm.sample(1000, tune=1000)
# Extrapolate to future
t_future = np.linspace(0, 6*np.pi, 150)[:, None]
with periodic_gp:
f_future = gp.conditional('f_future', t_future)
future_pred = pm.sample_posterior_predictive(trace, vars=[f_future])

# Non-stationary GP using input warping
X_nonstat = np.linspace(0, 10, 60)[:, None]
# Function with varying smoothness
y_nonstat = np.where(X_nonstat.ravel() < 5,
np.sin(5*X_nonstat.ravel()), # High frequency
np.sin(X_nonstat.ravel())) # Low frequency
y_nonstat += 0.1 * np.random.randn(len(y_nonstat))
with pm.Model() as nonstat_gp:
# Warping function parameters
alpha = pm.Normal('alpha', mu=0, sigma=1)
beta = pm.HalfNormal('beta', sigma=1)
# Input warping: stretch inputs differently in different regions
def warp_func(x, alpha, beta):
return x + alpha * pm.math.tanh(beta * (x - 5))
# Base covariance function
ls = pm.Gamma('ls', alpha=2, beta=1)
sigma_f = pm.HalfNormal('sigma_f', sigma=1)
base_cov = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls)
# Warped covariance (input_dim is the first argument to WarpedInput)
cov_warped = pm.gp.cov.WarpedInput(1, cov_func=base_cov, warp_func=warp_func, args=(alpha, beta))
# GP with warped inputs
gp = pm.gp.Marginal(cov_func=cov_warped)
# Observation noise
sigma_y = pm.HalfNormal('sigma_y', sigma=0.2)
# Likelihood
y_obs = gp.marginal_likelihood('y_obs', X=X_nonstat, y=y_nonstat, noise=sigma_y)
trace = pm.sample(1000, tune=1000)

# Hierarchical GP with group-specific parameters
n_groups = 4
n_per_group = 25
# Generate group data with different characteristics
group_data = []
group_labels = []
for g in range(n_groups):
X_g = np.random.uniform(0, 10, n_per_group)[:, None]
# Different length scales per group
ls_true = 1 + g * 0.5
y_g = np.sin(X_g.ravel() / ls_true) + 0.1 * np.random.randn(n_per_group)
group_data.append((X_g, y_g))
group_labels.extend([g] * n_per_group)
# Combine data
X_all = np.vstack([X for X, y in group_data])
y_all = np.hstack([y for X, y in group_data])
group_idx = np.array(group_labels)
with pm.Model() as hierarchical_gp:
# Hierarchical priors for length scales
mu_ls = pm.Gamma('mu_ls', alpha=2, beta=1)
sigma_ls = pm.HalfNormal('sigma_ls', sigma=1)
ls_group = pm.Gamma('ls_group', mu=mu_ls, sigma=sigma_ls, shape=n_groups)
# Shared signal variance
sigma_f = pm.HalfNormal('sigma_f', sigma=1)
# Group-specific GPs
gps = []
for g in range(n_groups):
# Group mask
mask = group_idx == g
X_g = X_all[mask]
y_g = y_all[mask]
# Group covariance function
cov_g = sigma_f**2 * pm.gp.cov.ExpQuad(1, ls_group[g])
gp_g = pm.gp.Marginal(cov_func=cov_g)
# Group likelihood
sigma_y = pm.HalfNormal(f'sigma_y_{g}', sigma=0.5)
y_obs_g = gp_g.marginal_likelihood(f'y_obs_{g}',
X=X_g,
y=y_g,
noise=sigma_y)
gps.append(gp_g)
trace = pm.sample(1000, tune=1000)
# Analyze group differences
ls_posterior = trace['ls_group']
print("Group length scales (posterior means):")
for g in range(n_groups):
print(f"Group {g}: {ls_posterior[:, g].mean():.3f}")Install with Tessl CLI