Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor
—
PyMC provides a comprehensive framework for Gaussian process (GP) modeling, offering flexible covariance functions, mean functions, and efficient implementations for various GP applications including regression, classification, and time series modeling.
The standard GP implementation for regression and interpolation:
from pymc.gp import Marginal
import pymc as pm
class Marginal:
"""
Marginal Gaussian Process implementation.
Parameters:
- cov_func: Covariance function
- mean_func: Mean function (default: Zero)
Methods:
- marginal_likelihood: Add GP likelihood to model
- conditional: Compute conditional distribution
- predict: Make predictions at new locations
"""
def __init__(self, cov_func, mean_func=None):
pass
def marginal_likelihood(self, name, X, y, noise=None, is_observed=True, **kwargs):
"""
Add marginal likelihood to the model.
Parameters:
- name (str): Name for the GP likelihood
- X (array): Input locations for training data
- y (array): Observed output values
- noise (float/array): Standard deviation of the observation noise (named sigma in recent PyMC releases)
- is_observed (bool): Whether y contains observed data
Returns:
- gp_likelihood: GP likelihood random variable
"""
pass
def conditional(self, name, Xnew, given=None, **kwargs):
"""
Compute conditional GP distribution at new points.
Parameters:
- name (str): Name for conditional distribution
- Xnew (array): New input locations
- given (dict): Conditioning data {'X': X_obs, 'y': y_obs, 'noise': noise}
Returns:
- conditional_gp: Conditional GP distribution
"""
pass
# Basic GP regression example
with pm.Model() as gp_model:
# GP hyperparameters
ls = pm.Gamma('ls', alpha=2, beta=1) # Length scale
eta = pm.HalfNormal('eta', sigma=2) # Signal variance
sigma = pm.HalfNormal('sigma', sigma=1) # Noise variance
# Covariance function
cov_func = eta**2 * pm.gp.cov.ExpQuad(1, ls)
# GP likelihood
gp = pm.gp.Marginal(cov_func=cov_func)
y_obs = gp.marginal_likelihood('y_obs', X=X_train, y=y_train, noise=sigma)
# Sample posterior
    trace = pm.sample()
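The predict method listed in the Marginal stub returns moment estimates directly; a minimal sketch, assuming the gp_model, gp, and X_test objects from this example:
# Point predictions at new inputs from the MAP estimate (sketch)
with gp_model:
    mp = pm.find_MAP()
    # diag=True returns pointwise variances; pred_noise=True adds observation noise
    mu, var = gp.predict(X_test, point=mp, diag=True, pred_noise=True)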
Efficient approximations for large datasets:
from pymc.gp import MarginalApprox
class MarginalApprox:
"""
Marginal GP with inducing point approximation.
Parameters:
- cov_func: Covariance function
- approx (str): Approximation method ('FITC', 'VFE', 'DTC')
Methods:
- marginal_likelihood: Add approximate GP likelihood
"""
# Sparse GP with inducing points
with pm.Model() as sparse_gp:
# Select inducing points
Xu = X_train[::10] # Subset of training points
# GP with sparse approximation
gp_sparse = pm.gp.MarginalApprox(cov_func=cov_func, approx='FITC')
y_sparse = gp_sparse.marginal_likelihood(
'y_sparse', X=X_train, Xu=Xu, y=y_train, noise=sigma
    )
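Inducing points can also be placed by clustering the inputs rather than subsetting them; a sketch using PyMC's k-means helper (X_train as above):
from pymc.gp.util import kmeans_inducing_points
# Place 20 inducing points at k-means cluster centers of the inputs
Xu = kmeans_inducing_points(20, X_train)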
# Variational sparse GP (VFE approximation)
# MarginalSparse is the legacy PyMC3 name; it was renamed MarginalApprox in PyMC v4+
with pm.Model() as variational_sparse:
    # Inducing point locations (the inducing values are marginalized analytically,
    # so no explicit f_u variable is needed)
    Xu = pm.Data('Xu', inducing_inputs)
    gp_sparse = pm.gp.MarginalApprox(cov_func=cov_func, approx='VFE')
    y_sparse = gp_sparse.marginal_likelihood(
        'y_sparse', X=X_train, Xu=Xu, y=y_train, noise=sigma
    )
Efficient GPs for grid-structured data:
from pymc.gp import MarginalKron
class MarginalKron:
"""
Kronecker-structured marginal GP for multidimensional grids.
Parameters:
- cov_funcs (list): List of 1D covariance functions
- mean_func: Mean function
"""
# Kronecker GP for 2D grid data
with pm.Model() as kronecker_gp:
# Separate covariance for each dimension
cov_x = eta1**2 * pm.gp.cov.Matern52(1, ls1)
cov_y = eta2**2 * pm.gp.cov.Matern52(1, ls2)
# Kronecker GP
gp_kron = pm.gp.MarginalKron(cov_funcs=[cov_x, cov_y])
    # Xs: a list of 1D coordinate arrays, one per grid dimension
    z_obs = gp_kron.marginal_likelihood('z_obs', Xs=[x_coords, y_coords],
                                        y=grid_values, sigma=sigma)
For classification and non-Gaussian likelihoods:
from pymc.gp import Latent
class Latent:
"""
Latent Gaussian Process for non-Gaussian likelihoods.
Parameters:
- cov_func: Covariance function
- mean_func: Mean function
Methods:
- prior: Define GP prior distribution
- conditional: Conditional distribution at new points
"""
def prior(self, name, X, **kwargs):
"""
Define GP prior at input locations.
Parameters:
- name (str): Name for GP prior variable
- X (array): Input locations
Returns:
- f_prior: GP prior distribution
"""
pass
# GP classification example
with pm.Model() as gp_classification:
# GP hyperparameters
ls = pm.Gamma('ls', alpha=2, beta=1)
eta = pm.HalfNormal('eta', sigma=2)
# Covariance function
cov_func = eta**2 * pm.gp.cov.Matern32(1, ls)
# Latent GP
gp = pm.gp.Latent(cov_func=cov_func)
f = gp.prior('f', X=X_train)
# Probit link for classification
    p = pm.math.invprobit(f)
    y_obs = pm.Bernoulli('y_obs', p=p, observed=y_binary)
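Predicted class probabilities at new inputs come from the latent conditional pushed through the link function; a sketch assuming X_test and a sampled trace:
with gp_classification:
    trace = pm.sample()
    # Latent conditional at the test inputs, mapped to probabilities
    f_new = gp.conditional('f_new', Xnew=X_test)
    p_new = pm.Deterministic('p_new', pm.math.invprobit(f_new))
    ppc = pm.sample_posterior_predictive(trace, var_names=['p_new'])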
from pymc.gp import LatentKron
# Latent Kronecker GP for spatial classification
with pm.Model() as spatial_classification:
gp_kron = pm.gp.LatentKron(cov_funcs=[cov_x, cov_y])
    # Xs: a list of 1D coordinate arrays defining the spatial grid
    f_latent = gp_kron.prior('f_latent', Xs=[x_coords, y_coords])
    # Spatial logistic regression
    p_spatial = pm.math.invlogit(f_latent)
    y_spatial = pm.Bernoulli('y_spatial', p=p_spatial, observed=spatial_outcomes)
Robust alternative to Gaussian processes:
from pymc.gp import TP
class TP:
"""
Student-t Process implementation.
Parameters:
- scale_func: Scale (covariance) function; cov_func is a deprecated alias
- nu: Degrees of freedom parameter
"""
# Robust regression with a t-process
with pm.Model() as tp_model:
    # Degrees of freedom
    nu = pm.Gamma('nu', alpha=2, beta=0.1)
    # TP is a latent implementation: define a prior, then an explicit likelihood
    # (scale_func replaces the legacy cov_func argument)
    tp = pm.gp.TP(scale_func=cov_func, nu=nu)
    f_tp = tp.prior('f_tp', X=X_train)
    y_tp = pm.Normal('y_tp', mu=f_tp, sigma=sigma, observed=y_train)
Efficient approximation for large-scale GPs:
from pymc.gp import HSGP, HSGPPeriodic
class HSGP:
"""
Hilbert Space Gaussian Process approximation.
Parameters:
- m (list): Number of basis functions per dimension
- L (list): Boundary values per dimension
- cov_func: Covariance function to approximate
"""
def __init__(self, m, L, cov_func):
pass
def prior_linearized(self, Xs):
"""
Compute linearized prior representation.
Parameters:
- Xs (array): Input locations
Returns:
- phi: Basis function matrix
- sqrt_psd: Square root of power spectral density
"""
pass
# HSGP for large datasets
with pm.Model() as hsgp_model:
# HSGP parameters
m = [50] # Number of basis functions
L = [2.0] # Boundary condition
# HSGP approximation
hsgp = pm.gp.HSGP(m=m, L=L, cov_func=cov_func)
phi, sqrt_psd = hsgp.prior_linearized(X_train)
# Linear model with HSGP features
beta = pm.Normal('beta', 0, 1, shape=m[0])
f_hsgp = phi @ (sqrt_psd * beta)
# Likelihood
    y_hsgp = pm.Normal('y_hsgp', mu=f_hsgp, sigma=sigma, observed=y_train)
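HSGP also exposes a prior method that builds the same basis expansion internally, avoiding the manual linear model; a sketch with the same settings:
with pm.Model() as hsgp_prior_model:
    hsgp_alt = pm.gp.HSGP(m=[50], L=[2.0], cov_func=cov_func)
    # One call creates the basis coefficients and returns the latent function
    f = hsgp_alt.prior('f', X=X_train)
    y = pm.Normal('y', mu=f, sigma=sigma, observed=y_train)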
"""
HSGP for periodic functions.
Parameters:
- m (int): Number of basis functions
- period (float): Period of the function
- cov_func: Periodic covariance function
"""
# Periodic GP approximation
with pm.Model() as periodic_hsgp:
    # The period is specified on the Periodic covariance function
    period = 12.0  # Annual cycle
    m_periodic = 20
    periodic_cov = pm.gp.cov.Periodic(1, period=period, ls=1.0)
    hsgp_periodic = pm.gp.HSGPPeriodic(m=m_periodic, cov_func=periodic_cov)
    f_periodic = hsgp_periodic.prior('f_periodic', X=X_train)
import pymc.gp.cov as cov
# Exponentiated Quadratic (RBF/Squared Exponential)
class ExpQuad:
"""
Exponentiated quadratic covariance function.
Parameters:
- input_dim (int): Number of input dimensions
- ls (float/array): Length scale(s)
- active_dims (list): Active input dimensions
"""
def __init__(self, input_dim, ls, active_dims=None):
pass
eq_cov = cov.ExpQuad(input_dim=2, ls=[1.0, 2.0])
# Matérn Covariance Functions
matern32 = cov.Matern32(input_dim=1, ls=1.5) # Matérn 3/2
matern52 = cov.Matern52(input_dim=1, ls=1.5) # Matérn 5/2
# Rational Quadratic
rat_quad = cov.RatQuad(input_dim=1, ls=1.0, alpha=2.0)
# Exponential (Matérn 1/2)
exponential = cov.Exponential(input_dim=1, ls=1.0)
# Linear covariance
linear_cov = cov.Linear(input_dim=1, c=0.5)
# Polynomial covariance
poly_cov = cov.Polynomial(input_dim=1, c=1.0, d=3, offset=0.0)
# Gibbs (non-stationary)
def length_scale_func(x):
return 0.1 + 0.9 * pm.math.sigmoid(x)
gibbs_cov = cov.Gibbs(input_dim=1, lengthscale_func=length_scale_func)
# Periodic covariance
periodic_cov = cov.Periodic(input_dim=1, period=12, ls=1.0)
# Cosine covariance
cosine_cov = cov.Cosine(input_dim=1, ls=1.0)
# Constant covariance (bias term)
constant_cov = cov.Constant(c=2.0)
# White noise
white_noise = cov.WhiteNoise(sigma=0.1)
# Scaled covariance (a base kernel rescaled by an input-dependent function)
def scaling_function(x):
    return pm.math.exp(-x**2)
scaled_cov = cov.ScaledCov(input_dim=1, cov_func=base_cov, scaling_func=scaling_function)
# Warped input covariance
warp_func = lambda x: pm.math.tanh(x)
warped_cov = cov.WarpedInput(input_dim=1, cov_func=base_cov, warp_func=warp_func)
# Addition (sum of independent processes)
combined_cov = cov1 + cov2 + cov3
# Multiplication (product of covariances)
product_cov = cov1 * cov2
# Example: Trend + Periodic + Noise
trend_cov = cov.Linear(input_dim=1, c=1.0)
periodic_cov = cov.Periodic(input_dim=1, period=12, ls=1.0)
noise_cov = cov.WhiteNoise(sigma=0.1)
full_cov = trend_cov + periodic_cov + noise_cov
# Covariance functions for specific dimensions
cov_dim1 = cov.Matern52(input_dim=3, ls=1.0, active_dims=[0]) # Only 1st dim
cov_dim23 = cov.ExpQuad(input_dim=3, ls=[0.5, 2.0], active_dims=[1, 2]) # 2nd,3rd dims
# Combined multidimensional covariance
multi_cov = cov_dim1 + cov_dim23
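Covariance objects are callable on inputs and return symbolic matrices, which is useful for inspecting a kernel before modeling; a sketch with NumPy inputs:
import numpy as np
X_plot = np.linspace(0, 10, 50)[:, None]
# Materialize the (50, 50) covariance matrix as a NumPy array
K = full_cov(X_plot).eval()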
import pymc.gp.mean as mean
# Zero mean (default)
class Zero:
"""Zero mean function."""
pass
zero_mean = mean.Zero()
# Constant mean
class Constant:
"""
Constant mean function.
Parameters:
- c (float): Constant value
"""
constant_mean = mean.Constant(c=2.5)
# Linear mean
class Linear:
"""
Linear mean function.
Parameters:
- coeffs (array): Linear coefficients
- intercept (float): Intercept term
"""
linear_mean = mean.Linear(coeffs=[0.5, -1.2], intercept=1.0)
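Any of these mean functions can be attached to a GP implementation through the mean_func argument; a minimal sketch reusing the covariance from earlier examples:
# GP with a constant (non-zero) prior mean
gp_with_mean = pm.gp.Marginal(mean_func=constant_mean, cov_func=cov_func)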
# Coregionalization model for multiple outputs
with pm.Model() as multioutput_gp:
# Shared latent functions
n_latent = 2
n_outputs = 3
# Mixing matrix
W = pm.Normal('W', 0, 1, shape=(n_outputs, n_latent))
# Latent GPs
gps = []
for i in range(n_latent):
gp_i = pm.gp.Latent(cov_func=shared_cov)
f_i = gp_i.prior(f'f_{i}', X=X_train)
gps.append(f_i)
# Linear combinations for each output
F_latent = pm.math.stack(gps, axis=1) # Shape: (n_obs, n_latent)
F_outputs = pm.math.dot(F_latent, W.T) # Shape: (n_obs, n_outputs)
# Output likelihoods
for j in range(n_outputs):
        y_j = pm.Normal(f'y_{j}', mu=F_outputs[:, j], sigma=sigma_j[j],
                        observed=Y_train[:, j])
# Hierarchical GP with group-specific parameters
with pm.Model() as hierarchical_gp:
# Global hyperparameters
mu_ls = pm.Gamma('mu_ls', alpha=2, beta=1)
sigma_ls = pm.HalfNormal('sigma_ls', sigma=0.5)
# Group-specific length scales
    ls_groups = pm.LogNormal('ls_groups', mu=pm.math.log(mu_ls),
                             sigma=sigma_ls, shape=n_groups)
# Group-specific GPs
for g in range(n_groups):
cov_g = eta**2 * cov.Matern52(1, ls_groups[g])
gp_g = pm.gp.Marginal(cov_func=cov_g)
# Group data
X_g = X_train[group_idx == g]
y_g = y_train[group_idx == g]
        y_obs_g = gp_g.marginal_likelihood(f'y_obs_{g}', X=X_g, y=y_g, noise=sigma)
# GP with input-dependent noise
with pm.Model() as heteroscedastic_gp:
    # Latent GP for the regression mean (prior() is a Latent method, not Marginal)
    gp_mean = pm.gp.Latent(cov_func=cov_func)
    f_mean = gp_mean.prior('f_mean', X=X_train)
    # Latent GP for the log of the noise standard deviation
    gp_noise = pm.gp.Latent(cov_func=noise_cov_func)
    log_sigma = gp_noise.prior('log_sigma', X=X_train)
    # Heteroscedastic likelihood
    y_hetero = pm.Normal('y_hetero', mu=f_mean, sigma=pm.math.exp(log_sigma),
                         observed=y_train)
# Compare different covariance functions
covariance_functions = {
'matern32': cov.Matern32(1, ls),
'matern52': cov.Matern52(1, ls),
'exp_quad': cov.ExpQuad(1, ls),
    'rational_quad': cov.RatQuad(1, ls=ls, alpha=alpha)
}
models = {}
traces = {}
for name, cov_func in covariance_functions.items():
with pm.Model() as model:
gp = pm.gp.Marginal(cov_func=cov_func)
y_obs = gp.marginal_likelihood('y_obs', X=X_train, y=y_train, noise=sigma)
        trace = pm.sample(idata_kwargs={'log_likelihood': True})
    models[name] = model
    traces[name] = trace
# Compare models with ArviZ (expects a dict of InferenceData objects)
import arviz as az
comparison = az.compare(traces)
# Complete GP prediction workflow
with pm.Model() as gp_model:
# Model definition...
trace = pm.sample()
# Predict at new locations
with gp_model:
# Conditional distribution for predictions
f_pred = gp.conditional('f_pred', Xnew=X_test)
# Sample predictions
pred_samples = pm.sample_posterior_predictive(
trace,
var_names=['f_pred'],
predictions=True
)
# Extract prediction statistics
pred_mean = pred_samples.predictions['f_pred'].mean(dim=['chain', 'draw'])
pred_std = pred_samples.predictions['f_pred'].std(dim=['chain', 'draw'])
PyMC's Gaussian process framework provides a flexible and efficient foundation for non-parametric Bayesian modeling, supporting everything from simple regression to complex multi-output and spatiotemporal models.
Install with Tessl CLI
npx tessl i tessl/pypi-pymc