Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor
npx @tessl/cli install tessl/pypi-pymc@5.25.0

PyMC is a powerful Python library for probabilistic programming and Bayesian modeling. It provides an intuitive interface for building complex statistical models, offering automatic differentiation, efficient sampling algorithms, and seamless integration with the scientific Python ecosystem. PyMC enables users to specify models in a natural mathematical notation and automatically handles the computational details of Bayesian inference.
PyMC is built on PyTensor for automatic differentiation and compilation, providing fast and efficient computation for complex probabilistic models. It supports various inference methods including Markov Chain Monte Carlo (MCMC), variational inference, and approximate Bayesian computation.
pip install pymc

import pymc as pm
# Core modeling components: model context manager, derived quantities,
# and arbitrary log-probability terms
from pymc import Model, Deterministic, Potential
# Univariate probability distributions (continuous and discrete)
from pymc import Normal, Bernoulli, Beta, Gamma, Poisson
# Multivariate probability distributions
from pymc import MvNormal, Dirichlet, Categorical
# Sampling and inference: MCMC entry points, MAP optimization, step methods
from pymc import sample, sample_prior_predictive, sample_posterior_predictive
from pymc import find_MAP, NUTS, Metropolis
# Variational inference
from pymc import ADVI, fit
# Gaussian processes: implementations and covariance functions
from pymc.gp import Marginal, Latent
from pymc.gp.cov import ExpQuad, Matern52
# Mathematical utilities (link functions and numerically stable helpers)
from pymc import logit, invlogit, logsumexp
# Data handling
from pymc import Data, Minibatch
# Model visualization and debugging
from pymc import model_to_graphviz, model_to_mermaid, model_to_networkx
# Exception handling
from pymc import SamplingError, IncorrectArgumentsError, ShapeError
# Log-probability functions
from pymc import logp, logcdf, icdf
# ODE integration (requires installation of sunode for performance)
from pymc import ode

import pymc as pm
import numpy as np
import arviz as az

# Generate synthetic data: 100 observations, 2 predictors, known coefficients
np.random.seed(42)
X = np.random.randn(100, 2)
true_beta = np.array([1.5, -2.0])
y = X @ true_beta + np.random.randn(100) * 0.5

# Define Bayesian linear-regression model
with pm.Model() as linear_model:
    # Priors for regression coefficients
    beta = pm.Normal('beta', mu=0, sigma=1, shape=2)
    # Prior for noise standard deviation (must be positive)
    sigma = pm.HalfNormal('sigma', sigma=1)
    # Linear combination of predictors
    mu = pm.math.dot(X, beta)
    # Likelihood of the observed data
    likelihood = pm.Normal('likelihood', mu=mu, sigma=sigma, observed=y)
    # Sample from posterior (sampling must happen inside the model context)
    trace = pm.sample(2000, tune=1000, chains=4)
# Analyze results
print(az.summary(trace))

import pymc as pm
# Hierarchical model for grouped data
# (n_groups, group_idx, and data are assumed to be defined beforehand)
with pm.Model() as hierarchical_model:
    # Hyperpriors for the group-level mean and spread
    mu_alpha = pm.Normal('mu_alpha', mu=0, sigma=10)
    sigma_alpha = pm.HalfNormal('sigma_alpha', sigma=1)
    # Group-level parameters (partial pooling across groups)
    alpha = pm.Normal('alpha', mu=mu_alpha, sigma=sigma_alpha, shape=n_groups)
    # Individual-level likelihood, indexed by group membership
    y_obs = pm.Normal('y_obs', mu=alpha[group_idx], sigma=1, observed=data)
    # Sample
    trace = pm.sample()

PyMC's architecture consists of several key components:
Model class provides a context manager for defining probabilistic models.

PyMC includes a comprehensive collection of probability distributions:
Multiple inference methods are supported:
PyMC provides 80+ probability distributions for modeling various types of data and uncertainty.
# Continuous distributions
alpha = pm.Normal('alpha', mu=0, sigma=1)  # unbounded real
beta = pm.Beta('beta', alpha=2, beta=5)  # support on (0, 1)
rate = pm.Gamma('rate', alpha=1, beta=1)  # positive real
# Discrete distributions
success = pm.Bernoulli('success', p=0.7)  # binary outcome
counts = pm.Poisson('counts', mu=3.5)  # non-negative counts
# Multivariate distributions
mu_vec = pm.MvNormal('mu_vec', mu=np.zeros(3), cov=np.eye(3))
probs = pm.Dirichlet('probs', a=np.ones(4))

Complete Distributions Reference
Advanced sampling algorithms with automatic tuning and diagnostics.
# Main sampling interface
trace = pm.sample(
    draws=2000,         # Number of posterior samples per chain
    tune=1000,          # Tuning (warm-up) samples
    chains=4,           # Number of independent chains
    cores=4,            # Parallel chains
    step=None,          # Auto step method selection
    target_accept=0.8,  # Target acceptance rate
)
# Custom step methods
step = pm.NUTS(target_accept=0.9)
trace = pm.sample(step=step)
# Predictive sampling
prior_pred = pm.sample_prior_predictive(samples=1000)
posterior_pred = pm.sample_posterior_predictive(trace)

Flexible framework for non-parametric Bayesian modeling.
# Define GP with covariance function
with pm.Model() as gp_model:
    # Priors for length scale and marginal standard deviation
    ls = pm.Gamma('ls', alpha=2, beta=1)
    eta = pm.HalfNormal('eta', sigma=1)
    # Covariance function: scaled exponentiated quadratic over 1 input dim
    cov_func = eta**2 * pm.gp.cov.ExpQuad(1, ls)
    # GP implementation
    gp = pm.gp.Marginal(cov_func=cov_func)
    # Observe data
    y_obs = gp.marginal_likelihood('y_obs', X=X_obs, y=y_obs, noise=sigma)

Complete Gaussian Processes Reference
Fast approximate inference for large-scale models.
# Automatic Differentiation Variational Inference
with model:
    # Mean-field approximation
    approx = pm.fit(method='advi', n=50000)
    # Full-rank approximation (captures posterior correlations)
    approx = pm.fit(method='fullrank_advi', n=50000)
    # Sample from the fitted approximation
    trace = approx.sample(2000)
    # Custom optimization
    advi = pm.ADVI()
    approx = pm.fit(n=50000, method=advi, optimizer=pm.adam(learning_rate=0.01))

Complete Variational Inference Reference
Core utilities for building and manipulating probabilistic models.
# Model context and utilities
with pm.Model() as model:
    # Get current model context (the innermost active Model)
    current_model = pm.modelcontext()
    # Deterministic transformations, recorded in the trace
    # (p is assumed to be defined elsewhere)
    log_odds = pm.Deterministic('log_odds', pm.math.log(p / (1 - p)))
    # Custom potential terms added to the model log-probability
    custom_prior = pm.Potential('custom_prior', custom_log_prob)
    # Update data in model
    pm.set_data({'X_new': new_X_data})

Comprehensive diagnostics via ArviZ integration.
# Convergence diagnostics
rhat = pm.rhat(trace)  # potential scale reduction statistic
ess = pm.ess(trace)  # effective sample size
mcse_stats = pm.mcse(trace)  # Monte Carlo standard error
# Model comparison
loo_stats = pm.loo(trace, model)  # leave-one-out cross-validation estimate
waic_stats = pm.waic(trace, model)  # widely applicable information criterion
# Custom log-likelihood computation
log_likelihood = pm.compute_log_likelihood(trace, model)

Essential mathematical functions for model building.
# Link functions
log_odds = pm.logit(probability)  # maps (0, 1) to the real line
probability = pm.invlogit(log_odds)  # inverse of logit
z_score = pm.probit(probability)  # standard-normal quantile link
# Numerically stable utilities
log_sum = pm.logsumexp(log_values)  # log(sum(exp(x))) without overflow
stable_sum = pm.logaddexp(log_a, log_b)  # log(exp(a) + exp(b))
# Matrix operations
triangular_matrix = pm.expand_packed_triangular(packed_values)

Efficient data containers and trace storage systems.
# Data containers (values can later be swapped with pm.set_data)
X_data = pm.Data('X_data', X_observed)
y_data = pm.Data('y_data', y_observed)
# Minibatch data for large datasets
mb_X = pm.Minibatch(X_large, batch_size=128)
mb_y = pm.Minibatch(y_large, batch_size=128)
# Trace backends
trace = pm.sample(trace=pm.backends.NDArray()) # In-memory
trace = pm.sample(trace=pm.backends.ZarrTrace()) # Zarr storage
# ArviZ conversion
idata = pm.to_inference_data(trace, model=model)

Complete Data Handling Reference
Solve systems of ODEs as part of probabilistic models for dynamic systems modeling.
# Basic ODE system definition
from pymc import ode
# Define ODE system
def lotka_volterra(y, t, p):
"""Lotka-Volterra predator-prey equations."""
return [p[0] * y[0] - p[1] * y[0] * y[1],
p[2] * y[0] * y[1] - p[3] * y[1]]
# Create ODE solution in model context
with pm.Model() as ode_model:
# Parameters
alpha = pm.LogNormal('alpha', 0, 1)
beta = pm.LogNormal('beta', 0, 1)
gamma = pm.LogNormal('gamma', 0, 1)
delta = pm.LogNormal('delta', 0, 1)
# Initial conditions
y0 = [1.0, 1.0]
# ODE solution
ode_solution = ode.DifferentialEquation(
func=lotka_volterra,
times=np.linspace(0, 10, 100),
n_states=2,
n_theta=4,
t0=0
    )

Generate visual representations of model structure and relationships.
# Generate model graphs in three formats
graphviz_graph = pm.model_to_graphviz(model)  # Graphviz object
mermaid_diagram = pm.model_to_mermaid(model)  # Mermaid diagram text
networkx_graph = pm.model_to_networkx(model)  # NetworkX graph
# Visualize model structure
graphviz_graph.render('model_structure', format='png')

Handle PyMC-specific errors and warnings during model building and sampling.
# Common PyMC exceptions
try:
trace = pm.sample(model=model)
except pm.SamplingError as e:
print(f"Sampling failed: {e}")
except pm.ShapeError as e:
print(f"Shape mismatch: {e}")
except pm.IncorrectArgumentsError as e:
    print(f"Invalid arguments: {e}")

PyMC supports the complete Bayesian modeling workflow:
PyMC integrates seamlessly with ArviZ for visualization and diagnostics, providing a complete toolkit for Bayesian analysis in Python.