Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Theano
npx @tessl/cli install tessl/pypi-pymc3@3.11.0

A probabilistic programming library for Python that allows users to build Bayesian models with a simple Python API and fit them using Markov Chain Monte Carlo (MCMC) methods. PyMC3 provides a comprehensive suite of tools for Bayesian statistical modeling, including a large collection of probability distributions, advanced MCMC sampling algorithms, variational inference methods, and model checking utilities.
pip install pymc3

import pymc3 as pm

Common workflow imports:
import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt
import arviz as az  # For plotting and diagnostics

import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt
# Generate synthetic data
np.random.seed(42)
n = 100
true_alpha = 1.0
true_beta = 2.5
true_sigma = 0.5
x = np.random.randn(n)
y = true_alpha + true_beta * x + np.random.normal(0, true_sigma, n)
# Build Bayesian linear regression model
with pm.Model() as model:
    # Priors for unknown model parameters
    alpha = pm.Normal('alpha', mu=0, sigma=10)
    beta = pm.Normal('beta', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=1)

    # Expected value of outcome
    mu = alpha + beta * x

    # Likelihood (sampling distribution) of observations
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)

    # Inference - draw posterior samples
    trace = pm.sample(1000, tune=1000, return_inferencedata=True)
# Analyze results
print(pm.summary(trace))
# Plot results
import arviz as az
az.plot_trace(trace)
plt.show()

PyMC3 follows a hierarchical architecture that separates model specification from inference:
the Model class manages all random variables, transformations, and the computational graph, while the library leverages Theano for automatic differentiation, enabling gradient-based inference methods and efficient computation on CPU/GPU.
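A minimal sketch of this separation (the name coin_model and the data are illustrative, not part of the library): the Model context registers the random variables and exposes a Theano-compiled log-density, and inference is invoked as a separate step.

import numpy as np
import pymc3 as pm

# Specification: the context manager registers every random variable on the model
with pm.Model() as coin_model:
    theta = pm.Beta('theta', alpha=1, beta=1)
    obs = pm.Bernoulli('obs', p=theta, observed=np.array([0, 1, 1, 1, 0, 1]))

# The Model object tracks the free (transformed) variables and compiles the
# joint log-probability through Theano
print(coin_model.free_RVs)
print(coin_model.logp(coin_model.test_point))

# Inference is a separate step; the model can be passed in explicitly
trace = pm.sample(500, tune=500, model=coin_model, return_inferencedata=True)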
Comprehensive library of 60+ probability distributions including continuous, discrete, multivariate, time series, and mixture distributions. All distributions support automatic broadcasting, shape inference, and parameter transformations.
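A brief sketch (model name and data are illustrative) of declaring a distribution with an explicit shape, attaching one to observed data, and using an unnamed distribution stand-alone via .dist():

import numpy as np
import pymc3 as pm

counts = np.array([3, 7, 5, 9])

with pm.Model() as dist_demo:
    # shape= broadcasts a single declaration into a vector of random variables
    lam = pm.Gamma('lam', alpha=2.0, beta=0.5, shape=4)
    y = pm.Poisson('y', mu=lam, observed=counts)

# Unnamed distributions created with .dist() expose logp as a Theano expression
print(pm.Normal.dist(mu=0, sigma=1).logp(0.5).eval())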
# Continuous distributions
class Normal: ...
class Beta: ...
class Gamma: ...
class StudentT: ...
# Discrete distributions
class Binomial: ...
class Poisson: ...
class Categorical: ...
# Multivariate distributions
class MvNormal: ...
class Dirichlet: ...
class LKJCorr: ...

Essential components for building Bayesian models including the Model context manager, random variables, deterministic variables, and potential functions for custom likelihood terms.
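A short sketch (the model name core_demo and the data are illustrative) combining a mutable Data container, a tracked Deterministic quantity, and a Potential that adds a custom term to the joint log-probability:

import numpy as np
import pymc3 as pm

x_obs = np.array([1.2, 0.8, 1.5, 0.9])

with pm.Model() as core_demo:
    x_data = pm.Data('x_data', x_obs)            # mutable data container
    mu = pm.Normal('mu', mu=0, sigma=5)
    sigma = pm.HalfNormal('sigma', sigma=1)
    cv = pm.Deterministic('cv', sigma / mu)      # derived quantity kept in the trace
    pm.Potential('soft_constraint', -mu**2)      # extra log-probability term
    pm.Normal('x', mu=mu, sigma=sigma, observed=x_data)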
class Model:
    """Main model container class."""
def Deterministic(name, var, model=None): ...
def Potential(name, var, model=None): ...
def Data(name, value, **kwargs): ...
class ObservedRV: ...
class FreeRV: ...

Advanced Markov Chain Monte Carlo algorithms including the No-U-Turn Sampler (NUTS), Hamiltonian Monte Carlo, and various Metropolis methods with automatic step method assignment and tuning.
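A sketch of the typical sampling workflow (synthetic data, illustrative names): NUTS is assigned automatically to the continuous parameters, and posterior predictive draws are generated afterwards for model checking.

import numpy as np
import pymc3 as pm

y = np.random.normal(1.0, 0.5, size=50)

with pm.Model() as mcmc_demo:
    mu = pm.Normal('mu', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=1)
    pm.Normal('y', mu=mu, sigma=sigma, observed=y)

    # NUTS is selected automatically for continuous free variables
    trace = pm.sample(1000, tune=1000, chains=2, return_inferencedata=True)

    # Posterior predictive draws for model checking
    ppc = pm.sample_posterior_predictive(trace)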
def sample(draws=1000, step=None, init='auto', chains=None, **kwargs):
    """Draw samples from the posterior distribution using MCMC."""

def sample_posterior_predictive(trace, samples=None, model=None, **kwargs):
    """Generate posterior predictive samples."""

class NUTS: ...
class HamiltonianMC: ...
class Metropolis: ...

Scalable approximate inference methods including Automatic Differentiation Variational Inference (ADVI), Stein Variational Gradient Descent, and normalizing flows with various approximation families.
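A sketch of fitting a model with ADVI instead of MCMC (names, data, and settings are illustrative); the returned approximation can be sampled like a trace.

import numpy as np
import pymc3 as pm

y = np.random.normal(0.0, 1.0, size=200)

with pm.Model() as vi_demo:
    mu = pm.Normal('mu', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=1)
    pm.Normal('y', mu=mu, sigma=sigma, observed=y)

    # Mean-field ADVI; 'fullrank_advi' and 'svgd' are alternative method strings
    approx = pm.fit(n=20000, method='advi')

# Draw from the fitted approximation for downstream analysis
approx_trace = approx.sample(1000)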
def fit(n=10000, method='advi', model=None, **kwargs):
    """Fit a model using variational inference."""

class ADVI: ...
class FullRankADVI: ...
class SVGD: ...
class MeanField: ...
class FullRank: ...

Flexible Gaussian process implementation with various covariance functions, mean functions, and inference methods including marginal, latent, and sparse GP formulations.
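An illustrative marginal-GP sketch (the names, priors, and data are made up): the covariance is assembled from pm.gp.cov components and the latent function is integrated out by Marginal.

import numpy as np
import pymc3 as pm

X = np.linspace(0, 10, 30)[:, None]
y = np.sin(X).ravel() + 0.2 * np.random.randn(30)

with pm.Model() as gp_demo:
    ls = pm.Gamma('ls', alpha=2, beta=1)
    eta = pm.HalfNormal('eta', sigma=2)
    cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ls)

    gp = pm.gp.Marginal(cov_func=cov)
    sigma = pm.HalfNormal('sigma', sigma=1)
    y_ = gp.marginal_likelihood('y', X=X, y=y, noise=sigma)

    # Quick point estimate; predictions at new inputs would use gp.conditional(...)
    mp = pm.find_MAP()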
class Marginal: ...
class Latent: ...
class MarginalSparse: ...
class MarginalKron: ...
# Available in pm.gp.cov and pm.gp.mean modules
class ExpQuad: ...
class Matern52: ...
class Linear: ...

High-level interface for generalized linear models with family-specific distributions and link functions, providing streamlined model specification for common regression tasks.
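A minimal sketch using the formula interface (requires patsy and pandas; the DataFrame here is synthetic); the default family is Normal, i.e. ordinary linear regression.

import numpy as np
import pandas as pd
import pymc3 as pm

df = pd.DataFrame({'x': np.random.randn(100)})
df['y'] = 1.0 + 2.5 * df['x'] + np.random.normal(0, 0.5, 100)

with pm.Model() as glm_demo:
    # Patsy-style formula builds priors and likelihood for a linear regression
    pm.glm.GLM.from_formula('y ~ x', df)
    trace = pm.sample(1000, tune=1000, return_inferencedata=True)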
class GLM:
    """Generalized Linear Model implementation."""

class LinearComponent:
    """Linear component for GLM construction."""
# Available in pm.glm.families module
class Normal: ...
class Binomial: ...
class Poisson: ...

Comprehensive model diagnostics, convergence assessment, and visualization capabilities through tight integration with ArviZ, including posterior analysis, model comparison, and publication-ready plots.
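For instance, assuming trace is the InferenceData object returned by pm.sample(..., return_inferencedata=True) in an earlier example, the ArviZ-backed functions can be called on it directly:

import arviz as az

print(az.summary(trace))   # posterior means, credible intervals, r_hat, ESS
print(az.rhat(trace))      # convergence diagnostic per variable
print(az.loo(trace))       # leave-one-out cross-validation estimate

az.plot_trace(trace)
az.plot_posterior(trace)
az.plot_forest(trace)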
# Convergence diagnostics
def rhat(trace): ...
def ess(trace): ...
def mcse(trace): ...
# Model comparison
def compare(models, ic='waic'): ...
def waic(trace, model=None): ...
def loo(trace, model=None): ...
# Plotting functions
def plot_trace(trace): ...
def plot_posterior(trace): ...
def plot_forest(trace): ...

Comprehensive suite of MCMC step methods including Hamiltonian Monte Carlo, Metropolis variants, and specialized samplers with automatic step method selection and adaptive tuning.
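A sketch of overriding the automatic assignment with an explicit step method (model and data are illustrative); mixing several step methods in one call is what produces a CompoundStep.

import numpy as np
import pymc3 as pm

data = np.random.poisson(3, size=100)

with pm.Model() as step_demo:
    lam = pm.Gamma('lam', alpha=1, beta=1)
    pm.Poisson('obs', mu=lam, observed=data)

    # Override the automatic choice (NUTS for continuous variables) with Metropolis
    step = pm.Metropolis()
    trace = pm.sample(2000, tune=1000, step=step, return_inferencedata=True)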
class NUTS: ...
class HamiltonianMC: ...
class Metropolis: ...
class Slice: ...
class EllipticalSlice: ...
class CompoundStep: ...

Specialized data structures for observed data, minibatch processing, and generator-based data sources, enabling efficient memory usage and streaming data processing.
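A brief sketch (illustrative names and synthetic data) of pm.Data containers, which can later be swapped with pm.set_data, and of pm.Minibatch for streaming slices of a large array:

import numpy as np
import pymc3 as pm

x_train = np.random.randn(1000)
y_train = 2.0 * x_train + np.random.normal(0, 0.5, 1000)

with pm.Model() as data_demo:
    x_shared = pm.Data('x_shared', x_train)
    y_shared = pm.Data('y_shared', y_train)
    beta = pm.Normal('beta', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=1)
    pm.Normal('y', mu=beta * x_shared, sigma=sigma, observed=y_shared)

# pm.set_data({'x_shared': x_new, 'y_shared': y_new}, model=data_demo) swaps the
# container contents in place, e.g. for out-of-sample prediction

# Minibatches stream random slices of a large array, typically for variational inference
x_mb = pm.Minibatch(x_train, batch_size=100)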
class Data: ...
class Minibatch: ...
class GeneratorAdapter: ...
def align_minibatches(*minibatches): ...
def get_data(filename): ...

Comprehensive mathematical functions for tensor operations, link functions, and specialized computations with automatic differentiation support.
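A short sketch using pm.math.invlogit as the link function in a logistic regression (synthetic data, illustrative names); the helpers build Theano expressions, so they also work outside a model.

import numpy as np
import pymc3 as pm

x = np.random.randn(200)
y = np.random.binomial(1, 1 / (1 + np.exp(-(0.5 + 1.5 * x))))

with pm.Model() as math_demo:
    intercept = pm.Normal('intercept', mu=0, sigma=5)
    slope = pm.Normal('slope', mu=0, sigma=5)

    # invlogit maps the linear predictor onto (0, 1) as a Theano expression
    p = pm.math.invlogit(intercept + slope * x)
    pm.Bernoulli('y', p=p, observed=y)

# Helpers can be evaluated directly as plain Theano computations
print(pm.math.logsumexp(np.array([0.1, 0.2, 0.3])).eval())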
def logit(p): ...
def invlogit(x): ...
def probit(p): ...
def invprobit(x): ...
def logsumexp(x): ...
def logaddexp(a, b): ...
def expand_packed_triangular(n, packed): ...
def kronecker(*Ks): ...