Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor
—
PyMC provides a comprehensive collection of 80+ probability distributions for modeling various types of data and uncertainty. All distributions support automatic differentiation, efficient sampling, and seamless integration with PyMC's inference engines.
All PyMC distributions inherit from a common base class and share consistent APIs:
from pymc import Distribution, Continuous, Discrete

# Illustrative skeleton of the shared interface (not the actual implementation)
class Distribution:
    """
    Base class for all PyMC probability distributions.

    Parameters:
    - name (str): Name of the random variable
    - observed (array-like, optional): Observed data values
    - dims (tuple, optional): Dimension names for the variable
    - **kwargs: Distribution-specific parameters
    """
    def __init__(self, name, observed=None, dims=None, **kwargs):
        pass

    def random(self, point=None, size=None):
        """Generate random samples from the distribution."""
        pass

    def logp(self, value):
        """Calculate log-probability density/mass."""
        pass
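Note that named random variables can only be created inside a pm.Model() context; the standalone snippets below assume import pymc as pm and omit the with pm.Model(): wrapper for brevity. A minimal sketch of the shared API on toy data:

import numpy as np
import pymc as pm

data = np.random.normal(1.0, 2.0, size=50)  # toy data for illustration

with pm.Model() as model:
    # Every distribution takes a name first, then its parameters;
    # passing `observed` turns the variable into a likelihood term.
    mu = pm.Normal('mu', mu=0, sigma=10)
    sigma = pm.HalfNormal('sigma', sigma=5)
    y = pm.Normal('y', mu=mu, sigma=sigma, observed=data)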
The Gaussian/normal distribution is fundamental for continuous modeling:
from pymc import Normal
# Basic normal distribution
x = Normal('x', mu=0, sigma=1)
# With array parameters
means = Normal('means', mu=[0, 1, 2], sigma=[1, 0.5, 2])
# Multivariate case: use MvNormal instead

The Beta distribution is continuous on the interval [0, 1] and is commonly used for probabilities:
from pymc import Beta
# Beta distribution for probability parameter
p = Beta('p', alpha=2, beta=5)
# Parameterized by mean and concentration
prob = Beta('prob', mu=0.3, sigma=0.1)  # Alternative parameterization

The Gamma distribution is continuous on positive values and is often used for rate and scale parameters:
from pymc import Gamma
# Shape/rate parameterization
rate = Gamma('rate', alpha=2, beta=1)
# Mean/standard-deviation parameterization
scale_param = Gamma('scale_param', mu=2, sigma=1)
# Inverse-Gamma for variance parameters
variance = pm.InverseGamma('variance', alpha=1, beta=1)

Student's t is a heavy-tailed alternative to the normal distribution:
from pymc import StudentT
# Standard Student's t
t_var = StudentT('t_var', nu=3, mu=0, sigma=1)
# Half Student's t for positive values
positive_t = pm.HalfStudentT('positive_t', nu=3, sigma=1)

from pymc import Exponential, Laplace, LogNormal
# Exponential distribution
rate_param = Exponential('rate_param', lam=1.0)
# Laplace (double exponential) distribution
location = Laplace('location', mu=0, b=1)
# Log-normal distribution
log_scale = LogNormal('log_scale', mu=0, sigma=1)

from pymc import Uniform, HalfFlat, Flat
# Uniform distribution on interval
uniform_var = Uniform('uniform_var', lower=0, upper=10)
# Flat priors (improper uniform distributions)
flat_prior = Flat('flat_prior')
half_flat_prior = HalfFlat('half_flat_prior')  # For positive values

# Cauchy distribution (heavy tails)
cauchy_var = pm.Cauchy('cauchy_var', alpha=0, beta=1)
# Chi-squared distribution
chi2_var = pm.ChiSquared('chi2_var', nu=3)
# Pareto distribution (power law)
pareto_var = pm.Pareto('pareto_var', alpha=1, m=1)
# Weibull distribution
weibull_var = pm.Weibull('weibull_var', alpha=1, beta=2)
# Von Mises (circular) distribution
circular_var = pm.VonMises('circular_var', mu=0, kappa=1)
# Skew-normal distribution
skewed_var = pm.SkewNormal('skewed_var', mu=0, sigma=1, alpha=3)
# Asymmetric Laplace distribution
asymmetric_laplace = pm.AsymmetricLaplace('asymmetric_laplace', b=1, kappa=0.5, mu=0)
# Kumaraswamy distribution (alternative to Beta)
kumaraswamy = pm.Kumaraswamy('kumaraswamy', a=2, b=3)
# Rice distribution (Rician distribution)
rice_var = pm.Rice('rice_var', nu=1, sigma=1)
# Ex-Gaussian distribution (exponentially modified Gaussian)
ex_gaussian = pm.ExGaussian('ex_gaussian', mu=0, sigma=1, nu=0.5)
# Moyal distribution
moyal_var = pm.Moyal('moyal_var', mu=0, sigma=1)
# Logit-normal distribution
logit_normal = pm.LogitNormal('logit_normal', mu=0, sigma=1)
# Dirac delta distribution (point mass)
dirac_delta = pm.DiracDelta('dirac_delta', c=0)

Basic discrete distributions for binary and count data:
from pymc import Bernoulli, Binomial, BetaBinomial
# Bernoulli for binary outcomes
success = Bernoulli('success', p=0.7)
# Binomial for count of successes
num_successes = Binomial('num_successes', n=10, p=0.3)
# Beta-binomial for overdispersed counts
overdispersed = BetaBinomial('overdispersed', alpha=2, beta=5, n=20)
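As a small sketch of how these plug into a likelihood (toy coin-flip data assumed):

flips = np.array([1, 0, 1, 1, 0, 1, 1, 1, 0, 1])  # toy binary outcomes
with pm.Model():
    p = pm.Beta('p', alpha=1, beta=1)         # uniform prior on the success rate
    pm.Bernoulli('obs', p=p, observed=flips)  # Bernoulli likelihood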
from pymc import Poisson, NegativeBinomial, ZeroInflatedPoisson
# Poisson distribution for count data
counts = Poisson('counts', mu=3.5)
# Negative binomial for overdispersed counts
overdispersed_counts = NegativeBinomial('overdispersed_counts', mu=3, alpha=2)
# Zero-inflated Poisson
zero_inflated = ZeroInflatedPoisson('zero_inflated', psi=0.2, mu=3)

from pymc import Categorical, DiscreteUniform
# Categorical distribution
category = Categorical('category', p=[0.2, 0.3, 0.5])
# Discrete uniform distribution
discrete_uniform = DiscreteUniform('discrete_uniform', lower=1, upper=6)

# Geometric distribution
waiting_time = pm.Geometric('waiting_time', p=0.1)
# Hypergeometric distribution
hypergeom = pm.HyperGeometric('hypergeom', N=20, k=7, n=12)
# Point mass at a constant (ConstantDist/Constant was renamed DiracDelta in PyMC >= 4)
constant_val = pm.DiracDelta('constant_val', c=5)
# Discrete Weibull distribution
discrete_weibull = pm.DiscreteWeibull('discrete_weibull', q=0.8, beta=1.5)
# Ordered distributions for ordinal data
ordered_logistic = pm.OrderedLogistic('ordered_logistic', eta=0, cutpoints=[1, 2, 3])
ordered_probit = pm.OrderedProbit('ordered_probit', eta=0, cutpoints=[1, 2, 3])

The multivariate normal is the cornerstone of multivariate continuous modeling:
from pymc import MvNormal
import numpy as np
# With covariance matrix
mu = np.zeros(3)
cov = np.eye(3)
mv_normal = MvNormal('mv_normal', mu=mu, cov=cov)
# With precision matrix
prec = np.eye(3)
mv_normal_prec = MvNormal('mv_normal_prec', mu=mu, tau=prec)
# With Cholesky decomposition
chol = np.linalg.cholesky(cov)
mv_normal_chol = MvNormal('mv_normal_chol', mu=mu, chol=chol)

The Dirichlet distribution is used for probability vectors that sum to 1:
from pymc import Dirichlet
# Dirichlet distribution for probability simplex
a = np.ones(4) * 2 # Concentration parameters
prob_vector = Dirichlet('prob_vector', a=a)
# Stick-breaking construction
stick_weights = pm.StickBreakingWeights('stick_weights', alpha=1, K=5)

# Wishart distribution for covariance matrices
# (sampling it directly is problematic; LKJCholeskyCov is usually preferred)
wishart_cov = pm.Wishart('wishart_cov', nu=5, V=np.eye(3))
# LKJ correlation matrix prior
corr_matrix = pm.LKJCorr('corr_matrix', n=3, eta=2)
# LKJ Cholesky covariance
lkj_cov = pm.LKJCholeskyCov('lkj_cov', n=3, eta=2, sd_dist=pm.HalfNormal.dist(1))
# Matrix normal distribution
matrix_normal = pm.MatrixNormal('matrix_normal', mu=np.zeros((3, 4)),
                                rowcov=np.eye(3), colcov=np.eye(4))
# Zero-sum normal (constraint: sum to zero)
zero_sum_normal = pm.ZeroSumNormal('zero_sum_normal', sigma=1, shape=4)
# Pólya-Gamma distribution (e.g., for logistic data augmentation)
polya_gamma = pm.PolyaGamma('polya_gamma', h=1, z=0)
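A common pattern, sketched here on assumed toy data, combines LKJCholeskyCov with MvNormal to put a full covariance prior on multivariate observations; LKJCholeskyCov returns the Cholesky factor along with correlations and standard deviations by default:

y_data = np.random.normal(size=(100, 3))  # toy data
with pm.Model():
    chol, corr, stds = pm.LKJCholeskyCov('chol_cov', n=3, eta=2,
                                         sd_dist=pm.HalfNormal.dist(1))
    mu = pm.Normal('mu', 0, 1, shape=3)
    pm.MvNormal('obs', mu=mu, chol=chol, observed=y_data)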
from pymc import Multinomial, DirichletMultinomial
# Multinomial distribution
n_trials = 100
p_categories = [0.2, 0.3, 0.5]
multinomial_counts = Multinomial('multinomial_counts', n=n_trials, p=p_categories)
# Dirichlet-multinomial (overdispersed)
dirichlet_mult = DirichletMultinomial('dirichlet_mult', n=100, a=[2, 3, 5])

Time series distributions model sequential structure:
from pymc import RandomWalk, GaussianRandomWalk, MvGaussianRandomWalk
# Generic random walk, built from an initial and an innovation distribution
random_walk = RandomWalk('random_walk',
                         init_dist=pm.Normal.dist(0, 1),
                         innovation_dist=pm.Normal.dist(mu=0.1, sigma=1),
                         steps=100)
# Gaussian random walk (convenience wrapper with normal innovations)
gaussian_rw = GaussianRandomWalk('gaussian_rw', mu=0, sigma=1, steps=100)
# Multivariate Gaussian random walk
mv_rw = MvGaussianRandomWalk('mv_rw', mu=np.zeros(2), chol=np.eye(2), steps=100)
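A sketch of a random walk used as a latent trend over assumed toy data (steps is the series length minus one):

y_series = np.cumsum(np.random.normal(size=100))  # toy series
with pm.Model():
    sigma_rw = pm.HalfNormal('sigma_rw', 1)
    trend = pm.GaussianRandomWalk('trend', sigma=sigma_rw, steps=99,
                                  init_dist=pm.Normal.dist(0, 1))
    pm.Normal('obs', mu=trend, sigma=0.5, observed=y_series)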
from pymc import AR
# Autoregressive process
ar_coeffs = [0.8, -0.2] # AR(2) coefficients
ar_process = AR('ar_process', rho=ar_coeffs, sigma=1, steps=100)
# With constant term: when constant=True, the first element of rho is the intercept
ar_with_constant = AR('ar_with_constant', rho=[0.5] + ar_coeffs, sigma=1,
                      constant=True, steps=100)

from pymc import GARCH11
# GARCH(1,1) process for volatility modeling
garch_process = GARCH11('garch_process', omega=0.01, alpha_1=0.1,
                        beta_1=0.8, initial_vol=0.1, steps=100)

Mixture distributions combine weighted component distributions:
from pymc import Mixture, NormalMixture
# General mixture distribution
components = [pm.Normal.dist(mu=0, sigma=1), pm.Normal.dist(mu=5, sigma=2)]
weights = [0.3, 0.7]
mixture = Mixture('mixture', w=weights, comp_dists=components)
# Normal mixture (optimized implementation)
normal_mixture = NormalMixture('normal_mixture', w=[0.4, 0.6],
                               mu=[0, 3], sigma=[1, 1.5])

# Zero-inflated Poisson
zip_model = pm.ZeroInflatedPoisson('zip_model', psi=0.3, mu=2.5)
# Zero-inflated binomial
zib_model = pm.ZeroInflatedBinomial('zib_model', psi=0.2, n=10, p=0.4)
# Zero-inflated negative binomial
zinb_model = pm.ZeroInflatedNegativeBinomial('zinb_model', psi=0.1, mu=3, alpha=2)

# Hurdle models (two-part): unlike zero-inflated models, the count component is
# truncated at zero, so every zero comes from the hurdle part.
# Hurdle Poisson
hurdle_poisson = pm.HurdlePoisson('hurdle_poisson', psi=0.3, mu=2.5)
# Hurdle negative binomial
hurdle_nb = pm.HurdleNegativeBinomial('hurdle_nb', psi=0.2, mu=3, alpha=2)
# Hurdle log-normal
hurdle_lognorm = pm.HurdleLogNormal('hurdle_lognorm', psi=0.4, mu=1, sigma=0.5)

Truncated wraps an existing distribution and renormalizes it to a restricted interval:
from pymc import Truncated
# Truncated normal distribution
truncated_normal = Truncated('truncated_normal',
                             pm.Normal.dist(mu=0, sigma=1),
                             lower=-2, upper=2)
# Truncated exponential
truncated_exp = Truncated('truncated_exp',
                          pm.Exponential.dist(lam=1),
                          lower=0.1, upper=5)

Censored handles data recorded only up to a bound, with the excess probability mass placed at the bound:
from pymc import Censored
# Left-censored normal distribution
censored_normal = Censored('censored_normal',
                           pm.Normal.dist(mu=0, sigma=1),
                           lower=None, upper=2)
# Interval-censored data
interval_censored = Censored('interval_censored',
                             pm.LogNormal.dist(mu=0, sigma=1),
                             lower=0.5, upper=3)
CustomDist builds a distribution from user-supplied logp and random functions:
from pymc import CustomDist
import pytensor.tensor as pt
def custom_logp(value, mu, sigma):
    """Custom log-probability function."""
    return pm.logp(pm.Normal.dist(mu=mu, sigma=sigma), value)

def custom_random(mu, sigma, rng=None, size=None):
    """Custom random sampling function."""
    return rng.normal(mu, sigma, size=size)
# Create the custom distribution (distribution parameters are passed positionally)
custom_dist = CustomDist('custom_dist',
                         0, 1,  # mu, sigma
                         logp=custom_logp,
                         random=custom_random)
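In recent PyMC versions, CustomDist can alternatively be defined by composing existing distributions, letting PyMC derive logp and sampling automatically; a sketch of a shifted exponential:

def shifted_exp(lam, shift, size):
    # Return a graph of existing distributions; PyMC infers the rest
    return pm.Exponential.dist(lam, size=size) + shift

with pm.Model():
    lam = pm.HalfNormal('lam', 1)
    x = pm.CustomDist('x', lam, 1.0, dist=shifted_exp)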
DensityDist is the legacy name for CustomDist and is kept as an alias:
from pymc import DensityDist

def custom_likelihood(value, theta):
    """Custom likelihood function."""
    return pt.sum(value * pt.log(theta) - theta)
# Density-only distribution (theta is passed positionally; placeholder data shown)
observed_data = np.array([1.0, 2.0, 0.5])
density_dist = DensityDist('density_dist',
                           1.5,  # theta
                           logp=custom_likelihood,
                           observed=observed_data)

# Draw samples from distributions
samples = pm.draw(Normal.dist(mu=0, sigma=1), draws=1000)
# Draw from multiple distributions
multi_samples = pm.draw([Normal.dist(0, 1), Beta.dist(2, 3)], draws=500)

# Create distribution objects (not random variables)
normal_dist = pm.Normal.dist(mu=0, sigma=1)
beta_dist = pm.Beta.dist(alpha=2, beta=5)
# Use in a model: named transformations are registered with Deterministic
with pm.Model():
    p = pm.Beta('p', alpha=2, beta=5)
    transformed = pm.Deterministic('transformed', pm.math.logit(p))
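Distribution objects also work directly with pm.logp and pm.draw; a small sketch (.eval() runs the resulting PyTensor graph):

logp_graph = pm.logp(pm.Normal.dist(mu=0, sigma=1), 0.5)
print(logp_graph.eval())                      # log-density of N(0, 1) at 0.5
print(pm.draw(pm.Beta.dist(2, 5), draws=5))   # five forward draws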
# Hierarchical normal model (placeholder data for illustration)
n_groups = 3
group_idx = np.repeat(np.arange(n_groups), 10)
data = np.random.normal(size=30)

with pm.Model() as hierarchical:
    # Hyperparameters
    mu_mu = pm.Normal('mu_mu', 0, 10)
    sigma_mu = pm.HalfNormal('sigma_mu', 5)
    # Group means
    mu = pm.Normal('mu', mu=mu_mu, sigma=sigma_mu, shape=n_groups)
    # Observations
    sigma = pm.HalfNormal('sigma', 2)
    y = pm.Normal('y', mu=mu[group_idx], sigma=sigma, observed=data)

# Mixture of regression models
# Placeholder data dimensions (for illustration)
n_components, n_features = 2, 3
X = np.random.normal(size=(100, n_features))
data = np.random.normal(size=100)

with pm.Model() as mixture_regression:
    # Mixture weights
    w = pm.Dirichlet('w', a=np.ones(n_components))
    # Component-specific parameters
    beta = pm.Normal('beta', 0, 1, shape=(n_components, n_features))
    sigma = pm.HalfNormal('sigma', 1, shape=n_components)
    # Component means
    mu = pm.math.dot(X, beta.T)  # Shape: (n_obs, n_components)
    # Mixture likelihood
    components = [pm.Normal.dist(mu=mu[:, i], sigma=sigma[i])
                  for i in range(n_components)]
    y = pm.Mixture('y', w=w, comp_dists=components, observed=data)

PyMC's distribution system provides a flexible foundation for building complex probabilistic models. The consistent API across all distributions enables easy composition and transformation of uncertainty representations.
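As a closing sketch (toy data assumed), these pieces compose directly with PyMC's samplers:

rng = np.random.default_rng(42)
toy = rng.normal(1.0, 2.0, size=100)

with pm.Model():
    mu = pm.Normal('mu', 0, 10)
    sigma = pm.HalfNormal('sigma', 5)
    pm.Normal('y', mu=mu, sigma=sigma, observed=toy)
    idata = pm.sample(1000, tune=1000)  # NUTS by default; returns InferenceData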
Install with Tessl CLI
npx tessl i tessl/pypi-pymc