CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pymc

Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor

Pending
Overview
Eval results
Files

distributions.mddocs/

PyMC Probability Distributions

PyMC provides a comprehensive collection of 80+ probability distributions for modeling various types of data and uncertainty. All distributions support automatic differentiation, efficient sampling, and seamless integration with PyMC's inference engines.

Core Distribution Interface

All PyMC distributions inherit from a common base class and share consistent APIs:

from pymc import Distribution, Continuous, Discrete

class Distribution:
    """
    Base class for all PyMC probability distributions.
    
    Parameters:
    - name (str): Name of the random variable
    - observed (array-like, optional): Observed data values
    - dims (tuple, optional): Dimension names for the variable
    - **kwargs: Distribution-specific parameters
    """
    
    def __init__(self, name, observed=None, dims=None, **kwargs):
        pass
    
    def random(self, point=None, size=None):
        """Generate random samples from the distribution."""
        pass
    
    def logp(self, value):
        """Calculate log-probability density/mass."""
        pass

Continuous Distributions

Normal Distribution

The Gaussian/normal distribution is fundamental for continuous modeling:

from pymc import Normal

# Basic normal distribution
x = Normal('x', mu=0, sigma=1)

# With array parameters
means = Normal('means', mu=[0, 1, 2], sigma=[1, 0.5, 2])

# Multivariate case - use MvNormal instead

Beta Distribution

Continuous distribution on the interval [0, 1], commonly used for probabilities:

from pymc import Beta

# Beta distribution for probability parameter
p = Beta('p', alpha=2, beta=5)

# Parameterized by mean and standard deviation
prob = Beta('prob', mu=0.3, sigma=0.1)  # Alternative parameterization

Gamma Distribution

Continuous distribution for positive values, often used for rate parameters:

from pymc import Gamma

# Rate parameter
rate = Gamma('rate', alpha=2, beta=1)

# Mean/standard-deviation parameterization (alternative to alpha/beta)
scale_param = Gamma('scale_param', mu=2, sigma=1)

# Inverse-Gamma for variance parameters  
variance = pm.InverseGamma('variance', alpha=1, beta=1)

Student's t-Distribution

Heavy-tailed alternative to normal distribution:

from pymc import StudentT

# Standard Student's t
t_var = StudentT('t_var', nu=3, mu=0, sigma=1)

# Half Student's t for positive values
positive_t = pm.HalfStudentT('positive_t', nu=3, sigma=1)

Exponential and Related Distributions

from pymc import Exponential, Laplace, LogNormal

# Exponential distribution
rate_param = Exponential('rate_param', lam=1.0)

# Laplace (double exponential) distribution
location = Laplace('location', mu=0, b=1)

# Log-normal distribution
log_scale = LogNormal('log_scale', mu=0, sigma=1)

Uniform Distributions

from pymc import Uniform, HalfFlat, Flat

# Uniform distribution on interval
uniform_var = Uniform('uniform_var', lower=0, upper=10)

# Flat priors (improper uniform distributions)
flat_prior = Flat('flat_prior')
half_flat_prior = HalfFlat('half_flat_prior')  # For positive values

Specialized Continuous Distributions

# Cauchy distribution (heavy tails)
cauchy_var = pm.Cauchy('cauchy_var', alpha=0, beta=1)

# Chi-squared distribution
chi2_var = pm.ChiSquared('chi2_var', nu=3)

# Pareto distribution (power law)
pareto_var = pm.Pareto('pareto_var', alpha=1, m=1)

# Weibull distribution
weibull_var = pm.Weibull('weibull_var', alpha=1, beta=2)

# Von Mises (circular) distribution
circular_var = pm.VonMises('circular_var', mu=0, kappa=1)

# Skew-normal distribution
skewed_var = pm.SkewNormal('skewed_var', mu=0, sigma=1, alpha=3)

# Asymmetric Laplace distribution
asymmetric_laplace = pm.AsymmetricLaplace('asymmetric_laplace', b=1, kappa=0.5, mu=0)

# Kumaraswamy distribution (alternative to Beta)
kumaraswamy = pm.Kumaraswamy('kumaraswamy', a=2, b=3)

# Rice distribution (Rician distribution)
rice_var = pm.Rice('rice_var', nu=1, sigma=1)

# Ex-Gaussian distribution (exponentially modified Gaussian)
ex_gaussian = pm.ExGaussian('ex_gaussian', mu=0, sigma=1, nu=0.5)

# Moyal distribution
moyal_var = pm.Moyal('moyal_var', mu=0, sigma=1)

# Logit-normal distribution
logit_normal = pm.LogitNormal('logit_normal', mu=0, sigma=1)

# Dirac delta distribution (point mass)
dirac_delta = pm.DiracDelta('dirac_delta', c=0)

Discrete Distributions

Bernoulli and Binomial

Basic discrete distributions for binary and count data:

from pymc import Bernoulli, Binomial, BetaBinomial

# Bernoulli for binary outcomes
success = Bernoulli('success', p=0.7)

# Binomial for count of successes
num_successes = Binomial('num_successes', n=10, p=0.3)

# Beta-binomial for overdispersed counts
overdispersed = BetaBinomial('overdispersed', alpha=2, beta=5, n=20)

Poisson and Related Distributions

from pymc import Poisson, NegativeBinomial, ZeroInflatedPoisson

# Poisson distribution for count data
counts = Poisson('counts', mu=3.5)

# Negative binomial for overdispersed counts
overdispersed_counts = NegativeBinomial('overdispersed_counts', mu=3, alpha=2)

# Zero-inflated Poisson
zero_inflated = ZeroInflatedPoisson('zero_inflated', psi=0.2, mu=3)

Categorical Distributions

from pymc import Categorical, DiscreteUniform

# Categorical distribution
category = Categorical('category', p=[0.2, 0.3, 0.5])

# Discrete uniform distribution
discrete_uniform = DiscreteUniform('discrete_uniform', lower=1, upper=6)

Other Discrete Distributions

# Geometric distribution
waiting_time = pm.Geometric('waiting_time', p=0.1)

# Hypergeometric distribution  
hypergeom = pm.HyperGeometric('hypergeom', N=20, k=7, n=12)

# Point mass at a constant (DiracDelta replaces the legacy Constant/ConstantDist)
constant_val = pm.DiracDelta('constant_val', c=5)

# Discrete Weibull distribution
discrete_weibull = pm.DiscreteWeibull('discrete_weibull', q=0.8, beta=1.5)

# Ordered distributions for ordinal data
ordered_logistic = pm.OrderedLogistic('ordered_logistic', eta=0, cutpoints=[1, 2, 3])
ordered_probit = pm.OrderedProbit('ordered_probit', eta=0, cutpoints=[1, 2, 3])

Multivariate Distributions

Multivariate Normal

The cornerstone of multivariate continuous modeling:

from pymc import MvNormal
import numpy as np

# With covariance matrix
mu = np.zeros(3)
cov = np.eye(3)
mv_normal = MvNormal('mv_normal', mu=mu, cov=cov)

# With precision matrix  
prec = np.eye(3)
mv_normal_prec = MvNormal('mv_normal_prec', mu=mu, tau=prec)

# With Cholesky decomposition
chol = np.linalg.cholesky(cov)
mv_normal_chol = MvNormal('mv_normal_chol', mu=mu, chol=chol)

Dirichlet Distribution

For probability vectors that sum to 1:

from pymc import Dirichlet

# Dirichlet distribution for probability simplex
a = np.ones(4) * 2  # Concentration parameters
prob_vector = Dirichlet('prob_vector', a=a)

# Stick-breaking construction
stick_weights = pm.StickBreakingWeights('stick_weights', alpha=1, K=5)

Matrix and Structured Distributions

# Wishart distribution for covariance matrices
wishart_cov = pm.Wishart('wishart_cov', nu=5, V=np.eye(3))

# LKJ correlation matrix prior
corr_matrix = pm.LKJCorr('corr_matrix', n=3, eta=2)

# LKJ Cholesky covariance
lkj_cov = pm.LKJCholeskyCov('lkj_cov', n=3, eta=2, sd_dist=pm.HalfNormal.dist(1))

# Matrix normal distribution
matrix_normal = pm.MatrixNormal('matrix_normal', mu=np.zeros((3, 4)), 
                               rowcov=np.eye(3), colcov=np.eye(4))

# Zero-sum normal (constraint: sum to zero)
zero_sum_normal = pm.ZeroSumNormal('zero_sum_normal', sigma=1, shape=4)

# Polya-Gamma distribution (data augmentation for logistic models)
polya_gamma = pm.PolyaGamma('polya_gamma', h=1, z=0)

Multinomial Distributions

from pymc import Multinomial, DirichletMultinomial

# Multinomial distribution
n_trials = 100
p_categories = [0.2, 0.3, 0.5]
multinomial_counts = Multinomial('multinomial_counts', n=n_trials, p=p_categories)

# Dirichlet-multinomial (overdispersed)
dirichlet_mult = DirichletMultinomial('dirichlet_mult', n=100, a=[2, 3, 5])

Time Series Distributions

Random Walk Processes

from pymc import RandomWalk, GaussianRandomWalk, MvGaussianRandomWalk

# Generic random walk: specify the initial and innovation distributions
random_walk = RandomWalk('random_walk', init_dist=pm.Normal.dist(0, 1),
                         innovation_dist=pm.Normal.dist(mu=0.1, sigma=1), steps=100)

# Gaussian random walk (more efficient)
gaussian_rw = GaussianRandomWalk('gaussian_rw', mu=0, sigma=1, steps=100)

# Multivariate Gaussian random walk
mv_rw = MvGaussianRandomWalk('mv_rw', mu=np.zeros(2), chol=np.eye(2), steps=100)

Autoregressive Processes

from pymc import AR

# Autoregressive process
ar_coeffs = [0.8, -0.2]  # AR(2) coefficients
ar_process = AR('ar_process', rho=ar_coeffs, sigma=1, steps=100)

# With constant term
ar_with_constant = AR('ar_with_constant', rho=ar_coeffs, sigma=1, 
                      constant=True, steps=100)

Volatility Models

from pymc import GARCH11

# GARCH(1,1) process for volatility modeling
garch_process = GARCH11('garch_process', omega=0.01, alpha_1=0.1, 
                        beta_1=0.8, initial_vol=0.1, steps=100)

Mixture Distributions

General Mixture Models

from pymc import Mixture, NormalMixture

# General mixture distribution
components = [pm.Normal.dist(mu=0, sigma=1), pm.Normal.dist(mu=5, sigma=2)]
weights = [0.3, 0.7]
mixture = Mixture('mixture', w=weights, comp_dists=components)

# Normal mixture (optimized implementation)
normal_mixture = NormalMixture('normal_mixture', w=[0.4, 0.6], 
                               mu=[0, 3], sigma=[1, 1.5])

Zero-Inflated Models

# Zero-inflated Poisson
zip_model = pm.ZeroInflatedPoisson('zip_model', psi=0.3, mu=2.5)

# Zero-inflated binomial
zib_model = pm.ZeroInflatedBinomial('zib_model', psi=0.2, n=10, p=0.4)

# Zero-inflated negative binomial  
zinb_model = pm.ZeroInflatedNegativeBinomial('zinb_model', psi=0.1, mu=3, alpha=2)

Hurdle Models

# Hurdle Poisson (two-part model)
hurdle_poisson = pm.HurdlePoisson('hurdle_poisson', psi=0.3, mu=2.5)

# Hurdle negative binomial
hurdle_nb = pm.HurdleNegativeBinomial('hurdle_nb', psi=0.2, mu=3, alpha=2)

# Hurdle log-normal
hurdle_lognorm = pm.HurdleLogNormal('hurdle_lognorm', psi=0.4, mu=1, sigma=0.5)

Distribution Transformations

Truncated Distributions

from pymc import Truncated

# Truncated normal distribution
truncated_normal = Truncated('truncated_normal', 
                            pm.Normal.dist(mu=0, sigma=1),
                            lower=-2, upper=2)

# Truncated exponential
truncated_exp = Truncated('truncated_exp',
                         pm.Exponential.dist(lam=1),
                         lower=0.1, upper=5)

Censored Distributions

from pymc import Censored

# Left-censored normal distribution
censored_normal = Censored('censored_normal',
                          pm.Normal.dist(mu=0, sigma=1),
                          lower=None, upper=2)

# Interval-censored data
interval_censored = Censored('interval_censored',
                            pm.LogNormal.dist(mu=0, sigma=1),
                            lower=0.5, upper=3)

Custom Distributions

CustomDist for New Distributions

from pymc import CustomDist
import pytensor.tensor as pt

def custom_logp(value, mu, sigma):
    """Custom log-probability function."""
    return pm.logp(pm.Normal.dist(mu=mu, sigma=sigma), value)

def custom_random(mu, sigma, rng=None, size=None):
    """Custom random sampling function."""
    return rng.normal(mu, sigma, size=size)

# Create custom distribution
custom_dist = CustomDist('custom_dist',
                        mu=0, sigma=1,
                        logp=custom_logp,
                        random=custom_random)

DensityDist for Likelihood-Only Models

from pymc import DensityDist

def custom_likelihood(value, theta):
    """Custom likelihood function."""
    return pt.sum(value * pt.log(theta) - theta)

# Density-only distribution
density_dist = DensityDist('density_dist', 
                          theta=1.5,
                          logp=custom_likelihood,
                          observed=observed_data)

Distribution Utilities

Drawing Samples

# Draw samples from distributions
samples = pm.draw(Normal.dist(mu=0, sigma=1), draws=1000)

# Draw from multiple distributions
multi_samples = pm.draw([Normal.dist(0, 1), Beta.dist(2, 3)], draws=500)

Working with Distribution Objects

# Create distribution objects (not random variables)
normal_dist = pm.Normal.dist(mu=0, sigma=1)
beta_dist = pm.Beta.dist(alpha=2, beta=5)

# Use in transformations (inside a model context)
transformed = pm.Deterministic('transformed', pm.math.logit(beta_dist))

Usage Patterns

Hierarchical Models

# Hierarchical normal model
with pm.Model() as hierarchical:
    # Hyperparameters
    mu_mu = pm.Normal('mu_mu', 0, 10)
    sigma_mu = pm.HalfNormal('sigma_mu', 5)
    
    # Group means
    mu = pm.Normal('mu', mu=mu_mu, sigma=sigma_mu, shape=n_groups)
    
    # Observations
    sigma = pm.HalfNormal('sigma', 2)
    y = pm.Normal('y', mu=mu[group_idx], sigma=sigma, observed=data)

Mixture of Experts

# Mixture of regression models
with pm.Model() as mixture_regression:
    # Mixture weights
    w = pm.Dirichlet('w', a=np.ones(n_components))
    
    # Component-specific parameters
    beta = pm.Normal('beta', 0, 1, shape=(n_components, n_features))
    sigma = pm.HalfNormal('sigma', 1, shape=n_components)
    
    # Component means
    mu = pm.math.dot(X, beta.T)  # Shape: (n_obs, n_components)
    
    # Mixture likelihood
    components = [pm.Normal.dist(mu=mu[:, i], sigma=sigma[i]) 
                  for i in range(n_components)]
    y = pm.Mixture('y', w=w, comp_dists=components, observed=data)

PyMC's distribution system provides a flexible foundation for building complex probabilistic models. The consistent API across all distributions enables easy composition and transformation of uncertainty representations.

Install with Tessl CLI

npx tessl i tessl/pypi-pymc

docs

data.md

distributions.md

gp.md

index.md

math.md

model.md

ode.md

sampling.md

stats.md

variational.md

tile.json