Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor
—
PyMC provides essential mathematical functions for probabilistic modeling, including link functions, numerical utilities, and specialized operations for working with probability distributions and statistical models.
Link functions transform parameters between different scales and domains, crucial for generalized linear models and constraint handling.
import numpy as np
import pymc as pm
def logit(x):
"""
Logit function (log-odds transformation).
Parameters:
- x (array-like): Input values in [0, 1]
Returns:
- logit_x: Log-odds values in (-∞, ∞)
Formula: logit(x) = log(x / (1 - x))
"""
def invlogit(x):
"""
Inverse logit function (logistic/sigmoid transformation).
Parameters:
- x (array-like): Input values in (-∞, ∞)
Returns:
- prob_x: Probability values in [0, 1]
Formula: invlogit(x) = 1 / (1 + exp(-x))
"""
# Transform probabilities to log-odds scale
probability = 0.7
log_odds = pm.logit(probability)
print(f"P = {probability} -> logit(P) = {log_odds:.3f}")

# Transform log-odds back to probability scale
back_to_prob = pm.invlogit(log_odds)
print(f"logit^(-1)({log_odds:.3f}) = {back_to_prob:.3f}")

# Vectorized operations
probabilities = np.array([0.1, 0.3, 0.5, 0.7, 0.9])
log_odds_vec = pm.logit(probabilities)
recovered_probs = pm.invlogit(log_odds_vec)

# Use in models for bounded parameters
# ('successes' must be defined by the caller)
with pm.Model() as logistic_model:
    # Unconstrained parameter on the real line
    alpha_raw = pm.Normal('alpha_raw', 0, 2)
    # Transform to probability scale
    success_prob = pm.Deterministic('success_prob', pm.invlogit(alpha_raw))
    # Binomial likelihood
    y = pm.Binomial('y', n=10, p=success_prob, observed=successes)


def probit(x):
    """
    Probit function (inverse standard-normal CDF transformation).

    Parameters:
    - x (array-like): Input values in (0, 1)

    Returns:
    - probit_x: Standard normal quantiles in (-inf, inf)
    """
    # NumPy/SciPy reference implementation of the probit transform
    from scipy.special import ndtri
    return ndtri(np.asarray(x, dtype=float))
def invprobit(x):
    """
    Inverse probit function (standard-normal CDF transformation).

    Parameters:
    - x (array-like): Input values in (-inf, inf)

    Returns:
    - prob_x: Probability values in (0, 1)
    """
    # NumPy/SciPy reference implementation of the inverse-probit transform
    from scipy.special import ndtr
    return ndtr(np.asarray(x, dtype=float))
# Probit transformation (alternative to logit)
prob_value = 0.8
probit_value = pm.probit(prob_value)
recovered = pm.invprobit(probit_value)
print(f"P = {prob_value} -> probit(P) = {probit_value:.3f}")
print(f"probit^(-1)({probit_value:.3f}) = {recovered:.3f}")

# Probit regression model ('X' and 'binary_outcomes' must be defined by the caller)
with pm.Model() as probit_regression:
    # Linear predictor
    beta = pm.Normal('beta', 0, 1, shape=X.shape[1])
    linear_pred = pm.math.dot(X, beta)
    # Probit link
    success_prob = pm.Deterministic('success_prob', pm.invprobit(linear_pred))
    # Bernoulli likelihood
    y = pm.Bernoulli('y', p=success_prob, observed=binary_outcomes)


def logsumexp(x, axis=None, keepdims=False):
    """
    Numerically stable computation of log(sum(exp(x))).

    Parameters:
    - x (array-like): Input array
    - axis (int, optional): Axis along which to reduce
    - keepdims (bool): Keep reduced dimensions

    Returns:
    - result: log(sum(exp(x))) computed without overflow/underflow
    """
    x = np.asarray(x, dtype=float)
    # Shift by the per-axis max so exp() never overflows; map -inf maxima to 0
    shift = np.max(x, axis=axis, keepdims=True)
    shift = np.where(np.isfinite(shift), shift, 0.0)
    out = np.log(np.sum(np.exp(x - shift), axis=axis, keepdims=True)) + shift
    if keepdims:
        return out
    return np.squeeze(out) if axis is None else np.squeeze(out, axis=axis)
def logaddexp(x1, x2):
"""
Numerically stable computation of log(exp(x1) + exp(x2)).
Parameters:
- x1, x2 (array-like): Input values
Returns:
- result: log(exp(x1) + exp(x2)) computed stably
"""
# Stable computation of mixture probabilities
log_weights = np.array([-2.3, -1.8, -3.1])  # Log mixture weights
log_normalizing_constant = pm.logsumexp(log_weights)
normalized_log_weights = log_weights - log_normalizing_constant
print(f"Log weights: {log_weights}")
print(f"Log Z: {log_normalizing_constant:.3f}")
print(f"Normalized log weights: {normalized_log_weights}")

# Stable addition in log space
log_a = -10.5
log_b = -10.2
log_sum = pm.logaddexp(log_a, log_b)
print(f"log(exp({log_a}) + exp({log_b})) = {log_sum:.6f}")

# Compare with naive computation (may underflow)
naive_sum = np.log(np.exp(log_a) + np.exp(log_b))  # May be -inf
print(f"Naive computation: {naive_sum}")


def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
    """
    Expand packed triangular values to a full matrix.

    Parameters:
    - n (int): Dimension of output matrix
    - packed (array): Packed triangular values, length n*(n+1)/2
    - lower (bool): Expand as lower triangular (default: True)
    - diagonal_only (bool): Return only the diagonal elements

    Returns:
    - matrix: Expanded (n, n) matrix, or the length-n diagonal if diagonal_only
    """
    # NumPy reference implementation mirroring pm.expand_packed_triangular
    packed = np.asarray(packed)
    full = np.zeros((n, n), dtype=packed.dtype)
    idx = np.tril_indices(n) if lower else np.triu_indices(n)
    full[idx] = packed
    return np.diag(full) if diagonal_only else full
# Pack lower triangular matrix
n_dim = 3
n_packed = n_dim * (n_dim + 1) // 2  # Number of packed elements (diagonal included)
# Packed values for lower triangle, row-major: [L11, L21, L22, L31, L32, L33]
packed_values = np.array([1.0, 0.5, 2.0, 0.3, 0.8, 1.5])

# Expand to full matrix
L_matrix = pm.expand_packed_triangular(n_dim, packed_values, lower=True)
print("Lower triangular matrix:")
print(L_matrix.eval())

# Use in Cholesky decomposition parameterization
# ('data' must be defined by the caller)
with pm.Model() as cholesky_model:
    # Packed Cholesky factors
    L_packed = pm.Normal('L_packed', 0, 1, shape=n_packed)
    # Expand to Cholesky matrix
    L = pm.expand_packed_triangular(n_dim, L_packed, lower=True)
    # Covariance matrix (shown for reference; MvNormal below uses chol directly)
    Sigma = pm.math.dot(L, L.T)
    # Multivariate normal with Cholesky parameterization
    y = pm.MvNormal('y', mu=np.zeros(n_dim), chol=L, observed=data)

# Common tensor operations available through pm.math
# NOTE(review): free random variables (pm.Normal(...) etc.) must be created
# inside a pm.Model() context; shown bare here for brevity only.

# Matrix multiplication
A = pm.Data('A', np.random.randn(5, 3))
B = pm.Data('B', np.random.randn(3, 4))
C = pm.math.dot(A, B)  # Matrix product

# Element-wise operations
x = pm.Normal('x', 0, 1, shape=10)
squared = pm.math.sqr(x)  # Element-wise square
sqrt_x = pm.math.sqrt(pm.math.abs(x))  # Square root of |x| (guards the domain)
exp_x = pm.math.exp(x)  # Element-wise exponential
log_x = pm.math.log(pm.math.abs(x))  # Logarithm of |x| (guards the domain)

# Trigonometric functions
sin_x = pm.math.sin(x)  # Sine
cos_x = pm.math.cos(x)  # Cosine
tan_x = pm.math.tan(x)  # Tangent

# Hyperbolic functions
sinh_x = pm.math.sinh(x)  # Hyperbolic sine
cosh_x = pm.math.cosh(x)  # Hyperbolic cosine
tanh_x = pm.math.tanh(x)  # Hyperbolic tangent

# Statistical operations
# NOTE(review): pm.math re-exports PyTensor tensor ops; confirm each name
# below (e.g. median) exists in the installed PyMC version before relying on it.
data_tensor = pm.Data('data', np.random.randn(100, 5))

# Summary statistics
mean_val = pm.math.mean(data_tensor, axis=0)  # Column means
std_val = pm.math.std(data_tensor, axis=0)  # Column standard deviations
var_val = pm.math.var(data_tensor, axis=0)  # Column variances

# Order statistics
median_val = pm.math.median(data_tensor, axis=0)
max_val = pm.math.max(data_tensor, axis=0)
min_val = pm.math.min(data_tensor, axis=0)

# Cumulative operations
cumsum = pm.math.cumsum(data_tensor, axis=0)  # Cumulative sum
cumprod = pm.math.cumprod(data_tensor, axis=0)  # Cumulative product

# Logical and comparison operations
x = pm.Normal('x', 0, 1, shape=5)
y = pm.Normal('y', 0, 1, shape=5)

# Comparisons (elementwise, produce boolean tensors)
greater = pm.math.gt(x, y)  # x > y
less_equal = pm.math.le(x, 0)  # x <= 0
equal = pm.math.eq(x, 0)  # x == 0

# Logical operations
and_result = pm.math.and_(pm.math.gt(x, 0), pm.math.lt(x, 1))  # 0 < x < 1
or_result = pm.math.or_(pm.math.lt(x, -1), pm.math.gt(x, 1))  # x < -1 OR x > 1

# Conditional operations (switch/where)
positive_x = pm.math.switch(pm.math.gt(x, 0), x, 0)  # max(x, 0)
clipped_x = pm.math.clip(x, -2, 2)  # Clip x to [-2, 2]

# Define custom mathematical functions using PyTensor operations
def softplus(x):
    """Softplus function: log(1 + exp(x))."""
    one_plus_exp = 1 + pm.math.exp(x)
    return pm.math.log(one_plus_exp)
def stable_softplus(x):
    """Numerically stable softplus: for large x, log(1 + exp(x)) ~ x."""
    is_large = pm.math.gt(x, 20)
    naive = pm.math.log(1 + pm.math.exp(x))
    return pm.math.switch(is_large, x, naive)
def swish(x):
    """Swish activation function: x * sigmoid(x)."""
    sigmoid_x = pm.invlogit(x)
    return x * sigmoid_x
def gelu(x):
    """Gaussian Error Linear Unit (tanh approximation)."""
    inner = np.sqrt(2/np.pi) * (x + 0.044715 * x**3)
    return 0.5 * x * (1 + pm.math.tanh(inner))
# Use custom functions in models
with pm.Model() as custom_model:
    z = pm.Normal('z', 0, 1, shape=10)
    # Apply custom transformations as tracked deterministics
    softplus_z = pm.Deterministic('softplus_z', stable_softplus(z))
    swish_z = pm.Deterministic('swish_z', swish(z))


def log1p(x):
    """Compute log(1 + x) accurately for small x."""
    return pm.math.log1p(x)
def expm1(x):
    """Compute exp(x) - 1 accurately for small x."""
    result = pm.math.expm1(x)
    return result
def log1mexp(x):
    """Compute log(1 - exp(x)) stably for x < 0.

    Two-branch scheme: near 0 the expm1 form is accurate; for very
    negative x the log1p form is.
    """
    return pm.math.switch(
        pm.math.gt(x, -0.693),  # log(0.5): branch-selection threshold
        pm.math.log(-pm.math.expm1(x)),  # x in (log 0.5, 0): 1 - exp(x) via expm1
        pm.math.log1p(-pm.math.exp(x))  # x <= log 0.5: exp(x) is small, log1p accurate
    )
# Example: Stable computation of survival function
def log_survival_function(log_hazard_cumulative):
    """Log survival function: log(1 - CDF) = log(1 - exp(log_CDF))."""
    # NOTE(review): log1mexp expects a log-CDF (a value < 0). The argument
    # name suggests log H(t); for S(t) = exp(-H(t)) the log-survival is just
    # -H(t) — confirm which quantity callers pass.
    return log1mexp(log_hazard_cumulative)

with pm.Model() as survival_model:
    # Log cumulative hazard
    log_cum_hazard = pm.Normal('log_cum_hazard', -2, 1)
    # Stable log survival probability
    log_survival = pm.Deterministic('log_survival',
                                    log_survival_function(log_cum_hazard))

# Cholesky decomposition utilities
def cholesky_decomposition(matrix):
    """Cholesky decomposition of a positive definite matrix."""
    chol_factor = pm.math.cholesky(matrix)
    return chol_factor
def solve_triangular(A, b, lower=True):
    """Solve the triangular linear system A x = b."""
    solution = pm.math.solve_triangular(A, b, lower=lower)
    return solution
# Example: Efficient multivariate normal computation
# ('n_dim' and 'data' must be defined by the caller)
with pm.Model() as efficient_mvn:
    # sd_dist must be an *unregistered* distribution built with .dist(),
    # not a named model variable; LKJCholeskyCov returns (chol, corr, stds)
    # by default (compute_corr=True)
    chol, corr, stds = pm.LKJCholeskyCov('L_corr', n=n_dim, eta=1,
                                         sd_dist=pm.HalfNormal.dist(1))
    # Efficient sampling using the Cholesky factor
    z = pm.Normal('z', 0, 1, shape=n_dim)
    x = pm.Deterministic('x', pm.math.dot(chol, z))
    # Likelihood
    y = pm.Normal('y', mu=x, sigma=0.1, observed=data)

# Array manipulation utilities
x = pm.Normal('x', 0, 1, shape=(5, 3))

# Reshaping
x_flat = pm.math.flatten(x)  # Flatten to 1D (15 elements)
x_reshaped = pm.math.reshape(x, (3, 5))  # Reshape to (3, 5)

# Broadcasting
y = pm.Normal('y', 0, 1, shape=5)
broadcasted = x + y[:, None]  # Broadcast y across columns

# Dimension manipulation
x_expanded = pm.math.expand_dims(y, axis=1)  # Add dimension -> (5, 1)
x_squeezed = pm.math.squeeze(x_expanded)  # Remove dimensions of size 1

# Stacking and concatenation
z = pm.Normal('z', 0, 1, shape=(5, 3))
stacked = pm.math.stack([x, z], axis=0)  # Stack along new axis -> (2, 5, 3)
concatenated = pm.math.concatenate([x, z], axis=1)  # Concatenate -> (5, 6)

# Using mathematical utilities in custom distributions
def custom_logp(value, mu, sigma):
    """Custom log-probability function built from the math utilities above."""
    # Standardize the observation
    z_score = (value - mu) / sigma
    # Custom transformation of the standardized value
    warped = stable_softplus(z_score)
    # Quadratic log-probability term, then the change-of-variables correction
    total = -0.5 * pm.math.sum(warped**2)
    total -= pm.math.sum(pm.math.log(sigma))  # Jacobian
    return total
# Use in CustomDist ('data' must be defined by the caller)
with pm.Model() as custom_dist_model:
    mu = pm.Normal('mu', 0, 1)
    sigma = pm.HalfNormal('sigma', 1)
    # Distribution parameters are passed positionally (*dist_params) and are
    # forwarded to logp in the same order after `value`
    x = pm.CustomDist('x', mu, sigma,
                      logp=custom_logp,
                      observed=data)

# Common transformations for constrained parameters
def constrain_positive(x):
    """Map an unconstrained value to the positive reals via softplus."""
    return stable_softplus(x)
def constrain_unit_interval(x):
    """Map an unconstrained value into [0, 1] via the logistic sigmoid."""
    return pm.invlogit(x)
def constrain_correlation(x):
    """Map an unconstrained value into [-1, 1] via tanh."""
    return pm.math.tanh(x)
def stick_breaking(x):
    """Stick-breaking transform for simplex"""
    # x should be shape (..., K-1) for K-simplex
    # NOTE(review): the Python loop needs x.shape[-1] to be a concrete int;
    # a symbolic last dimension would break range() — confirm callers pass
    # tensors with a static trailing dimension.
    stick_segments = pm.invlogit(x)
    # Compute stick-breaking weights
    remaining_stick = 1.0
    weights = []
    for i in range(x.shape[-1]):
        # Fraction of the remaining stick assigned to component i
        weight = stick_segments[..., i] * remaining_stick
        weights.append(weight)
        remaining_stick -= weight
    # Last weight gets remaining stick
    weights.append(remaining_stick)
    return pm.math.stack(weights, axis=-1)
# Example usage in Dirichlet-like model
# ('K' and 'categories' must be defined by the caller)
with pm.Model() as stick_breaking_model:
    # Unconstrained parameters
    raw_weights = pm.Normal('raw_weights', 0, 1, shape=K-1)
    # Transform to simplex
    simplex_weights = pm.Deterministic('simplex_weights',
                                       stick_breaking(raw_weights))
    # Categorical likelihood
    y = pm.Categorical('y', p=simplex_weights, observed=categories)


def logp(dist, value):
    """
    Compute log-probability density/mass function.

    Parameters:
    - dist: PyMC distribution object (e.g. pm.Normal.dist(...))
    - value: Values at which to evaluate log-probability

    Returns:
    - log_prob: Symbolic tensor of log-probability values
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.logp(dist, value)
def logcdf(dist, value):
    """
    Compute log cumulative distribution function.

    Parameters:
    - dist: PyMC distribution object
    - value: Values at which to evaluate log-CDF

    Returns:
    - log_cdf: Symbolic tensor of log-CDF values
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.logcdf(dist, value)
def icdf(dist, q):
    """
    Compute inverse cumulative distribution function (quantile function).

    Parameters:
    - dist: PyMC distribution object
    - q: Quantile values in [0, 1]

    Returns:
    - quantiles: Values corresponding to the given quantiles
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.icdf(dist, q)
# Example usage
normal_dist = pm.Normal.dist(mu=0, sigma=1)
# Log-probability at specific values
log_prob_values = pm.logp(normal_dist, np.array([-1, 0, 1]))
# Log-CDF at specific values
log_cdf_values = pm.logcdf(normal_dist, np.array([-1, 0, 1]))
# Quantiles (inverse CDF)
quantiles = pm.icdf(normal_dist, np.array([0.025, 0.5, 0.975]))  # 95% CI


def expand_packed_triangular(n, packed):
    """
    Expand packed triangular values into full triangular matrix.

    Parameters:
    - n (int): Dimension of the triangular matrix
    - packed (array): Packed triangular values (n*(n+1)/2 elements)

    Returns:
    - triangular_matrix: Lower triangular matrix of shape (n, n)
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.expand_packed_triangular(n, packed, lower=True)

# Example: expand packed Cholesky values. The packed vector must include the
# diagonal, i.e. n*(n+1)/2 elements — not just the n*(n-1)/2 off-diagonals,
# and there is no `diag=` keyword in pm.expand_packed_triangular.
n_dims = 3
n_packed = n_dims * (n_dims + 1) // 2  # 6 packed elements, diagonal included
packed_chol = np.array([1.0, 0.5, 1.0, -0.3, 0.7, 1.0])
# Expand to a lower-triangular matrix
chol_matrix = pm.expand_packed_triangular(n_dims, packed_chol)

# Advanced numerical functions for edge cases
def logaddexp_m(x, y):
"""Numerically stable log(exp(x) + exp(y)) with memory optimization."""
def floatX(x):
"""Convert to PyTensor's floating point type."""
def constant(x):
"""Create constant tensor."""
def shared(value, name=None):
"""Create shared variable."""
# Usage in models
with pm.Model() as advanced_model:
# Create shared parameter for updating
shared_param = pm.math.shared(np.array([1.0, 2.0]), 'shared_param')
# Convert to correct dtype
precise_value = pm.math.floatX(3.14159265359)
# Create constant
fixed_value = pm.math.constant(2.718281828)PyMC's mathematical utilities provide the essential building blocks for creating sophisticated probabilistic models with proper numerical stability and efficient computation.
Install with Tessl CLI
npx tessl i tessl/pypi-pymc