Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor
—
PyMC provides essential mathematical functions for probabilistic modeling, including link functions, numerical utilities, and specialized operations for working with probability distributions and statistical models.
Link functions transform parameters between different scales and domains, crucial for generalized linear models and constraint handling.
import numpy as np
import pymc as pm
def logit(x):
"""
Logit function (log-odds transformation).
Parameters:
- x (array-like): Input values in [0, 1]
Returns:
- logit_x: Log-odds values in (-∞, ∞)
Formula: logit(x) = log(x / (1 - x))
"""
def invlogit(x):
"""
Inverse logit function (logistic/sigmoid transformation).
Parameters:
- x (array-like): Input values in (-∞, ∞)
Returns:
- prob_x: Probability values in [0, 1]
Formula: invlogit(x) = 1 / (1 + exp(-x))
"""
# Transform probabilities to log-odds scale
probability = 0.7
log_odds = pm.logit(probability)
print(f"P = {probability} -> logit(P) = {log_odds:.3f}")

# Transform log-odds back to probability scale
back_to_prob = pm.invlogit(log_odds)
print(f"logit^(-1)({log_odds:.3f}) = {back_to_prob:.3f}")

# Vectorized operations
probabilities = np.array([0.1, 0.3, 0.5, 0.7, 0.9])
log_odds_vec = pm.logit(probabilities)
recovered_probs = pm.invlogit(log_odds_vec)

# Use in models for bounded parameters
# ('successes' must be defined by the caller)
with pm.Model() as logistic_model:
    # Unconstrained parameter on the real line
    alpha_raw = pm.Normal('alpha_raw', 0, 2)
    # Transform to probability scale
    success_prob = pm.Deterministic('success_prob', pm.invlogit(alpha_raw))
    # Binomial likelihood
    y = pm.Binomial('y', n=10, p=success_prob, observed=successes)


def probit(x):
    """
    Probit function (inverse standard-normal CDF transformation).

    Parameters:
    - x (array-like): Input values in (0, 1)

    Returns:
    - probit_x: Standard normal quantiles in (-inf, inf)
    """
    # NumPy/SciPy reference implementation of the probit transform
    from scipy.special import ndtri
    return ndtri(np.asarray(x, dtype=float))
def invprobit(x):
    """
    Inverse probit function (standard-normal CDF transformation).

    Parameters:
    - x (array-like): Input values in (-inf, inf)

    Returns:
    - prob_x: Probability values in (0, 1)
    """
    # NumPy/SciPy reference implementation of the inverse-probit transform
    from scipy.special import ndtr
    return ndtr(np.asarray(x, dtype=float))
# Probit transformation (alternative to logit)
prob_value = 0.8
probit_value = pm.probit(prob_value)
recovered = pm.invprobit(probit_value)
print(f"P = {prob_value} -> probit(P) = {probit_value:.3f}")
print(f"probit^(-1)({probit_value:.3f}) = {recovered:.3f}")

# Probit regression model ('X' and 'binary_outcomes' must be defined by the caller)
with pm.Model() as probit_regression:
    # Linear predictor
    beta = pm.Normal('beta', 0, 1, shape=X.shape[1])
    linear_pred = pm.math.dot(X, beta)
    # Probit link
    success_prob = pm.Deterministic('success_prob', pm.invprobit(linear_pred))
    # Bernoulli likelihood
    y = pm.Bernoulli('y', p=success_prob, observed=binary_outcomes)


def logsumexp(x, axis=None, keepdims=False):
    """
    Numerically stable computation of log(sum(exp(x))).

    Parameters:
    - x (array-like): Input array
    - axis (int, optional): Axis along which to reduce
    - keepdims (bool): Keep reduced dimensions

    Returns:
    - result: log(sum(exp(x))) computed without overflow/underflow
    """
    x = np.asarray(x, dtype=float)
    # Shift by the per-axis max so exp() never overflows; map -inf maxima to 0
    shift = np.max(x, axis=axis, keepdims=True)
    shift = np.where(np.isfinite(shift), shift, 0.0)
    out = np.log(np.sum(np.exp(x - shift), axis=axis, keepdims=True)) + shift
    if keepdims:
        return out
    return np.squeeze(out) if axis is None else np.squeeze(out, axis=axis)
def logaddexp(x1, x2):
"""
Numerically stable computation of log(exp(x1) + exp(x2)).
Parameters:
- x1, x2 (array-like): Input values
Returns:
- result: log(exp(x1) + exp(x2)) computed stably
"""
# Stable computation of mixture probabilities
log_weights = np.array([-2.3, -1.8, -3.1])  # Log mixture weights
log_normalizing_constant = pm.logsumexp(log_weights)
normalized_log_weights = log_weights - log_normalizing_constant
print(f"Log weights: {log_weights}")
print(f"Log Z: {log_normalizing_constant:.3f}")
print(f"Normalized log weights: {normalized_log_weights}")

# Stable addition in log space
log_a = -10.5
log_b = -10.2
log_sum = pm.logaddexp(log_a, log_b)
print(f"log(exp({log_a}) + exp({log_b})) = {log_sum:.6f}")

# Compare with naive computation (may underflow)
naive_sum = np.log(np.exp(log_a) + np.exp(log_b))  # May be -inf
print(f"Naive computation: {naive_sum}")


def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
    """
    Expand packed triangular values to a full matrix.

    Parameters:
    - n (int): Dimension of output matrix
    - packed (array): Packed triangular values, length n*(n+1)/2
    - lower (bool): Expand as lower triangular (default: True)
    - diagonal_only (bool): Return only the diagonal elements

    Returns:
    - matrix: Expanded (n, n) matrix, or the length-n diagonal if diagonal_only
    """
    # NumPy reference implementation mirroring pm.expand_packed_triangular
    packed = np.asarray(packed)
    full = np.zeros((n, n), dtype=packed.dtype)
    idx = np.tril_indices(n) if lower else np.triu_indices(n)
    full[idx] = packed
    return np.diag(full) if diagonal_only else full
# Pack lower triangular matrix
n_dim = 3
n_packed = n_dim * (n_dim + 1) // 2  # Number of packed elements (diagonal included)
# Packed values for lower triangle, row-major: [L11, L21, L22, L31, L32, L33]
packed_values = np.array([1.0, 0.5, 2.0, 0.3, 0.8, 1.5])

# Expand to full matrix
L_matrix = pm.expand_packed_triangular(n_dim, packed_values, lower=True)
print("Lower triangular matrix:")
print(L_matrix.eval())

# Use in Cholesky decomposition parameterization
# ('data' must be defined by the caller)
with pm.Model() as cholesky_model:
    # Packed Cholesky factors
    L_packed = pm.Normal('L_packed', 0, 1, shape=n_packed)
    # Expand to Cholesky matrix
    L = pm.expand_packed_triangular(n_dim, L_packed, lower=True)
    # Covariance matrix (shown for reference; MvNormal below uses chol directly)
    Sigma = pm.math.dot(L, L.T)
    # Multivariate normal with Cholesky parameterization
    y = pm.MvNormal('y', mu=np.zeros(n_dim), chol=L, observed=data)

# Common tensor operations available through pm.math
# NOTE(review): free random variables (pm.Normal(...) etc.) must be created
# inside a pm.Model() context; shown bare here for brevity only.

# Matrix multiplication
A = pm.Data('A', np.random.randn(5, 3))
B = pm.Data('B', np.random.randn(3, 4))
C = pm.math.dot(A, B)  # Matrix product

# Element-wise operations
x = pm.Normal('x', 0, 1, shape=10)
squared = pm.math.sqr(x)  # Element-wise square
sqrt_x = pm.math.sqrt(pm.math.abs(x))  # Square root of |x| (guards the domain)
exp_x = pm.math.exp(x)  # Element-wise exponential
log_x = pm.math.log(pm.math.abs(x))  # Logarithm of |x| (guards the domain)

# Trigonometric functions
sin_x = pm.math.sin(x)  # Sine
cos_x = pm.math.cos(x)  # Cosine
tan_x = pm.math.tan(x)  # Tangent

# Hyperbolic functions
sinh_x = pm.math.sinh(x)  # Hyperbolic sine
cosh_x = pm.math.cosh(x)  # Hyperbolic cosine
tanh_x = pm.math.tanh(x)  # Hyperbolic tangent

# Statistical operations
# NOTE(review): pm.math re-exports PyTensor tensor ops; confirm each name
# below (e.g. median) exists in the installed PyMC version before relying on it.
data_tensor = pm.Data('data', np.random.randn(100, 5))

# Summary statistics
mean_val = pm.math.mean(data_tensor, axis=0)  # Column means
std_val = pm.math.std(data_tensor, axis=0)  # Column standard deviations
var_val = pm.math.var(data_tensor, axis=0)  # Column variances

# Order statistics
median_val = pm.math.median(data_tensor, axis=0)
max_val = pm.math.max(data_tensor, axis=0)
min_val = pm.math.min(data_tensor, axis=0)

# Cumulative operations
cumsum = pm.math.cumsum(data_tensor, axis=0)  # Cumulative sum
cumprod = pm.math.cumprod(data_tensor, axis=0)  # Cumulative product

# Logical and comparison operations
x = pm.Normal('x', 0, 1, shape=5)
y = pm.Normal('y', 0, 1, shape=5)

# Comparisons (elementwise, produce boolean tensors)
greater = pm.math.gt(x, y)  # x > y
less_equal = pm.math.le(x, 0)  # x <= 0
equal = pm.math.eq(x, 0)  # x == 0

# Logical operations
and_result = pm.math.and_(pm.math.gt(x, 0), pm.math.lt(x, 1))  # 0 < x < 1
or_result = pm.math.or_(pm.math.lt(x, -1), pm.math.gt(x, 1))  # x < -1 OR x > 1

# Conditional operations (switch/where)
positive_x = pm.math.switch(pm.math.gt(x, 0), x, 0)  # max(x, 0)
clipped_x = pm.math.clip(x, -2, 2)  # Clip x to [-2, 2]

# Define custom mathematical functions using PyTensor operations
def softplus(x):
    """Softplus function: log(1 + exp(x))."""
    one_plus_exp = 1 + pm.math.exp(x)
    return pm.math.log(one_plus_exp)
def stable_softplus(x):
    """Numerically stable softplus: for large x, log(1 + exp(x)) ~ x."""
    is_large = pm.math.gt(x, 20)
    naive = pm.math.log(1 + pm.math.exp(x))
    return pm.math.switch(is_large, x, naive)
def swish(x):
    """Swish activation function: x * sigmoid(x)."""
    sigmoid_x = pm.invlogit(x)
    return x * sigmoid_x
def gelu(x):
    """Gaussian Error Linear Unit (tanh approximation)."""
    inner = np.sqrt(2/np.pi) * (x + 0.044715 * x**3)
    return 0.5 * x * (1 + pm.math.tanh(inner))
# Use custom functions in models
with pm.Model() as custom_model:
    z = pm.Normal('z', 0, 1, shape=10)
    # Apply custom transformations as tracked deterministics
    softplus_z = pm.Deterministic('softplus_z', stable_softplus(z))
    swish_z = pm.Deterministic('swish_z', swish(z))


def log1p(x):
    """Compute log(1 + x) accurately for small x."""
    return pm.math.log1p(x)
def expm1(x):
    """Compute exp(x) - 1 accurately for small x."""
    result = pm.math.expm1(x)
    return result
def log1mexp(x):
    """Compute log(1 - exp(x)) stably for x < 0.

    Two-branch scheme: near 0 the expm1 form is accurate; for very
    negative x the log1p form is.
    """
    return pm.math.switch(
        pm.math.gt(x, -0.693),  # log(0.5): branch-selection threshold
        pm.math.log(-pm.math.expm1(x)),  # x in (log 0.5, 0): 1 - exp(x) via expm1
        pm.math.log1p(-pm.math.exp(x))  # x <= log 0.5: exp(x) is small, log1p accurate
    )
# Example: Stable computation of survival function
def log_survival_function(log_hazard_cumulative):
    """Log survival function: log(1 - CDF) = log(1 - exp(log_CDF))."""
    # NOTE(review): log1mexp expects a log-CDF (a value < 0). The argument
    # name suggests log H(t); for S(t) = exp(-H(t)) the log-survival is just
    # -H(t) — confirm which quantity callers pass.
    return log1mexp(log_hazard_cumulative)

with pm.Model() as survival_model:
    # Log cumulative hazard
    log_cum_hazard = pm.Normal('log_cum_hazard', -2, 1)
    # Stable log survival probability
    log_survival = pm.Deterministic('log_survival',
                                    log_survival_function(log_cum_hazard))

# Cholesky decomposition utilities
def cholesky_decomposition(matrix):
    """Cholesky decomposition of a positive definite matrix."""
    chol_factor = pm.math.cholesky(matrix)
    return chol_factor
def solve_triangular(A, b, lower=True):
    """Solve the triangular linear system A x = b."""
    solution = pm.math.solve_triangular(A, b, lower=lower)
    return solution
# Example: Efficient multivariate normal computation
# ('n_dim' and 'data' must be defined by the caller)
with pm.Model() as efficient_mvn:
    # sd_dist must be an *unregistered* distribution built with .dist(),
    # not a named model variable; LKJCholeskyCov returns (chol, corr, stds)
    # by default (compute_corr=True)
    chol, corr, stds = pm.LKJCholeskyCov('L_corr', n=n_dim, eta=1,
                                         sd_dist=pm.HalfNormal.dist(1))
    # Efficient sampling using the Cholesky factor
    z = pm.Normal('z', 0, 1, shape=n_dim)
    x = pm.Deterministic('x', pm.math.dot(chol, z))
    # Likelihood
    y = pm.Normal('y', mu=x, sigma=0.1, observed=data)

# Array manipulation utilities
x = pm.Normal('x', 0, 1, shape=(5, 3))

# Reshaping
x_flat = pm.math.flatten(x)  # Flatten to 1D (15 elements)
x_reshaped = pm.math.reshape(x, (3, 5))  # Reshape to (3, 5)

# Broadcasting
y = pm.Normal('y', 0, 1, shape=5)
broadcasted = x + y[:, None]  # Broadcast y across columns

# Dimension manipulation
x_expanded = pm.math.expand_dims(y, axis=1)  # Add dimension -> (5, 1)
x_squeezed = pm.math.squeeze(x_expanded)  # Remove dimensions of size 1

# Stacking and concatenation
z = pm.Normal('z', 0, 1, shape=(5, 3))
stacked = pm.math.stack([x, z], axis=0)  # Stack along new axis -> (2, 5, 3)
concatenated = pm.math.concatenate([x, z], axis=1)  # Concatenate -> (5, 6)

# Using mathematical utilities in custom distributions
def custom_logp(value, mu, sigma):
    """Custom log-probability function built from the math utilities above."""
    # Standardize the observation
    z_score = (value - mu) / sigma
    # Custom transformation of the standardized value
    warped = stable_softplus(z_score)
    # Quadratic log-probability term, then the change-of-variables correction
    total = -0.5 * pm.math.sum(warped**2)
    total -= pm.math.sum(pm.math.log(sigma))  # Jacobian
    return total
# Use in CustomDist ('data' must be defined by the caller)
with pm.Model() as custom_dist_model:
    mu = pm.Normal('mu', 0, 1)
    sigma = pm.HalfNormal('sigma', 1)
    # Distribution parameters are passed positionally (*dist_params) and are
    # forwarded to logp in the same order after `value`
    x = pm.CustomDist('x', mu, sigma,
                      logp=custom_logp,
                      observed=data)

# Common transformations for constrained parameters
def constrain_positive(x):
    """Map an unconstrained value to the positive reals via softplus."""
    return stable_softplus(x)
def constrain_unit_interval(x):
    """Map an unconstrained value into [0, 1] via the logistic sigmoid."""
    return pm.invlogit(x)
def constrain_correlation(x):
    """Map an unconstrained value into [-1, 1] via tanh."""
    return pm.math.tanh(x)
def stick_breaking(x):
    """Stick-breaking transform for simplex"""
    # x should be shape (..., K-1) for K-simplex
    # NOTE(review): the Python loop needs x.shape[-1] to be a concrete int;
    # a symbolic last dimension would break range() — confirm callers pass
    # tensors with a static trailing dimension.
    stick_segments = pm.invlogit(x)
    # Compute stick-breaking weights
    remaining_stick = 1.0
    weights = []
    for i in range(x.shape[-1]):
        # Fraction of the remaining stick assigned to component i
        weight = stick_segments[..., i] * remaining_stick
        weights.append(weight)
        remaining_stick -= weight
    # Last weight gets remaining stick
    weights.append(remaining_stick)
    return pm.math.stack(weights, axis=-1)
# Example usage in Dirichlet-like model
# ('K' and 'categories' must be defined by the caller)
with pm.Model() as stick_breaking_model:
    # Unconstrained parameters
    raw_weights = pm.Normal('raw_weights', 0, 1, shape=K-1)
    # Transform to simplex
    simplex_weights = pm.Deterministic('simplex_weights',
                                       stick_breaking(raw_weights))
    # Categorical likelihood
    y = pm.Categorical('y', p=simplex_weights, observed=categories)


def logp(dist, value):
    """
    Compute log-probability density/mass function.

    Parameters:
    - dist: PyMC distribution object (e.g. pm.Normal.dist(...))
    - value: Values at which to evaluate log-probability

    Returns:
    - log_prob: Symbolic tensor of log-probability values
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.logp(dist, value)
def logcdf(dist, value):
    """
    Compute log cumulative distribution function.

    Parameters:
    - dist: PyMC distribution object
    - value: Values at which to evaluate log-CDF

    Returns:
    - log_cdf: Symbolic tensor of log-CDF values
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.logcdf(dist, value)
def icdf(dist, q):
    """
    Compute inverse cumulative distribution function (quantile function).

    Parameters:
    - dist: PyMC distribution object
    - q: Quantile values in [0, 1]

    Returns:
    - quantiles: Values corresponding to the given quantiles
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.icdf(dist, q)
# Example usage
normal_dist = pm.Normal.dist(mu=0, sigma=1)
# Log-probability at specific values
log_prob_values = pm.logp(normal_dist, np.array([-1, 0, 1]))
# Log-CDF at specific values
log_cdf_values = pm.logcdf(normal_dist, np.array([-1, 0, 1]))
# Quantiles (inverse CDF)
quantiles = pm.icdf(normal_dist, np.array([0.025, 0.5, 0.975]))  # 95% CI


def expand_packed_triangular(n, packed):
    """
    Expand packed triangular values into full triangular matrix.

    Parameters:
    - n (int): Dimension of the triangular matrix
    - packed (array): Packed triangular values (n*(n+1)/2 elements)

    Returns:
    - triangular_matrix: Lower triangular matrix of shape (n, n)
    """
    # Thin wrapper over the library routine so the stub is executable
    return pm.expand_packed_triangular(n, packed, lower=True)

# Example: expand packed Cholesky values. The packed vector must include the
# diagonal, i.e. n*(n+1)/2 elements — not just the n*(n-1)/2 off-diagonals,
# and there is no `diag=` keyword in pm.expand_packed_triangular.
n_dims = 3
n_packed = n_dims * (n_dims + 1) // 2  # 6 packed elements, diagonal included
packed_chol = np.array([1.0, 0.5, 1.0, -0.3, 0.7, 1.0])
# Expand to a lower-triangular matrix
chol_matrix = pm.expand_packed_triangular(n_dims, packed_chol)

# Advanced numerical functions for edge cases
def logaddexp_m(x, y):
"""Numerically stable log(exp(x) + exp(y)) with memory optimization."""
def floatX(x):
"""Convert to PyTensor's floating point type."""
def constant(x):
"""Create constant tensor."""
def shared(value, name=None):
"""Create shared variable."""
# Usage in models
with pm.Model() as advanced_model:
# Create shared parameter for updating
shared_param = pm.math.shared(np.array([1.0, 2.0]), 'shared_param')
# Convert to correct dtype
precise_value = pm.math.floatX(3.14159265359)
# Create constant
fixed_value = pm.math.constant(2.718281828)PyMC's mathematical utilities provide the essential building blocks for creating sophisticated probabilistic models with proper numerical stability and efficient computation.
Install with Tessl CLI
npx tessl i tessl/pypi-pymc