Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with PyTensor
PyMC's modeling framework provides the foundational components for building probabilistic models. The core classes and functions enable model specification, transformation, and manipulation within a coherent probabilistic programming paradigm.
from pymc import Model

class Model:
    """
    Main container for probabilistic models in PyMC.

    Parameters:
    - name (str, optional): Name of the model
    - coords (dict, optional): Coordinate specifications for dimensions
    - check_bounds (bool): Check parameter bounds during sampling (default: True)

    Attributes:
    - free_RVs (list): List of free random variables
    - observed_RVs (list): List of observed random variables
    - deterministics (list): List of deterministic variables
    - potentials (list): List of potential terms
    - named_vars (dict): Dictionary of named variables
    """
    def __init__(self, name='', coords=None, check_bounds=True):
        pass

    def __enter__(self):
        """Enter model context."""
        return self

    def __exit__(self, *args):
        """Exit model context."""
        pass

    def compile_logp(self, vars=None, jacobian=True):
        """
        Compile a log-probability function.

        Parameters:
        - vars (list, optional): Variables to include
        - jacobian (bool): Include Jacobian terms for transforms

        Returns:
        - logp_func: Compiled log-probability function
        """
        pass

    def compile_fn(self, outs, ins=None, **kwargs):
        """
        Compile a PyTensor function from model variables.

        Parameters:
        - outs: Output variables
        - ins (list, optional): Input variables
        - **kwargs: Additional compilation options

        Returns:
        - compiled_fn: Compiled PyTensor function
        """
        pass
import numpy as np
import pymc as pm

# Basic model definition (x_data and y_data are assumed to be
# NumPy arrays of predictors and responses)
with pm.Model() as basic_model:
    # Priors
    alpha = pm.Normal('alpha', mu=0, sigma=1)
    beta = pm.Normal('beta', mu=0, sigma=1)
    sigma = pm.HalfNormal('sigma', sigma=1)

    # Linear combination
    mu = alpha + beta * x_data

    # Likelihood
    y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y_data)
# Model with coordinates and naming
coords = {
'regions': ['north', 'south', 'east', 'west'],
'time': range(2000, 2021)
}
with pm.Model(name='regional_model', coords=coords) as model:
# Region-specific parameters
alpha = pm.Normal('alpha', 0, 1, dims='regions')
# Time-varying effects
beta = pm.GaussianRandomWalk('beta', sigma=0.1, dims='time')def modelcontext(model=None):
"""
Get the current model from context or return specified model.
Parameters:
- model (Model, optional): Explicit model to return
Returns:
- model: Current model in context
"""
# Get the current model (requires an active model context or an
# explicit model argument)
with basic_model:
    current_model = pm.modelcontext()

# Access model properties
print(f"Free RVs: {len(current_model.free_RVs)}")
print(f"Observed RVs: {len(current_model.observed_RVs)}")
print(f"Deterministics: {len(current_model.deterministics)}")
"""
Update data in existing model.
Parameters:
- new_data (dict): Dictionary of new data values
- model (Model, optional): Target model
- coords (dict, optional): New coordinate specifications
"""
# Define a model with data containers (X_train, y_train, X_test
# assumed defined)
with pm.Model() as dynamic_model:
    # Data containers that can be updated later
    X_data = pm.Data('X_data', X_train)
    y_data = pm.Data('y_data', y_train)

    # Model using the data
    beta = pm.Normal('beta', 0, 1, shape=X_train.shape[1])
    mu = pm.math.dot(X_data, beta)
    y_pred = pm.Normal('y_pred', mu=mu, sigma=1, observed=y_data)

    # Sample with training data
    trace = pm.sample()

# Update for prediction on new data (set_data needs a model, here
# supplied via the context; y is replaced by placeholders of matching length)
with dynamic_model:
    pm.set_data({'X_data': X_test, 'y_data': np.zeros(len(X_test))})
    post_pred = pm.sample_posterior_predictive(trace)
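set_data can also update coordinates when the new data changes a dimension's length. A minimal sketch, assuming one-dimensional y_train and y_test arrays (the model and dimension names are illustrative):

with pm.Model(coords={'obs_id': range(len(y_train))}) as coord_model:
    y = pm.Data('y', y_train, dims='obs_id')

# Replace the data and resize the 'obs_id' dimension in one call
pm.set_data({'y': y_test}, model=coord_model,
            coords={'obs_id': range(len(y_test))})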
Random variables are created automatically when distributions are instantiated within a model context:

with pm.Model() as model:
    # Creates a random variable named 'parameter'
    parameter = pm.Normal('parameter', mu=0, sigma=1)

# Access random-variable properties (random variables are PyTensor
# tensor variables; transforms are tracked on the model)
print(f"Name: {parameter.name}")
print(f"Distribution op: {parameter.owner.op}")
print(f"Transform: {model.rvs_to_transforms[parameter]}")  # None if untransformed
print(f"Shape: {parameter.shape.eval()}")

from pymc import Deterministic
class Deterministic:
    """
    Deterministic transformation of other variables.

    Parameters:
    - name (str): Variable name
    - var: PyTensor expression
    - dims (tuple, optional): Dimension names
    - model (Model, optional): Model context

    Attributes:
    - name (str): Variable name
    - owner: PyTensor variable
    """
    def __init__(self, name, var, dims=None, model=None):
        pass
# Deterministic transformations
with pm.Model() as model:
    # Parameters
    alpha = pm.Normal('alpha', 0, 1)
    beta = pm.Normal('beta', 0, 1)
    p = pm.Beta('p', 2, 2)  # probability parameter used for log-odds below

    # Deterministic combinations
    linear_pred = pm.Deterministic('linear_pred', alpha + beta * x_data)

    # Transformed parameters
    log_odds = pm.Deterministic('log_odds', pm.math.log(p / (1 - p)))

    # Complex transformations
    interaction = pm.Deterministic('interaction',
                                   alpha * beta + pm.math.exp(alpha))
# Centered vs non-centered parameterization
with pm.Model() as hierarchical:
    # Hyperparameters
    mu_alpha = pm.Normal('mu_alpha', 0, 10)
    sigma_alpha = pm.HalfNormal('sigma_alpha', 5)

    # Non-centered parameterization (better for sampling)
    alpha_raw = pm.Normal('alpha_raw', 0, 1, shape=n_groups)
    alpha = pm.Deterministic('alpha', mu_alpha + sigma_alpha * alpha_raw)

    # Soft constraints via deterministic
    alpha_constrained = pm.Deterministic('alpha_constrained',
                                         pm.math.clip(alpha, -10, 10))
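For contrast, a minimal sketch of the centered parameterization (with n_groups as above); it defines the same distribution but often samples poorly when sigma_alpha is small:

with pm.Model() as centered:
    mu_alpha = pm.Normal('mu_alpha', 0, 10)
    sigma_alpha = pm.HalfNormal('sigma_alpha', 5)
    # alpha is drawn directly from the group-level distribution
    alpha = pm.Normal('alpha', mu=mu_alpha, sigma=sigma_alpha, shape=n_groups)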
from pymc import Potential

class Potential:
    """
    Add arbitrary log-probability terms to the model.

    Parameters:
    - name (str): Name for the potential
    - var: PyTensor expression for log-probability
    - model (Model, optional): Model context
    """
    def __init__(self, name, var, model=None):
        pass
# Custom priors and constraints
with pm.Model() as constrained_model:
    # Parameters
    theta = pm.Uniform('theta', 0, 10, shape=3)

    # Custom constraint: sum of theta must be less than 5
    constraint = pm.Potential('sum_constraint',
                              pm.math.switch(pm.math.sum(theta) < 5, 0, -np.inf))

    # Custom prior that's not available as a distribution
    custom_prior = pm.Potential('custom_prior',
                                -0.5 * pm.math.sum((theta - 2)**4))

    # Soft constraint with penalty
    soft_constraint = pm.Potential('soft_constraint',
                                   -10 * pm.math.maximum(0, pm.math.sum(theta) - 8))
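Potentials are collected in model.potentials and added to the joint log-probability, so a violated hard constraint drives the compiled logp to -inf. A quick check (sketch):

print(constrained_model.potentials)

logp_fn = constrained_model.compile_logp()
point = constrained_model.initial_point()
print(logp_fn(point))  # -inf whenever sum(theta) >= 5 at this point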
from pymc.model.transform.conditioning import do

def do(model, interventions):
    """
    Apply causal interventions to a model (do-operator).

    Parameters:
    - model (Model): Original model
    - interventions (dict): Variable interventions {var_name: value}

    Returns:
    - intervened_model: Model with interventions applied
    """
# Causal inference with interventions
with pm.Model() as causal_model:
    # Structural model
    X = pm.Normal('X', 0, 1)
    Y = pm.Normal('Y', mu=2*X + 1, sigma=0.5)  # Y depends on X
    Z = pm.Normal('Z', mu=X + Y, sigma=0.3)    # Z depends on X and Y

    # Observational model
    obs_trace = pm.sample()

# Interventional model: do(X = 1.5)
intervened_model = pm.do(causal_model, {'X': 1.5})
with intervened_model:
    # Sample from the intervened distribution
    intervention_trace = pm.sample()
from pymc.model.transform.conditioning import observe

def observe(model, observations):
    """
    Condition a model on observed values.

    Parameters:
    - model (Model): Original model
    - observations (dict): Observed values {var_name: value}

    Returns:
    - conditioned_model: Model conditioned on observations
    """
# Conditional inference
with pm.Model() as joint_model:
    X = pm.Normal('X', 0, 1)
    Y = pm.Normal('Y', mu=X, sigma=0.5)

# Condition on observed X (observed_x_values: array of observations,
# assumed defined)
conditioned_model = pm.observe(joint_model, {'X': observed_x_values})
with conditioned_model:
    # Sample the conditional distribution P(Y | X = observed)
    conditional_trace = pm.sample()
"""
Compile PyTensor function from model variables.
Parameters:
- outs: Output variables or expressions
- ins (list, optional): Input variables
- mode: Compilation mode ('FAST_RUN', 'FAST_COMPILE', etc.)
- **kwargs: Additional PyTensor compilation options
Returns:
- compiled_fn: Compiled function
"""
with pm.Model() as model:
    # Parameters
    alpha = pm.Normal('alpha', 0, 1)
    beta = pm.Normal('beta', 0, 1)
    x = pm.Data('x', x_data)

    # Expression to compile
    prediction = alpha + beta * x

    # Compile prediction function
    predict_fn = pm.compile_fn([prediction], [alpha, beta, x])

# Use the compiled function (alpha_val, beta_val, x_new assumed defined)
pred_values = predict_fn(alpha_val, beta_val, x_new)
# Compile log-probability function
with model:
    logp_fn = model.compile_logp()

# Evaluate log-probability at a point
test_point = {'alpha': 0.5, 'beta': -1.2}
log_prob = logp_fn(test_point)
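The jacobian flag of compile_logp matters for transformed variables: with jacobian=True (the default) the compiled function includes the change-of-variables term for the transformed value variables, while jacobian=False drops it. A minimal sketch using a HalfNormal, whose value variable is log-transformed (jac_model is illustrative):

with pm.Model() as jac_model:
    s = pm.HalfNormal('s', 1)

logp_with = jac_model.compile_logp(jacobian=True)
logp_without = jac_model.compile_logp(jacobian=False)

point = jac_model.initial_point()  # keyed by the value variable, e.g. 's_log__'
print(logp_with(point), logp_without(point))  # differ by the Jacobian term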
"""Factory function for regression models."""
if priors is None:
priors = {'alpha': (0, 10), 'beta': (0, 5), 'sigma': 1}
with pm.Model() as regression_model:
# Priors
alpha = pm.Normal('alpha', *priors['alpha'])
beta = pm.Normal('beta', *priors['beta'], shape=X.shape[1])
sigma = pm.HalfNormal('sigma', priors['sigma'])
# Linear model
mu = alpha + pm.math.dot(X, beta)
# Likelihood
y_obs = pm.Normal('y_obs', mu=mu, sigma=sigma, observed=y)
return regression_model
# Create multiple models with different priors
model1 = create_regression_model(X_data, y_data)
model2 = create_regression_model(X_data, y_data,
                                 priors={'alpha': (0, 1), 'beta': (0, 1), 'sigma': 0.5})
# Combine submodels
def create_measurement_model():
    with pm.Model() as measurement:
        # Measurement error
        sigma_m = pm.HalfNormal('sigma_m', 1)
    return measurement, sigma_m

def create_process_model():
    with pm.Model() as process:
        # Process parameters
        mu_p = pm.Normal('mu_p', 0, 5)
        sigma_p = pm.HalfNormal('sigma_p', 2)
    return process, (mu_p, sigma_p)

# Combine models
measurement_model, sigma_m = create_measurement_model()
process_model, (mu_p, sigma_p) = create_process_model()

# Variables cannot be shared across models, so the submodel structure
# is re-declared in a fresh context
with pm.Model() as combined_model:
    sigma_m_combined = pm.HalfNormal('sigma_m', 1)
    mu_p_combined = pm.Normal('mu_p', 0, 5)
    sigma_p_combined = pm.HalfNormal('sigma_p', 2)

    # Latent process
    latent = pm.Normal('latent', mu=mu_p_combined, sigma=sigma_p_combined)

    # Observed measurements (measurements: array of data, assumed defined)
    observed = pm.Normal('observed', mu=latent, sigma=sigma_m_combined,
                         observed=measurements)
"""Create model that adapts to streaming data."""
with pm.Model() as dynamic:
# Time-varying parameters
n_timepoints = len(data_stream)
# Random walk parameters
innovation_sd = pm.HalfNormal('innovation_sd', 0.1)
initial_state = pm.Normal('initial_state', 0, 1)
# State evolution
states = [initial_state]
for t in range(1, n_timepoints):
state_t = pm.Normal(f'state_{t}',
mu=states[t-1],
sigma=innovation_sd)
states.append(state_t)
# Observations
obs_sd = pm.HalfNormal('obs_sd', 1)
for t, data_t in enumerate(data_stream):
pm.Normal(f'obs_{t}', mu=states[t], sigma=obs_sd,
observed=data_t)
return dynamic# Analyze model structure
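The explicit Python loop above creates one variable per time step, which scales poorly. A vectorized sketch of the same state-evolution structure using GaussianRandomWalk (init_dist specifies the initial-state distribution; the function name is illustrative):

def create_dynamic_model_vectorized(data_stream):
    """Vectorized variant: one random-walk variable holds all states."""
    data = np.asarray(data_stream)
    with pm.Model() as dynamic_vec:
        innovation_sd = pm.HalfNormal('innovation_sd', 0.1)
        states = pm.GaussianRandomWalk('states', sigma=innovation_sd,
                                       init_dist=pm.Normal.dist(0, 1),
                                       shape=len(data))
        obs_sd = pm.HalfNormal('obs_sd', 1)
        pm.Normal('obs', mu=states, sigma=obs_sd, observed=data)
    return dynamic_vec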
# Analyze model structure
with pm.Model() as analysis_model:
    # Complex model components would be defined here
    ...

# Get all variables
free_vars = analysis_model.free_RVs
observed_vars = analysis_model.observed_RVs
deterministic_vars = analysis_model.deterministics

print("Free variables:")
for var in free_vars:
    print(f"  {var.name}: {var.owner.op}")

print("Deterministic variables:")
for var in deterministic_vars:
    print(f"  {var.name}: shape {var.shape}")

# Get a variable by name (illustrative; 'parameter_name' must exist)
specific_var = analysis_model['parameter_name']

# Check model dimensions and named variables
print(f"Model coordinates: {analysis_model.coords}")
print(f"Named variables: {list(analysis_model.named_vars)}")
"""Validate model structure and parameters."""
with model:
# Check for common issues
try:
# Compile log-probability function
logp_fn = model.compile_logp()
# Test evaluation at random point
test_point = model.initial_point()
logp_val = logp_fn(test_point)
if np.isnan(logp_val) or np.isinf(logp_val):
print("Warning: Model has numerical issues")
print(f"Model log-probability: {logp_val}")
except Exception as e:
print(f"Model validation failed: {e}")
# Check parameter shapes
for var in model.free_RVs:
print(f"{var.name}: shape {var.shape}, dtype {var.dtype}")
# Usage
validate_model(my_model)Point = Dict[str, np.ndarray] # Type alias for parameter points
from typing import Dict

Point = Dict[str, np.ndarray]  # Type alias for parameter points

# Working with points
with pm.Model() as model:
    alpha = pm.Normal('alpha', 0, 1)
    beta = pm.Normal('beta', 0, 1, shape=3)

# Get the initial point
initial_point = model.initial_point()
print(f"Initial point: {initial_point}")

# Create a custom point
custom_point = {'alpha': 1.5, 'beta': np.array([0.5, -0.2, 0.8])}

# Evaluate log-probability at the point
logp_fn = model.compile_logp()
logp_value = logp_fn(custom_point)
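Note that points are keyed by value-variable names, which carry a transform suffix for constrained parameters; a quick sketch (model name illustrative):

with pm.Model() as transformed_model:
    sigma = pm.HalfNormal('sigma', 1)

# The point uses the log-transformed value variable, not 'sigma' itself
print(transformed_model.initial_point())  # {'sigma_log__': array(...)}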
PyMC's model framework provides a flexible foundation for building complex probabilistic models while maintaining clean separation between model specification, compilation, and inference.

Install with Tessl CLI
npx tessl i tessl/pypi-pymc