Tessl Tile for pypi/pymc3@3.11.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

data-handling.md distributions.md gaussian-processes.md glm.md index.md math-functions.md modeling.md sampling.md stats-plots.md step-methods.md variational.md

glm.mddocs/

0
# Generalized Linear Models
1

2
PyMC3's GLM module provides a high-level interface for generalized linear models with automatic handling of design matrices, link functions, and family-specific distributions. This streamlines the construction of regression models for various response types including continuous, binary, count, and categorical data.
3

4
## Capabilities
5

6
### GLM Classes
7

8
High-level classes for building generalized linear models with automatic configuration.
9

10
```python { .api }
11
class GLM:
12
    """
13
    Generalized Linear Model with automatic family and link function handling.
14
    
15
    Provides a high-level interface for GLM construction that automatically
16
    handles design matrix creation, appropriate distributions, link functions,
17
    and parameter priors based on the specified family.
18
    """
19
    
20
    def __init__(self, y, X, labels=None, intercept=True, family=None, 
21
                 priors=None, **kwargs):
22
        """
23
        Initialize GLM.
24
        
25
        Parameters:
26
        - y: array, response variable
27
        - X: array, design matrix or predictor variables  
28
        - labels: list, variable names for predictors
29
        - intercept: bool, include intercept term
30
        - family: GLMFamily, response distribution family
31
        - priors: dict, prior specifications for parameters
32
        - kwargs: additional arguments for family-specific configuration
33
        
34
        Returns:
35
        - GLM: configured GLM object
36
        
37
        Example:
38
        # Linear regression
39
        glm = pm.GLM(y=height, X=weight, family=pm.glm.families.Normal())
40
        
41
        # Logistic regression  
42
        glm = pm.GLM(y=success, X=predictors, family=pm.glm.families.Binomial())
43
        
44
        # Poisson regression
45
        glm = pm.GLM(y=counts, X=features, family=pm.glm.families.Poisson())
46
        """
47
    
48
    def fit(self, draws=1000, **kwargs):
49
        """
50
        Fit GLM using MCMC sampling.
51
        
52
        Parameters:
53
        - draws: int, number of posterior samples
54
        - kwargs: arguments passed to pm.sample()
55
        
56
        Returns:
57
        - InferenceData: posterior samples and diagnostics
58
        """
59
    
60
    def predict(self, X_new, trace=None, **kwargs):
61
        """
62
        Generate predictions for new data.
63
        
64
        Parameters:
65
        - X_new: array, new predictor values
66
        - trace: InferenceData, posterior samples for prediction
67
        - kwargs: additional prediction arguments
68
        
69
        Returns:
70
        - dict: predictive samples
71
        """
72

73
class LinearComponent:
74
    """
75
    Linear component builder for GLM construction.
76
    
77
    Handles design matrix creation, variable naming, and prior
78
    specification for linear predictors in GLM models.
79
    """
80
    
81
    def __init__(self, x, intercept=True, labels=None, priors=None):
82
        """
83
        Initialize linear component.
84
        
85
        Parameters:
86
        - x: array, predictor matrix
87
        - intercept: bool, include intercept
88
        - labels: list, predictor names
89
        - priors: dict, prior distributions for coefficients
90
        
91
        Returns:
92
        - LinearComponent: linear component object
93
        """
94
    
95
    def build(self, name=''):
96
        """
97
        Build linear predictor variables.
98
        
99
        Parameters:
100
        - name: str, prefix for variable names
101
        
102
        Returns:
103
        - TensorVariable: linear predictor
104
        """
105
```
106

107
### GLM Families
108

109
Distribution families for different response types available in `pm.glm.families`.
110

111
```python { .api }
112
class Normal:
113
    """
114
    Normal family for continuous responses (linear regression).
115
    
116
    Uses identity link function and normal distribution for
117
    modeling continuous response variables.
118
    """
119
    
120
    def __init__(self, priors=None):
121
        """
122
        Initialize Normal family.
123
        
124
        Parameters:
125
        - priors: dict, prior specifications {'sigma': prior_dist}
126
        """
127
    
128
    @property
129
    def link(self):
130
        """Identity link function: η = μ"""
131
    
132
    @property
133
    def likelihood(self):
134
        """Normal likelihood distribution"""
135

136
class Binomial:
137
    """
138
    Binomial family for binary and binomial responses.
139
    
140
    Uses logit link function and binomial distribution for
141
    modeling binary outcomes or success/failure counts.
142
    """
143
    
144
    def __init__(self, n=1, priors=None):
145
        """
146
        Initialize Binomial family.
147
        
148
        Parameters:
149
        - n: int or array, number of trials (1 for binary)
150
        - priors: dict, prior specifications
151
        """
152
    
153
    @property  
154
    def link(self):
155
        """Logit link function: η = log(p/(1-p))"""
156

157
class Poisson:
158
    """
159
    Poisson family for count responses.
160
    
161
    Uses log link function and Poisson distribution for
162
    modeling count data and rate processes.
163
    """
164
    
165
    def __init__(self, priors=None):
166
        """
167
        Initialize Poisson family.
168
        
169
        Parameters:
170
        - priors: dict, prior specifications
171
        """
172
    
173
    @property
174
    def link(self):
175
        """Log link function: η = log(λ)"""
176

177
class NegativeBinomial:
178
    """
179
    Negative binomial family for overdispersed count data.
180
    
181
    Uses log link with additional dispersion parameter for
182
    modeling count data with variance greater than mean.
183
    """
184
    
185
    def __init__(self, priors=None):
186
        """
187
        Initialize NegativeBinomial family.
188
        
189
        Parameters:
190
        - priors: dict, prior specifications {'alpha': prior_dist}
191
        """
192

193
class Gamma:
194
    """
195
    Gamma family for positive continuous responses.
196
    
197
    Uses log link function and gamma distribution for
198
    modeling positive continuous variables like duration, cost.
199
    """
200
    
201
    def __init__(self, priors=None):
202
        """
203
        Initialize Gamma family.
204
        
205
        Parameters:
206
        - priors: dict, prior specifications {'alpha': prior_dist}
207
        """
208

209
class StudentT:
210
    """
211
    Student's t family for robust regression.
212
    
213
    Uses identity link with Student's t distribution for
214
    robust modeling of continuous responses with outliers.
215
    """
216
    
217
    def __init__(self, priors=None):
218
        """
219
        Initialize StudentT family.
220
        
221
        Parameters:
222
        - priors: dict, prior specifications {'nu': prior_dist, 'sigma': prior_dist}
223
        """
224
```
225

226
## Usage Examples
227

228
### Linear Regression with GLM
229

230
```python
231
import pymc3 as pm
232
import numpy as np
233
import pandas as pd
234

235
# Generate sample data
236
np.random.seed(42)
237
n = 100
238
X = np.random.randn(n, 3)
239
true_coef = np.array([1.5, -2.0, 0.5])
240
true_intercept = 0.3
241
y = true_intercept + X @ true_coef + np.random.normal(0, 0.5, n)
242

243
# GLM linear regression
244
with pm.Model() as linear_glm:
245
    # Automatic GLM construction
246
    glm = pm.GLM(y=y, X=X, 
247
                labels=['x1', 'x2', 'x3'],
248
                family=pm.glm.families.Normal())
249
    
250
    # Sample
251
    trace = pm.sample(1000, tune=1000)
252
    
253
    # Predictions
254
    X_new = np.random.randn(20, 3)
255
    predictions = glm.predict(X_new, trace=trace)
256

257
# Manual prior specification
258
with pm.Model() as linear_glm_priors:
259
    # Custom priors
260
    priors = {
261
        'Intercept': pm.Normal('Intercept', mu=0, sigma=10),
262
        'x1': pm.Normal('x1', mu=0, sigma=5),
263
        'x2': pm.Laplace('x2', mu=0, b=1),  # Regularized prior
264
        'x3': pm.Normal('x3', mu=0, sigma=5),
265
        'sigma': pm.HalfNormal('sigma', sigma=2)
266
    }
267
    
268
    glm = pm.GLM(y=y, X=X, 
269
                labels=['x1', 'x2', 'x3'],
270
                family=pm.glm.families.Normal(),
271
                priors=priors)
272
    
273
    trace = pm.sample(1000, tune=1000)
274
```
275

276
### Logistic Regression
277

278
```python
279
# Binary classification data
280
n_samples = 200
281
X_log = np.random.randn(n_samples, 4)
282
true_coef_log = np.array([1.2, -0.8, 0.5, -1.1])
283
true_intercept_log = -0.3
284

285
# Generate binary outcomes
286
linear_pred = true_intercept_log + X_log @ true_coef_log
287
prob = 1 / (1 + np.exp(-linear_pred))  # Logistic function
288
y_binary = np.random.binomial(1, prob)
289

290
# Logistic regression GLM
291
with pm.Model() as logistic_glm:
292
    glm = pm.GLM(y=y_binary, X=X_log,
293
                labels=['feature_1', 'feature_2', 'feature_3', 'feature_4'],
294
                family=pm.glm.families.Binomial())
295
    
296
    trace = pm.sample(1000, tune=1000)
297
    
298
    # Prediction probabilities
299
    X_test = np.random.randn(50, 4)
300
    pred_samples = glm.predict(X_test, trace=trace)
301
    
302
    # Extract probabilities
303
    prob_samples = pred_samples['y']  # Bernoulli samples
304
    prob_mean = prob_samples.mean(axis=0)
305

306
# Multi-trial binomial
307
n_trials = 20
308
y_binomial = np.random.binomial(n_trials, prob)
309

310
with pm.Model() as binomial_glm:
311
    glm = pm.GLM(y=y_binomial, X=X_log,
312
                labels=['feature_1', 'feature_2', 'feature_3', 'feature_4'],  
313
                family=pm.glm.families.Binomial(n=n_trials))
314
    
315
    trace = pm.sample(1000, tune=1000)
316
```
317

318
### Poisson Regression
319

320
```python
321
# Count data generation
322
X_count = np.random.randn(150, 2)
323
true_coef_count = np.array([0.8, -0.6])
324
true_intercept_count = 1.2
325

326
# Generate count outcomes
327
log_rate = true_intercept_count + X_count @ true_coef_count
328
rate = np.exp(log_rate)
329
y_counts = np.random.poisson(rate)
330

331
# Poisson regression GLM
332
with pm.Model() as poisson_glm:
333
    glm = pm.GLM(y=y_counts, X=X_count,
334
                labels=['exposure', 'treatment'],
335
                family=pm.glm.families.Poisson())
336
    
337
    trace = pm.sample(1000, tune=1000)
338
    
339
    # Prediction of rates
340
    X_count_new = np.array([[0.5, 1.0], [-1.0, 0.0], [1.5, -0.5]])
341
    count_pred = glm.predict(X_count_new, trace=trace)
342
    
343
    # Expected counts
344
    expected_counts = count_pred['y'].mean(axis=0)
345
    print("Expected counts:", expected_counts)
346
```
347

348
### Negative Binomial Regression for Overdispersed Counts
349

350
```python
351
# Overdispersed count data  
352
X_nb = np.random.randn(120, 3)
353
true_coef_nb = np.array([0.5, -0.3, 0.7])
354
log_mu = 2.0 + X_nb @ true_coef_nb
355
mu = np.exp(log_mu)
356

357
# Overdispersion parameter
358
alpha_true = 2.0
359
y_nb = np.random.negative_binomial(n=alpha_true, 
360
                                  p=alpha_true/(alpha_true + mu))
361

362
with pm.Model() as nb_glm:
363
    # Custom priors for dispersion
364
    priors = {
365
        'alpha': pm.Gamma('alpha', alpha=1, beta=1)  # Dispersion parameter
366
    }
367
    
368
    glm = pm.GLM(y=y_nb, X=X_nb,
369
                labels=['var1', 'var2', 'var3'],
370
                family=pm.glm.families.NegativeBinomial(),
371
                priors=priors)
372
    
373
    trace = pm.sample(1000, tune=1000)
374
    
375
    # Check for overdispersion
376
    alpha_post = trace.posterior['alpha'].values.flatten()
377
    print(f"Overdispersion parameter (mean): {alpha_post.mean():.3f}")
378
```
379

380
### Gamma Regression for Positive Continuous Data
381

382
```python
383
# Positive continuous response (e.g., waiting times, costs)
384
X_gamma = np.random.randn(100, 2)
385
true_coef_gamma = np.array([0.4, -0.2])
386
log_scale = 1.0 + X_gamma @ true_coef_gamma
387
scale = np.exp(log_scale)
388

389
# Gamma-distributed response
390
shape = 2.0
391
y_gamma = np.random.gamma(shape=shape, scale=scale/shape)
392

393
with pm.Model() as gamma_glm:
394
    # Priors for shape parameter
395
    priors = {
396
        'alpha': pm.Gamma('alpha', alpha=1, beta=1)  # Shape parameter
397
    }
398
    
399
    glm = pm.GLM(y=y_gamma, X=X_gamma,
400
                labels=['duration_factor', 'cost_driver'],
401
                family=pm.glm.families.Gamma(),
402
                priors=priors)
403
    
404
    trace = pm.sample(1000, tune=1000)
405
```
406

407
### Robust Regression with Student's t
408

409
```python
410
# Data with outliers
411
X_robust = np.random.randn(80, 2)
412
true_coef_robust = np.array([1.0, -0.5])
413
y_clean = 2.0 + X_robust @ true_coef_robust
414

415
# Add outliers
416
outlier_idx = np.random.choice(80, size=8, replace=False)
417
y_robust = y_clean.copy()
418
y_robust[outlier_idx] += np.random.normal(0, 5, len(outlier_idx))
419

420
with pm.Model() as robust_glm:
421
    # Student's t for robustness
422
    priors = {
423
        'nu': pm.Gamma('nu', alpha=2, beta=0.1),  # Degrees of freedom
424
        'sigma': pm.HalfNormal('sigma', sigma=2)
425
    }
426
    
427
    glm = pm.GLM(y=y_robust, X=X_robust,
428
                labels=['x1', 'x2'], 
429
                family=pm.glm.families.StudentT(),
430
                priors=priors)
431
    
432
    trace = pm.sample(1000, tune=1000)
433
    
434
    # Compare with normal GLM
435
    glm_normal = pm.GLM(y=y_robust, X=X_robust,
436
                       family=pm.glm.families.Normal())
437
    trace_normal = pm.sample(1000, tune=1000)
438
```
439

440
### Mixed Effects GLM Structure
441

442
```python
443
# Hierarchical/mixed effects structure using GLM components
444
n_groups = 5
445
n_per_group = 30
446
group_labels = np.repeat(range(n_groups), n_per_group)
447
n_total = len(group_labels)
448

449
# Group-level predictors
450
X_group = np.random.randn(n_total, 2)
451

452
# Individual-level predictors  
453
X_individual = np.random.randn(n_total, 1)
454

455
with pm.Model() as mixed_glm:
456
    # Fixed effects for individual-level predictors
457
    individual_component = pm.glm.LinearComponent(
458
        x=X_individual,
459
        labels=['individual_pred'],
460
        priors={'individual_pred': pm.Normal('individual_pred', 0, 1)}
461
    )
462
    individual_pred = individual_component.build('individual')
463
    
464
    # Random intercepts per group  
465
    group_intercept_sd = pm.HalfNormal('group_intercept_sd', sigma=1)
466
    group_intercepts = pm.Normal('group_intercepts', 
467
                                mu=0, 
468
                                sigma=group_intercept_sd, 
469
                                shape=n_groups)
470
    
471
    # Random slopes per group for one predictor
472
    group_slope_sd = pm.HalfNormal('group_slope_sd', sigma=1)
473
    group_slopes = pm.Normal('group_slopes',
474
                            mu=0,
475
                            sigma=group_slope_sd, 
476
                            shape=n_groups)
477
    
478
    # Combined linear predictor
479
    mu = (individual_pred + 
480
          group_intercepts[group_labels] + 
481
          group_slopes[group_labels] * X_group[:, 0])
482
    
483
    # Observation noise
484
    sigma = pm.HalfNormal('sigma', sigma=1)
485
    
486
    # Generate response data
487
    y_mixed = pm.Normal('y_mixed', mu=mu, sigma=sigma, observed=np.random.randn(n_total))
488
    
489
    trace = pm.sample(1000, tune=1000)
490
```
491

492
### GLM with Interaction Terms
493

494
```python
495
# Model with interaction effects
496
n_interact = 200
497
X1 = np.random.randn(n_interact)
498
X2 = np.random.binomial(1, 0.5, n_interact)  # Binary predictor
499
X_interact = np.column_stack([X1, X2, X1 * X2])  # Include interaction
500

501
# True coefficients including interaction
502
true_coef_interact = np.array([1.0, 0.5, -0.8])  # main effects + interaction
503
y_interact_true = 0.2 + X_interact @ true_coef_interact
504
y_interact = y_interact_true + np.random.normal(0, 0.3, n_interact)
505

506
with pm.Model() as interaction_glm:
507
    glm = pm.GLM(y=y_interact, X=X_interact,
508
                labels=['continuous', 'binary', 'interaction'],
509
                family=pm.glm.families.Normal())
510
    
511
    trace = pm.sample(1000, tune=1000)
512
    
513
    # Analyze interaction effect
514
    interaction_coef = trace.posterior['interaction'].values.flatten()
515
    print(f"Interaction effect: {interaction_coef.mean():.3f} ± {interaction_coef.std():.3f}")
516
```
517

518
### GLM Model Comparison
519

520
```python
521
# Compare different GLM families for same data
522
y_comparison = y_counts  # Use count data from earlier example
523
X_comparison = X_count
524

525
models_comparison = {}
526

527
# Poisson model
528
with pm.Model() as model_poisson:
529
    glm_pois = pm.GLM(y=y_comparison, X=X_comparison,
530
                     family=pm.glm.families.Poisson())
531
    trace_pois = pm.sample(1000, tune=1000, target_accept=0.9)
532
    
533
models_comparison['Poisson'] = trace_pois
534

535
# Negative binomial model
536
with pm.Model() as model_nb:
537
    glm_nb = pm.GLM(y=y_comparison, X=X_comparison,
538
                   family=pm.glm.families.NegativeBinomial())
539
    trace_nb = pm.sample(1000, tune=1000, target_accept=0.9)
540
    
541
models_comparison['NegBinom'] = trace_nb
542

543
# Model comparison using WAIC
544
import arviz as az
545
comparison = az.compare(models_comparison, ic='waic')
546
print("Model comparison:")
547
print(comparison)
548
```

Version

Tile

Files

glm.md.css-3qkkll{font-size:var(--chakra-font-sizes-sm);font-weight:var(--chakra-font-weights-normal);color:var(--chakra-colors-gray-300);}docs/

glm.mddocs/