or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

core-programming.md, distributions.md, gaussian-processes.md, index.md, inference.md, neural-networks.md, optimization.md, transforms-constraints.md

docs/gaussian-processes.md

0

# Gaussian Processes

1

2

This module provides Gaussian process models for non-parametric Bayesian modeling. Kernels, likelihoods, and efficient inference algorithms combine to give flexible function approximation with uncertainty quantification.

3

4

## Capabilities

5

6

### Kernel Functions

7

8

Kernel functions define the covariance structure of a Gaussian process and encode prior assumptions about the smoothness and behavior of the modeled function.

9

10

```python { .api }

11

class Kernel:

12

"""

13

Base class for Gaussian process kernel functions.

14

15

Kernels define the covariance structure of Gaussian processes by

16

specifying how similar function values should be at different inputs.

17

"""

18

19

def forward(self, X: torch.Tensor, Z: torch.Tensor = None, diag: bool = False) -> torch.Tensor:

20

"""

21

Compute kernel matrix or diagonal.

22

23

Parameters:

24

- X (Tensor): First set of inputs of shape (n, input_dim)

25

- Z (Tensor, optional): Second set of inputs of shape (m, input_dim).

26

If None, uses X for both arguments (computes K(X, X))

27

- diag (bool): If True, return only diagonal elements as vector

28

29

Returns:

30

Tensor: Kernel matrix of shape (n, m), or (n, n) when Z is None; if diag=True, the diagonal as a vector of shape (n,)

31

32

Examples:

33

>>> kernel = RBF(input_dim=2)

34

>>> X = torch.randn(10, 2)

35

>>> K = kernel.forward(X) # Shape: (10, 10)

36

>>> diag_K = kernel.forward(X, diag=True) # Shape: (10,)

37

"""

38

39

class RBF(Kernel):

40

"""

41

Radial Basis Function (RBF) kernel, also known as Gaussian or squared exponential kernel.

42

43

k(x, x') = variance * exp(-0.5 * ||x - x'||^2 / lengthscale^2)

44

45

Encodes smooth function assumptions with characteristic lengthscale.

46

"""

47

48

def __init__(self, input_dim: int, variance: torch.Tensor = None,

49

lengthscale: torch.Tensor = None, active_dims: List[int] = None):

50

"""

51

Parameters:

52

- input_dim (int): Input dimensionality

53

- variance (Tensor, optional): Kernel variance/amplitude parameter

54

- lengthscale (Tensor, optional): Characteristic lengthscale parameter(s)

55

- active_dims (List[int], optional): Dimensions to apply kernel to

56

57

Examples:

58

>>> # Isotropic RBF (same lengthscale for all dimensions)

59

>>> rbf = RBF(input_dim=3, variance=2.0, lengthscale=1.5)

60

>>>

61

>>> # Anisotropic RBF (different lengthscale per dimension)

62

>>> rbf = RBF(input_dim=3, lengthscale=torch.tensor([1.0, 2.0, 0.5]))

63

>>>

64

>>> # Apply to subset of dimensions

65

>>> rbf = RBF(input_dim=5, active_dims=[0, 2, 4])

66

"""

67

68

class Matern32(Kernel):

69

"""

70

Matérn kernel with smoothness parameter ν = 3/2.

71

72

k(x, x') = variance * (1 + sqrt(3) * r) * exp(-sqrt(3) * r)

73

where r = ||x - x'|| / lengthscale

74

75

Yields once-differentiable sample functions: less smooth than RBF, so it can follow rougher, less regular behavior.

76

"""

77

78

def __init__(self, input_dim: int, variance: torch.Tensor = None,

79

lengthscale: torch.Tensor = None, active_dims: List[int] = None):

80

"""

81

Parameters:

82

- input_dim (int): Input dimensionality

83

- variance (Tensor, optional): Kernel variance parameter

84

- lengthscale (Tensor, optional): Characteristic lengthscale

85

- active_dims (List[int], optional): Active input dimensions

86

"""

87

88

class Matern52(Kernel):

89

"""

90

Matérn kernel with smoothness parameter ν = 5/2.

91

92

k(x, x') = variance * (1 + sqrt(5) * r + 5/3 * r^2) * exp(-sqrt(5) * r)

93

where r = ||x - x'|| / lengthscale

94

95

Yields twice-differentiable sample functions: smoother than Matern32 but less smooth than RBF.

96

"""

97

98

def __init__(self, input_dim: int, variance: torch.Tensor = None,

99

lengthscale: torch.Tensor = None, active_dims: List[int] = None):

100

"""Parameters same as Matern32."""

101

102

class Exponential(Kernel):

103

"""

104

Exponential kernel (Matérn with ν = 1/2).

105

106

k(x, x') = variance * exp(-r)

107

where r = ||x - x'|| / lengthscale

108

109

Generates rough sample functions that are continuous but not differentiable.

110

"""

111

112

def __init__(self, input_dim: int, variance: torch.Tensor = None,

113

lengthscale: torch.Tensor = None, active_dims: List[int] = None):

114

"""Parameters same as RBF."""

115

116

class Linear(Kernel):

117

"""

118

Linear kernel for linear function relationships.

119

120

k(x, x') = variance * x^T * x'

121

122

Encodes the prior assumption that the function is linear in its inputs.

123

"""

124

125

def __init__(self, input_dim: int, variance: torch.Tensor = None,

126

active_dims: List[int] = None):

127

"""

128

Parameters:

129

- input_dim (int): Input dimensionality

130

- variance (Tensor, optional): Kernel variance parameter

131

- active_dims (List[int], optional): Active input dimensions

132

"""

133

134

class Polynomial(Kernel):

135

"""

136

Polynomial kernel for polynomial function relationships.

137

138

k(x, x') = variance * (bias + x^T * x')^degree

139

"""

140

141

def __init__(self, input_dim: int, degree: int = 2, variance: torch.Tensor = None,

142

bias: torch.Tensor = None, active_dims: List[int] = None):

143

"""

144

Parameters:

145

- input_dim (int): Input dimensionality

146

- degree (int): Polynomial degree

147

- variance (Tensor, optional): Kernel variance

148

- bias (Tensor, optional): Bias term

149

- active_dims (List[int], optional): Active dimensions

150

"""

151

152

class Periodic(Kernel):

153

"""

154

Periodic kernel for periodic function patterns.

155

156

k(x, x') = variance * exp(-2 * sin^2(π * ||x - x'|| / period) / lengthscale^2)

157

158

Encodes periodic structure with specified period.

159

"""

160

161

def __init__(self, input_dim: int, period: torch.Tensor, variance: torch.Tensor = None,

162

lengthscale: torch.Tensor = None, active_dims: List[int] = None):

163

"""

164

Parameters:

165

- input_dim (int): Input dimensionality

166

- period (Tensor): Period parameter for periodic structure

167

- variance (Tensor, optional): Kernel variance

168

- lengthscale (Tensor, optional): Lengthscale within periods

169

- active_dims (List[int], optional): Active dimensions

170

171

Examples:

172

>>> # Daily periodic pattern

173

>>> periodic = Periodic(input_dim=1, period=24.0)

174

>>>

175

>>> # Seasonal pattern

176

>>> periodic = Periodic(input_dim=1, period=365.25)

177

"""

178

179

class WhiteNoise(Kernel):

180

"""

181

White noise kernel for independent noise.

182

183

k(x, x') = variance * δ(x, x')

184

where δ is the Kronecker delta (1 if x == x', 0 otherwise)

185

186

Models independent noise at each point.

187

"""

188

189

def __init__(self, input_dim: int, variance: torch.Tensor = None,

190

active_dims: List[int] = None):

191

"""

192

Parameters:

193

- input_dim (int): Input dimensionality

194

- variance (Tensor, optional): Noise variance

195

- active_dims (List[int], optional): Active dimensions

196

"""

197

198

class Constant(Kernel):

199

"""

200

Constant kernel that returns constant covariance.

201

202

k(x, x') = variance

203

204

Encodes the prior assumption that the function is constant across inputs.

205

"""

206

207

def __init__(self, input_dim: int, variance: torch.Tensor = None,

208

active_dims: List[int] = None):

209

"""

210

Parameters:

211

- input_dim (int): Input dimensionality

212

- variance (Tensor, optional): Constant variance value

213

- active_dims (List[int], optional): Active dimensions

214

"""

215

```

216

217

### Kernel Operations

218

219

Operations for combining and modifying kernels to create complex covariance structures.

220

221

```python { .api }

222

class Sum(Kernel):

223

"""

224

Sum of two kernels: k(x, x') = k1(x, x') + k2(x, x'). Nest Sum instances to add more than two kernels.

225

226

Combines different kernel behaviors additively.

227

"""

228

229

def __init__(self, kern1: Kernel, kern2: Kernel):

230

"""

231

Parameters:

232

- kern1 (Kernel): First kernel

233

- kern2 (Kernel): Second kernel

234

235

Examples:

236

>>> # Combine RBF and periodic components

237

>>> rbf = RBF(input_dim=1, lengthscale=1.0)

238

>>> periodic = Periodic(input_dim=1, period=12.0)

239

>>> combined = Sum(rbf, periodic)

240

"""

241

242

class Product(Kernel):

243

"""

244

Product of two kernels: k(x, x') = k1(x, x') * k2(x, x'). Nest Product instances to multiply more than two kernels.

245

246

Combines kernel behaviors multiplicatively.

247

"""

248

249

def __init__(self, kern1: Kernel, kern2: Kernel):

250

"""

251

Parameters:

252

- kern1 (Kernel): First kernel

253

- kern2 (Kernel): Second kernel

254

255

Examples:

256

>>> # Modulate RBF with periodic structure

257

>>> rbf = RBF(input_dim=1, lengthscale=2.0)

258

>>> periodic = Periodic(input_dim=1, period=7.0)

259

>>> modulated = Product(rbf, periodic)

260

"""

261

262

class Exponent(Kernel):

263

"""

264

Exponentiated kernel: k(x, x') = k_base(x, x')^exponent

265

266

Raises kernel values to a power.

267

"""

268

269

def __init__(self, kernel: Kernel, exponent: float):

270

"""

271

Parameters:

272

- kernel (Kernel): Base kernel

273

- exponent (float): Exponent value

274

"""

275

276

class VerticalScaling(Kernel):

277

"""

278

Vertically scale kernel: k(x, x') = scale * k_base(x, x')

279

280

Multiplies kernel by a scaling factor.

281

"""

282

283

def __init__(self, kernel: Kernel, scale: torch.Tensor):

284

"""

285

Parameters:

286

- kernel (Kernel): Base kernel to scale

287

- scale (Tensor): Scaling factor

288

"""

289

290

class Warping(Kernel):

291

"""

292

Apply input warping to kernel: k(x, x') = k_base(f(x), f(x'))

293

294

Transforms inputs before applying base kernel.

295

"""

296

297

def __init__(self, kernel: Kernel, iwarping_fn: callable):

298

"""

299

Parameters:

300

- kernel (Kernel): Base kernel

301

- iwarping_fn (callable): Input warping function f, applied to both inputs before the base kernel is evaluated

302

"""

303

```

304

305

### Likelihood Functions

306

307

Likelihood functions define the observation model that relates latent GP function values to observed data.

308

309

```python { .api }

310

class Likelihood:

311

"""

312

Base class for GP likelihood functions.

313

314

Defines how GP function values relate to observed data,

315

including noise models and observation transformations.

316

"""

317

318

def forward(self, function_dist: dist.Distribution, y: torch.Tensor = None) -> dist.Distribution:

319

"""

320

Forward pass through likelihood.

321

322

Parameters:

323

- function_dist (Distribution): GP function value distribution

324

- y (Tensor, optional): Observed data

325

326

Returns:

327

Distribution: Observation distribution

328

"""

329

330

class Gaussian(Likelihood):

331

"""

332

Gaussian likelihood for continuous observations with additive noise.

333

334

y = f(x) + ε, where ε ~ N(0, noise) and `noise` is the noise variance

335

336

Most common likelihood for regression problems.

337

"""

338

339

def __init__(self, noise: torch.Tensor = None, name: str = "Gaussian"):

340

"""

341

Parameters:

342

- noise (Tensor, optional): Noise variance parameter

343

- name (str): Likelihood name for parameter scoping

344

345

Examples:

346

>>> # Fixed noise variance

347

>>> likelihood = Gaussian(noise=0.1)

348

>>>

349

>>> # Learnable noise variance

350

>>> likelihood = Gaussian() # noise will be learned

351

"""

352

353

class Bernoulli(Likelihood):

354

"""

355

Bernoulli likelihood for binary classification.

356

357

p(y = 1 | f) = σ(f), where σ is the sigmoid function

358

359

Maps GP function values to class probabilities.

360

"""

361

362

def __init__(self, name: str = "Bernoulli"):

363

"""

364

Parameters:

365

- name (str): Likelihood name for parameter scoping

366

"""

367

368

class Poisson(Likelihood):

369

"""

370

Poisson likelihood for count data.

371

372

p(y | f) = Poisson(exp(f))

373

374

Uses a log link (rate = exp(f)) so that the rate parameter is always positive.

375

"""

376

377

def __init__(self, name: str = "Poisson"):

378

"""

379

Parameters:

380

- name (str): Likelihood name for parameter scoping

381

"""

382

383

class Beta(Likelihood):

384

"""

385

Beta likelihood for data on the unit interval.

386

387

Useful for modeling proportions, rates, or probabilities.

388

"""

389

390

def __init__(self, name: str = "Beta"):

391

"""

392

Parameters:

393

- name (str): Likelihood name for parameter scoping

394

"""

395

396

class Gamma(Likelihood):

397

"""

398

Gamma likelihood for positive continuous data.

399

400

Useful for modeling positive quantities like waiting times.

401

"""

402

403

def __init__(self, name: str = "Gamma"):

404

"""

405

Parameters:

406

- name (str): Likelihood name for parameter scoping

407

"""

408

```

409

410

### GP Models

411

412

Complete Gaussian process models combining kernels and likelihoods for different modeling scenarios.

413

414

```python { .api }

415

class GPModel:

416

"""

417

Base Gaussian process model class.

418

419

Combines kernel functions and likelihood models to create

420

complete GP models for regression and classification.

421

"""

422

423

def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,

424

likelihood: Likelihood, name: str = "GPModel"):

425

"""

426

Parameters:

427

- X (Tensor): Training inputs of shape (n, input_dim)

428

- y (Tensor): Training outputs of shape (n,) or (n, output_dim)

429

- kernel (Kernel): Covariance kernel function

430

- likelihood (Likelihood): Observation likelihood model

431

- name (str): Model name for parameter scoping

432

433

Examples:

434

>>> X_train = torch.randn(100, 2)

435

>>> y_train = torch.randn(100)

436

>>> kernel = RBF(input_dim=2)

437

>>> likelihood = Gaussian()

438

>>> gp = GPModel(X_train, y_train, kernel, likelihood)

439

"""

440

441

def model(self):

442

"""Define the GP generative model."""

443

444

def guide(self):

445

"""Define variational guide for approximate inference."""

446

447

def forward(self, X_new: torch.Tensor, full_cov: bool = False, noiseless: bool = True) -> dist.Distribution:

448

"""

449

Make predictions at new input locations.

450

451

Parameters:

452

- X_new (Tensor): New input locations of shape (m, input_dim)

453

- full_cov (bool): Whether to return full covariance matrix

454

- noiseless (bool): Whether to exclude observation noise from predictions

455

456

Returns:

457

Distribution: Predictive distribution at new locations

458

459

Examples:

460

>>> X_test = torch.randn(20, 2)

461

>>> pred_dist = gp.forward(X_test)

462

>>> pred_mean = pred_dist.mean

463

>>> pred_var = pred_dist.variance

464

"""

465

466

class VariationalGP(GPModel):

467

"""

468

Variational Gaussian process for scalable inference.

469

470

Uses sparse GP approximations with inducing points for

471

efficient inference on large datasets.

472

"""

473

474

def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,

475

likelihood: Likelihood, X_u: torch.Tensor = None,

476

num_inducing: int = None, name: str = "VariationalGP"):

477

"""

478

Parameters:

479

- X (Tensor): Training inputs

480

- y (Tensor): Training outputs

481

- kernel (Kernel): Covariance kernel

482

- likelihood (Likelihood): Observation likelihood

483

- X_u (Tensor, optional): Inducing input locations

484

- num_inducing (int, optional): Number of inducing points (if X_u not provided)

485

- name (str): Model name

486

487

Examples:

488

>>> # Large dataset GP with 50 inducing points

489

>>> X_train = torch.randn(10000, 3)

490

>>> y_train = torch.randn(10000)

491

>>> vgp = VariationalGP(X_train, y_train, RBF(3), Gaussian(), num_inducing=50)

492

"""

493

494

class SparseGPRegression(VariationalGP):

495

"""

496

Sparse GP regression model using variational inference.

497

498

Optimized for regression tasks with Gaussian likelihoods

499

and large datasets.

500

"""

501

pass

502

503

class VariationalSparseGP(VariationalGP):

504

"""

505

General variational sparse GP with flexible likelihoods.

506

507

Supports non-Gaussian likelihoods through variational inference.

508

"""

509

pass

510

```

511

512

### Multi-Output GPs

513

514

Models for handling multiple outputs and structured output spaces.

515

516

```python { .api }

517

class MultiOutputGP(GPModel):

518

"""

519

Multi-output Gaussian process for vector-valued functions.

520

521

Models correlations between different output dimensions

522

using appropriate kernel structures.

523

"""

524

525

def __init__(self, X: torch.Tensor, y: torch.Tensor, kernel: Kernel,

526

likelihood: Likelihood, num_outputs: int, name: str = "MultiOutputGP"):

527

"""

528

Parameters:

529

- X (Tensor): Training inputs

530

- y (Tensor): Training outputs of shape (n, num_outputs)

531

- kernel (Kernel): Base kernel (will be extended for multiple outputs)

532

- likelihood (Likelihood): Output likelihood

533

- num_outputs (int): Number of output dimensions

534

- name (str): Model name

535

"""

536

537

class VariationalMultiOutputGP(MultiOutputGP):

538

"""

539

Variational multi-output GP for scalable multi-output modeling.

540

541

Combines multi-output structure with sparse GP approximations.

542

"""

543

pass

544

```

545

546

### Utility Functions

547

548

Helper functions and utilities for GP modeling and inference.

549

550

```python { .api }

551

def conditional(X_new: torch.Tensor, X_train: torch.Tensor, kernel: Kernel,

552

f_loc: torch.Tensor, f_scale_tril: torch.Tensor = None,

553

full_cov: bool = False, whiten: bool = False,

554

jitter: float = 1e-6) -> dist.Distribution:

555

"""

556

Compute conditional GP distribution p(f* | f, X*, X).

557

558

Parameters:

559

- X_new (Tensor): Test input locations

560

- X_train (Tensor): Training input locations

561

- kernel (Kernel): Covariance kernel

562

- f_loc (Tensor): Mean of training function values

563

- f_scale_tril (Tensor, optional): Lower-triangular Cholesky factor of the covariance of the training function values

564

- full_cov (bool): Whether to return full covariance

565

- whiten (bool): Whether to use whitened parameterization

566

- jitter (float): Jitter for numerical stability

567

568

Returns:

569

Distribution: Conditional GP distribution

570

"""

571

572

def util_gp_prior(X: torch.Tensor, kernel: Kernel, jitter: float = 1e-6) -> dist.Distribution:

573

"""

574

Compute GP prior distribution at given input locations.

575

576

Parameters:

577

- X (Tensor): Input locations

578

- kernel (Kernel): Covariance kernel

579

- jitter (float): Diagonal jitter for numerical stability

580

581

Returns:

582

Distribution: GP prior distribution

583

"""

584

585

def train_gp(gp_model: GPModel, optimizer, num_steps: int = 1000,

586

retain_graph: bool = False) -> List[float]:

587

"""

588

Train GP model using optimization.

589

590

Parameters:

591

- gp_model (GPModel): GP model to train

592

- optimizer: PyTorch optimizer

593

- num_steps (int): Number of optimization steps

594

- retain_graph (bool): Whether to retain computation graph

595

596

Returns:

597

List[float]: Training loss history

598

599

Examples:

600

>>> optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)

601

>>> losses = train_gp(gp, optimizer, num_steps=500)

602

"""

603

```

604

605

## Examples

606

607

### Basic GP Regression

608

609

```python

610

import torch

611

import pyro

612

import pyro.distributions as dist

613

from pyro.contrib.gp import GPModel

614

from pyro.contrib.gp.kernels import RBF

615

from pyro.contrib.gp.likelihoods import Gaussian

616

617

# Generate training data

618

X_train = torch.linspace(0, 10, 50).unsqueeze(-1)

619

y_train = torch.sin(X_train.squeeze()) + 0.1 * torch.randn(50)

620

621

# Define GP model

622

kernel = RBF(input_dim=1, lengthscale=1.0, variance=1.0)

623

likelihood = Gaussian(noise=0.1)

624

gp = GPModel(X_train, y_train, kernel, likelihood)

625

626

# Training

627

optimizer = torch.optim.Adam(gp.parameters(), lr=0.01)

628

for i in range(1000):

629

optimizer.zero_grad()

630

loss = -gp.model().log_prob(y_train)

631

loss.backward()

632

optimizer.step()

633

634

# Prediction

635

X_test = torch.linspace(0, 12, 100).unsqueeze(-1)

636

with torch.no_grad():

637

pred_dist = gp.forward(X_test)

638

pred_mean = pred_dist.mean

639

pred_std = pred_dist.stddev

640

```

641

642

### Sparse GP for Large Datasets

643

644

```python

645

from pyro.contrib.gp import VariationalGP

646

647

# Large dataset

648

X_train = torch.randn(10000, 3)

649

y_train = torch.sin(X_train.sum(dim=1)) + 0.1 * torch.randn(10000)

650

651

# Sparse GP with inducing points

652

kernel = RBF(input_dim=3)

653

likelihood = Gaussian()

654

sparse_gp = VariationalGP(X_train, y_train, kernel, likelihood, num_inducing=100)

655

656

# Training with SVI

657

from pyro.infer import SVI, Trace_ELBO

658

from pyro.optim import Adam

659

660

svi = SVI(sparse_gp.model, sparse_gp.guide, Adam({"lr": 0.01}), Trace_ELBO())

661

662

for step in range(2000):

663

loss = svi.step()

664

if step % 200 == 0:

665

print(f"Step {step}, Loss: {loss}")

666

```

667

668

### Multi-Output GP

669

670

```python

671

from pyro.contrib.gp import MultiOutputGP

672

673

# Multi-dimensional output data

674

X_train = torch.randn(200, 2)

675

y_train = torch.randn(200, 3) # 3-dimensional output

676

677

# Multi-output GP

678

kernel = RBF(input_dim=2)

679

likelihood = Gaussian()

680

multi_gp = MultiOutputGP(X_train, y_train, kernel, likelihood, num_outputs=3)

681

682

# Training and prediction similar to basic GP

683

```

684

685

### Custom Kernel Combination

686

687

```python

688

from pyro.contrib.gp.kernels import RBF, Periodic, WhiteNoise, Sum, Product

689

690

# Complex kernel combining multiple components

691

base_kernel = RBF(input_dim=1, lengthscale=2.0, variance=1.0)

692

periodic_kernel = Periodic(input_dim=1, period=7.0, lengthscale=1.0)

693

noise_kernel = WhiteNoise(input_dim=1, variance=0.1)

694

695

# Combine kernels: RBF + Periodic pattern + independent noise

696

combined_kernel = Sum(Sum(base_kernel, periodic_kernel), noise_kernel)

697

698

# Use in GP model

699

gp = GPModel(X_train, y_train, combined_kernel, Gaussian())

700

```

701

702

### GP Classification

703

704

```python

705

from pyro.contrib.gp.likelihoods import Bernoulli

706

707

# Binary classification data

708

X_train = torch.randn(100, 2)

709

y_train = torch.randint(0, 2, (100,)).float()

710

711

# GP classifier

712

kernel = RBF(input_dim=2)

713

likelihood = Bernoulli()

714

gp_classifier = GPModel(X_train, y_train, kernel, likelihood)

715

716

# Training requires variational inference for non-Gaussian likelihood

717

from pyro.infer import SVI, Trace_ELBO

718

from pyro.optim import Adam

719

720

svi = SVI(gp_classifier.model, gp_classifier.guide, Adam({"lr": 0.01}), Trace_ELBO())

721

722

for step in range(1000):

723

loss = svi.step()

724

```