or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-io.md evaluation.md factors.md index.md inference.md learning.md models.md

docs/factors.md

0

# Probability Factors

1

2

Representations of probability distributions including discrete factors, conditional probability distributions, and continuous distributions. Factors are the fundamental building blocks for representing uncertainty in probabilistic graphical models.

3

4

## Capabilities

5

6

### Discrete Factors

7

8

Core discrete probability factor class for representing joint probability distributions over discrete variables.

9

10

```python { .api }

11

class DiscreteFactor:

12

def __init__(self, variables, cardinality, values, state_names={}):

13

"""

14

Create a discrete factor over specified variables.

15

16

Parameters:

17

- variables: list of variable names

18

- cardinality: list of variable cardinalities (number of states)

19

- values: factor values as flat array or nested array

20

- state_names: dict mapping variables to their state names

21

"""

22

23

def scope(self):

24

"""

25

Get the variables in the factor's scope.

26

27

Returns:

28

list: Variable names in the factor

29

"""

30

31

def get_cardinality(self, variables):

32

"""Get cardinalities of specified variables."""

33

34

def get_value(self, **kwargs):

35

"""

36

Get factor value for specific variable assignments.

37

38

Parameters:

39

- kwargs: variable assignments as keyword arguments

40

41

Returns:

42

float: Factor value for the assignment

43

"""

44

45

def set_value(self, value, **kwargs):

46

"""Set factor value for specific variable assignments."""

47

48

def marginalize(self, variables, inplace=True):

49

"""

50

Sum out variables from the factor.

51

52

Parameters:

53

- variables: list of variables to marginalize out

54

- inplace: whether to modify factor in-place

55

56

Returns:

57

DiscreteFactor: Marginalized factor (returns None when inplace=True)

58

"""

59

60

def maximize(self, variables, inplace=True):

61

"""

62

Maximize out variables (max-marginalization).

63

64

Parameters:

65

- variables: list of variables to maximize out

66

- inplace: whether to modify factor in-place

67

68

Returns:

69

DiscreteFactor: Maximized factor (returns None when inplace=True)

70

"""

71

72

def normalize(self, inplace=True):

73

"""

74

Normalize factor values to sum to 1.

75

76

Parameters:

77

- inplace: whether to modify factor in-place

78

79

Returns:

80

DiscreteFactor: Normalized factor (returns None when inplace=True)

81

"""

82

83

def reduce(self, values, inplace=True, show_warnings=True):

84

"""

85

Condition the factor on observed evidence.

86

87

Parameters:

88

- values: dict of variable assignments {variable: value}

89

- inplace: whether to modify factor in-place

90

- show_warnings: whether to show warnings

91

92

Returns:

93

DiscreteFactor: Reduced factor (returns None when inplace=True)

94

"""

95

96

def sum(self, phi1, inplace=True):

97

"""Add another factor to this factor."""

98

99

def product(self, phi1, inplace=True):

100

"""

101

Multiply this factor with another factor.

102

103

Parameters:

104

- phi1: DiscreteFactor to multiply with

105

- inplace: whether to modify factor in-place

106

107

Returns:

108

DiscreteFactor: Product factor (returns None when inplace=True)

109

"""

110

111

def divide(self, phi1, inplace=True):

112

"""Divide this factor by another factor."""

113

114

def sample(self, n, seed=None):

115

"""

116

Generate samples from the factor distribution.

117

118

Parameters:

119

- n: number of samples to generate

120

- seed: random seed for reproducibility

121

122

Returns:

123

pandas.DataFrame: Generated samples

124

"""

125

126

def copy(self):

127

"""Create deep copy of the factor."""

128

```

129

130

### Conditional Probability Distributions

131

132

Specialized factors representing conditional probability distributions P(X|Parents).

133

134

```python { .api }

135

class TabularCPD:

136

def __init__(self, variable, variable_card, values, evidence=None,

137

evidence_card=None, state_names={}):

138

"""

139

Create a tabular conditional probability distribution.

140

141

Parameters:

142

- variable: name of the variable this CPD represents

143

- variable_card: cardinality of the variable

144

- values: CPD values as 2D array

145

- evidence: list of parent variable names

146

- evidence_card: list of parent variable cardinalities

147

- state_names: dict mapping variables to state names

148

"""

149

150

def get_values(self):

151

"""

152

Get the CPD values.

153

154

Returns:

155

numpy.ndarray: CPD probability values

156

"""

157

158

def normalize(self, inplace=True):

159

"""

160

Normalize CPD so each column sums to 1.

161

162

Parameters:

163

- inplace: whether to modify CPD in-place

164

165

Returns:

166

TabularCPD: Normalized CPD (returns None when inplace=True)

167

"""

168

169

def marginalize(self, variables, inplace=True):

170

"""Marginalize over specified variables."""

171

172

def reduce(self, values, inplace=True, show_warnings=True):

173

"""

174

Condition CPD on observed evidence.

175

176

Parameters:

177

- values: dict of evidence {variable: value}

178

- inplace: whether to modify CPD in-place

179

- show_warnings: whether to show warnings

180

181

Returns:

182

TabularCPD: Reduced CPD (returns None when inplace=True)

183

"""

184

185

def to_factor(self):

186

"""

187

Convert CPD to a DiscreteFactor.

188

189

Returns:

190

DiscreteFactor: Equivalent factor representation

191

"""

192

193

def reorder_parents(self, new_order, inplace=True):

194

"""Reorder parent variables to match specified order."""

195

196

def get_evidence(self):

197

"""

198

Get list of parent (evidence) variables.

199

200

Returns:

201

list: Parent variable names

202

"""

203

204

def copy(self):

205

"""Create deep copy of the CPD."""

206

207

def to_csv(self, filename):

208

"""Save CPD to CSV file."""

209

210

def to_dataframe(self):

211

"""

212

Convert CPD to pandas DataFrame.

213

214

Returns:

215

pandas.DataFrame: CPD as DataFrame

216

"""

217

```

218

219

### State Representation

220

221

Utility class for representing variable states.

222

223

```python { .api }

224

class State:

225

def __init__(self, variable, state):

226

"""

227

Create a variable state.

228

229

Parameters:

230

- variable: variable name

231

- state: state value/index

232

"""

233

234

def __eq__(self, other):

235

"""Check equality with another State."""

236

237

def __hash__(self):

238

"""Hash function for use in sets/dictionaries."""

239

```

240

241

### Joint Probability Distributions

242

243

Full joint probability distributions over all variables.

244

245

```python { .api }

246

class JointProbabilityDistribution:

247

def __init__(self, variables, cardinality, values):

248

"""

249

Create a joint probability distribution.

250

251

Parameters:

252

- variables: list of all variable names

253

- cardinality: list of variable cardinalities

254

- values: joint probability values

255

"""

256

257

def marginal_distribution(self, variables, inplace=True):

258

"""

259

Get marginal distribution over specified variables.

260

261

Parameters:

262

- variables: list of variables for marginal

263

- inplace: whether to modify distribution in-place

264

265

Returns:

266

JointProbabilityDistribution: Marginal distribution (returns None when inplace=True)

267

"""

268

269

def conditional_distribution(self, variables, evidence):

270

"""

271

Get conditional distribution P(variables|evidence).

272

273

Parameters:

274

- variables: list of query variables

275

- evidence: dict of evidence {variable: value}

276

277

Returns:

278

JointProbabilityDistribution: Conditional distribution

279

"""

280

281

def check_normalization(self):

282

"""Check if distribution is properly normalized."""

283

284

def normalize(self, inplace=True):

285

"""Normalize distribution to sum to 1."""

286

287

def copy(self):

288

"""Create deep copy of the distribution."""

289

```

290

291

### Noisy-OR CPDs

292

293

Specialized CPDs implementing the noisy-OR model for causal relationships.

294

295

```python { .api }

296

class NoisyORCPD:

297

def __init__(self, variable, evidence_card, leak_probability,

298

inhibition_probs):

299

"""

300

Create a Noisy-OR conditional probability distribution.

301

302

Parameters:

303

- variable: child variable name

304

- evidence_card: list of parent variable cardinalities

305

- leak_probability: probability of effect without any cause

306

- inhibition_probs: list of inhibition probabilities for each parent

307

"""

308

309

def to_tabular_cpd(self):

310

"""

311

Convert to equivalent TabularCPD.

312

313

Returns:

314

TabularCPD: Equivalent tabular representation

315

"""

316

317

def copy(self):

318

"""Create deep copy of the Noisy-OR CPD."""

319

```

320

321

### Continuous Factors

322

323

Linear Gaussian conditional probability distributions for continuous variables.

324

325

```python { .api }

326

class LinearGaussianCPD:

327

def __init__(self, variable, beta, std, evidence=[]):

328

"""

329

Create a Linear Gaussian CPD for continuous variables.

330

331

Parameters:

332

- variable: name of the continuous variable

333

- beta: list of regression coefficients, intercept first, followed by one coefficient per evidence variable

334

- std: standard deviation of the noise

335

- evidence: list of parent variable names

336

"""

337

338

def copy(self):

339

"""Create deep copy of the Linear Gaussian CPD."""

340

341

@staticmethod

342

def get_random(variable, evidence, loc=0.0, scale=1.0, seed=None):

343

"""

344

Generate random Linear Gaussian CPD.

345

346

Parameters:

347

- variable: variable name

348

- evidence: list of parent variables

349

- loc: mean of coefficient distribution

350

- scale: standard deviation of coefficient distribution

351

- seed: random seed

352

353

Returns:

354

LinearGaussianCPD: Random Linear Gaussian CPD

355

"""

356

```

357

358

### Hybrid Factors

359

360

Functional CPDs that can represent arbitrary distributions by means of a user-supplied function that returns a distribution object.

361

362

```python { .api }

363

class FunctionalCPD:

364

def __init__(self, variable, evidence, distribution_fn, *args, **kwargs):

365

"""

366

Create a Functional CPD using arbitrary distribution functions.

367

368

Parameters:

369

- variable: name of the variable

370

- evidence: list of parent variable names

371

- distribution_fn: function returning pyro.distributions object

372

- args, kwargs: additional arguments for the distribution function

373

"""

374

375

def copy(self):

376

"""Create deep copy of the Functional CPD."""

377

378

def sample(self, evidence_dict=None, size=1, seed=None):

379

"""

380

Sample from the functional CPD given evidence.

381

382

Parameters:

383

- evidence_dict: dict of evidence values for parent variables

384

- size: number of samples to generate

385

- seed: random seed

386

387

Returns:

388

numpy.ndarray: Generated samples

389

"""

390

```

391

392

### Factor Operations

393

394

Utility functions for factor manipulation.

395

396

```python { .api }

397

def factor_product(*factors):

398

"""

399

Compute product of multiple factors.

400

401

Parameters:

402

- factors: DiscreteFactor objects to multiply

403

404

Returns:

405

DiscreteFactor: Product of all factors

406

"""

407

408

def factor_divide(phi1, phi2):

409

"""

410

Divide one factor by another factor.

411

412

Parameters:

413

- phi1: numerator factor

414

- phi2: denominator factor

415

416

Returns:

417

DiscreteFactor: Division result

418

"""

419

420

def factor_sum_product(factors, variables):

421

"""

422

Compute sum-product of factors over specified variables.

423

424

Parameters:

425

- factors: list of DiscreteFactor objects

426

- variables: list of variables to sum out

427

428

Returns:

429

DiscreteFactor: Result of sum-product operation

430

"""

431

```

432

433

### Factor Collections

434

435

Classes and helper functions for managing collections of factors.

436

437

```python { .api }

438

class FactorSet:

439

def __init__(self, factors=None):

440

"""

441

Create a set of factors.

442

443

Parameters:

444

- factors: list of DiscreteFactor objects

445

"""

446

447

def product(self, inplace=True):

448

"""Compute product of all factors in the set."""

449

450

def marginalize(self, variables, inplace=True):

451

"""Marginalize variables from all factors."""

452

453

def reduce(self, values, inplace=True):

454

"""Reduce all factors with evidence."""

455

456

def factorset_product(*factor_sets):

457

"""Compute product of multiple factor sets."""

458

459

def factorset_divide(factor_set1, factor_set2):

460

"""Divide one factor set by another."""

461

462

class FactorDict:

463

def __init__(self, factors=None):

464

"""Dictionary-based factor collection with variable indexing."""

465

466

def get_factors(self, variables):

467

"""Get factors involving specified variables."""

468

469

def add_factors(self, *factors):

470

"""Add factors to the collection."""

471

```

472

473

## Usage Examples

474

475

### Creating and Manipulating Discrete Factors

476

477

```python

478

from pgmpy.factors.discrete import DiscreteFactor

479

480

# Create a factor over variables A and B

481

factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [0.8, 0.2, 0.3, 0.7])

482

483

# Create another factor over variables B and C

484

factor_bc = DiscreteFactor(['B', 'C'], [2, 2], [0.9, 0.1, 0.4, 0.6])

485

486

# Multiply factors

487

result = factor_ab.product(factor_bc, inplace=False)

488

489

# Marginalize out variable B

490

marginal_ac = result.marginalize(['B'], inplace=False)

491

492

# Condition on evidence

493

evidence = {'A': 1}

494

conditional = result.reduce(evidence, inplace=False)

495

```

496

497

### Working with CPDs

498

499

```python

500

from pgmpy.factors.discrete import TabularCPD

501

import numpy as np

502

503

# Create CPD for P(Grade | Difficulty, Intelligence)

504

values = np.array([[0.3, 0.4, 0.9, 0.08], # Grade = 0

505

[0.4, 0.25, 0.08, 0.02], # Grade = 1

506

[0.3, 0.35, 0.02, 0.9]]) # Grade = 2

507

508

cpd = TabularCPD(variable='Grade', variable_card=3, values=values,

509

evidence=['Difficulty', 'Intelligence'],

510

evidence_card=[2, 2],

511

state_names={'Grade': ['A', 'B', 'C'],

512

'Difficulty': ['Easy', 'Hard'],

513

'Intelligence': ['Low', 'High']})

514

515

# Check if CPD is normalized

516

print(cpd.get_values().sum(axis=0)) # Should sum to 1 for each column

517

518

# Convert to factor

519

factor = cpd.to_factor()

520

521

# Export to CSV

522

cpd.to_csv('grade_cpd.csv')

523

```

524

525

### Linear Gaussian CPDs for Continuous Variables

526

527

```python

528

from pgmpy.factors.continuous import LinearGaussianCPD

529

530

# Create P(Y | X) = N(2 + 1.5*X, σ=0.5)

531

cpd_y = LinearGaussianCPD(variable='Y',

532

beta=[2, 1.5], # intercept + coefficient

533

std=0.5,

534

evidence=['X'])

535

536

# Generate random Linear Gaussian CPD

537

random_cpd = LinearGaussianCPD.get_random('Z', ['X', 'Y'], seed=42)

538

```