or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-io.md · evaluation.md · factors.md · index.md · inference.md · learning.md · models.md

docs/learning.md

0

# Learning Algorithms

1

2

Algorithms for learning model structure from data and estimating parameters, including constraint-based, score-based, and hybrid approaches. pgmpy provides comprehensive learning capabilities for both structure discovery and parameter estimation.

3

4

## Capabilities

5

6

### Parameter Estimation

7

8

#### Maximum Likelihood Estimation

9

10

```python { .api }

11

class MaximumLikelihoodEstimator:

12

def __init__(self, model, data, **kwargs):

13

"""

14

Maximum likelihood parameter estimation.

15

16

Parameters:

17

- model: DiscreteBayesianNetwork with structure

18

- data: pandas.DataFrame with observed data

19

"""

20

21

def get_parameters(self, n_jobs=1, weighted=False):

22

"""

23

Estimate all model parameters.

24

25

Parameters:

26

- n_jobs: number of parallel jobs

27

- weighted: whether to use weighted estimation

28

29

Returns:

30

list: List of estimated TabularCPDs

31

"""

32

33

def estimate_cpd(self, node, weighted=False):

34

"""

35

Estimate CPD for a single node.

36

37

Parameters:

38

- node: variable name

39

- weighted: whether to use weighted estimation

40

41

Returns:

42

TabularCPD: Estimated conditional probability distribution

43

"""

44

```

45

46

#### Bayesian Parameter Estimation

47

48

```python { .api }

49

class BayesianEstimator:

50

def __init__(self, model, data, **kwargs):

51

"""Bayesian parameter estimation with priors."""

52

53

def get_parameters(self, prior_type='BDeu', equivalent_sample_size=10):

54

"""

55

Estimate parameters using Bayesian approach.

56

57

Parameters:

58

- prior_type: type of prior ('BDeu', 'K2', 'dirichlet')

59

- equivalent_sample_size: strength of prior belief

60

61

Returns:

62

list: List of posterior CPDs

63

"""

64

```

65

66

#### Expectation-Maximization

67

68

```python { .api }

69

class ExpectationMaximization:

70

def __init__(self, model, data, **kwargs):

71

"""EM algorithm for incomplete data."""

72

73

def get_parameters(self, max_iter=100, tol=1e-4):

74

"""

75

Estimate parameters using EM algorithm.

76

77

Parameters:

78

- max_iter: maximum iterations

79

- tol: convergence tolerance

80

81

Returns:

82

list: Estimated CPDs

83

"""

84

```

85

86

### Structure Learning

87

88

#### Hill Climbing Search

89

90

```python { .api }

91

class HillClimbSearch:

92

def __init__(self, data, use_cache=True, **kwargs):

93

"""

94

Hill climbing structure search algorithm.

95

96

Parameters:

97

- data: pandas.DataFrame with observed data

98

- use_cache: whether to cache scoring computations

99

"""

100

101

def estimate(self, start=None, tabu_length=0, max_indegree=None,

102

show_progress=True):

103

"""

104

Learn structure using hill climbing.

105

106

Parameters:

107

- start: initial graph structure

108

- tabu_length: length of tabu list

109

- max_indegree: maximum parent set size

110

- show_progress: whether to show progress

111

112

Returns:

113

DiscreteBayesianNetwork: Learned structure

114

"""

115

```

116

117

#### Constraint-Based Methods

118

119

```python { .api }

120

class PC:

121

def __init__(self, data):

122

"""

123

PC algorithm for causal discovery.

124

125

Parameters:

126

- data: pandas.DataFrame with observed data

127

"""

128

129

def estimate(self, variant="stable", ci_test="chi_square",

130

significance_level=0.05, show_progress=True):

131

"""

132

Learn structure using PC algorithm.

133

134

Parameters:

135

- variant: PC variant ('orig', 'stable', 'parallel')

136

- ci_test: conditional independence test

137

- significance_level: significance threshold

138

- show_progress: whether to show progress

139

140

Returns:

141

DiscreteBayesianNetwork: Learned causal structure

142

"""

143

```

144

145

#### Greedy Equivalence Search

146

147

```python { .api }

148

class GES:

149

def __init__(self, data):

150

"""Greedy Equivalence Search algorithm."""

151

152

def estimate(self, scoring_method='bic', phase1=True, phase2=True):

153

"""

154

Learn structure using GES.

155

156

Parameters:

157

- scoring_method: scoring function to use

158

- phase1: whether to perform forward phase

159

- phase2: whether to perform backward phase

160

161

Returns:

162

DiscreteBayesianNetwork: Learned structure

163

"""

164

```

165

166

### Structure Scoring

167

168

```python { .api }

169

class StructureScore:

170

def __init__(self, data):

171

"""Base class for structure scoring methods."""

172

173

def score(self, model):

174

"""

175

Compute structure score.

176

177

Parameters:

178

- model: DiscreteBayesianNetwork to score

179

180

Returns:

181

float: Structure score

182

"""

183

184

# Bayesian scores

185

class K2:

186

def __init__(self, data):

187

"""K2 score for structure evaluation."""

188

189

class BDeu:

190

def __init__(self, data, equivalent_sample_size=10):

191

"""BDeu score with equivalent sample size."""

192

193

class BDs:

194

def __init__(self, data):

195

"""BDs score for structure evaluation."""

196

197

# Information criterion scores

198

class BIC:

199

def __init__(self, data):

200

"""Bayesian Information Criterion."""

201

202

class AIC:

203

def __init__(self, data):

204

"""Akaike Information Criterion."""

205

206

# Gaussian scores

207

class BICGauss:

208

def __init__(self, data):

209

"""BIC for Gaussian data."""

210

211

class AICGauss:

212

def __init__(self, data):

213

"""AIC for Gaussian data."""

214

```

215

216

### Advanced Learning Methods

217

218

```python { .api }

219

class MmhcEstimator:

220

def __init__(self, data):

221

"""Max-Min Hill Climbing estimator."""

222

223

def estimate(self, significance_level=0.05):

224

"""Learn structure using MMHC algorithm."""

225

226

class SEMEstimator:

227

def __init__(self, data):

228

"""Structural Equation Model estimator."""

229

230

def estimate(self, method='2sls'):

231

"""

232

Estimate SEM parameters.

233

234

Parameters:

235

- method: estimation method ('2sls', 'fiml')

236

237

Returns:

238

SEM: Estimated structural equation model

239

"""

240

241

class IVEstimator:

242

def __init__(self, data):

243

"""Instrumental Variable estimator."""

244

245

def estimate(self, instrument, treatment, outcome):

246

"""Estimate causal effect using IV method."""

247

248

class ExpertInLoop:

249

def __init__(self, data):

250

"""Interactive structure learning with expert guidance."""

251

252

def estimate(self, expert_knowledge=None):

253

"""Learn structure with expert input."""

254

```

255

256

### Additional Structure Learning Algorithms

257

258

Advanced and specialized structure learning methods, together with an additional parameter estimator (`MirrorDescentEstimator`).

259

260

```python { .api }

261

class TreeSearch:

262

def __init__(self, data, use_cache=True):

263

"""

264

Tree-based structure search algorithm.

265

266

Parameters:

267

- data: pandas.DataFrame containing the data

268

- use_cache: whether to use caching for efficiency

269

"""

270

271

def estimate(self, start=None, max_indegree=None):

272

"""Estimate optimal tree structure."""

273

274

class ExhaustiveSearch:

275

def __init__(self, data, use_cache=True):

276

"""

277

Exhaustive search over all possible structures.

278

279

Parameters:

280

- data: pandas.DataFrame containing the data

281

- use_cache: whether to use caching for efficiency

282

"""

283

284

def estimate(self, scoring_method="bic", max_indegree=None):

285

"""Perform exhaustive structure search."""

286

287

class MirrorDescentEstimator:

288

def __init__(self, model, data):

289

"""

290

Mirror descent optimization for parameter estimation.

291

292

Parameters:

293

- model: model structure

294

- data: training data

295

"""

296

297

def get_parameters(self, n_jobs=1):

298

"""Estimate parameters using mirror descent."""

299

300

class ExpertKnowledge:

301

def __init__(self, data):

302

"""

303

Structure learning incorporating expert domain knowledge.

304

305

Parameters:

306

- data: pandas.DataFrame containing the data

307

"""

308

309

def estimate(self, must_have_edges=[], forbidden_edges=[],

310

must_not_have_edges=[]):

311

"""

312

Learn structure with expert constraints.

313

314

Parameters:

315

- must_have_edges: list of edges that must be present

316

- forbidden_edges: list of edges that are forbidden

317

- must_not_have_edges: alias for forbidden_edges

318

319

Returns:

320

Learned model structure

321

"""

322

```

323

324

### Additional Scoring Methods

325

326

Extended scoring functions for different variable types and model assumptions.

327

328

```python { .api }

329

class BICCondGauss:

330

def __init__(self, data):

331

"""BIC score for conditional Gaussian models."""

332

333

def score(self, model):

334

"""Compute BIC score for conditional Gaussian model."""

335

336

class AICCondGauss:

337

def __init__(self, data):

338

"""AIC score for conditional Gaussian models."""

339

340

def score(self, model):

341

"""Compute AIC score for conditional Gaussian model."""

342

343

class LogLikelihoodGauss:

344

def __init__(self, data):

345

"""Log-likelihood score for Gaussian models."""

346

347

def score(self, model):

348

"""Compute log-likelihood score for Gaussian model."""

349

350

class LogLikelihoodCondGauss:

351

def __init__(self, data):

352

"""Log-likelihood score for conditional Gaussian models."""

353

354

def score(self, model):

355

"""Compute log-likelihood score for conditional Gaussian model."""

356

```

357

358

## Usage Examples

359

360

### Parameter Learning

361

362

```python

363

from pgmpy.models import DiscreteBayesianNetwork

364

from pgmpy.estimators import MaximumLikelihoodEstimator

365

import pandas as pd

366

367

# Assume we have data and a model structure

368

data = pd.DataFrame({

369

'A': [0, 1, 0, 1, 1],

370

'B': [1, 0, 1, 0, 1],

371

'C': [0, 1, 1, 0, 1]

372

})

373

374

model = DiscreteBayesianNetwork([('A', 'C'), ('B', 'C')])

375

376

# Learn parameters

377

estimator = MaximumLikelihoodEstimator(model, data)

378

cpds = estimator.get_parameters()

379

380

# Add learned CPDs to model

381

model.add_cpds(*cpds)

382

```

383

384

### Structure Learning

385

386

```python

387

from pgmpy.estimators import HillClimbSearch, PC

388

from pgmpy.estimators import BIC

389

390

# Score-based structure learning

391

hc = HillClimbSearch(data)

392

best_model = hc.estimate()

393

394

# Constraint-based structure learning

395

pc = PC(data)

396

causal_model = pc.estimate(ci_test="chi_square", significance_level=0.01)

397

398

# Structure scoring

399

scoring_method = BIC(data)

400

score = scoring_method.score(best_model)

401

```