or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-io.md evaluation.md factors.md index.md inference.md learning.md models.md

docs/data-io.md

0

# Data I/O and Sampling

1

2

File I/O capabilities for various formats and sampling algorithms for generating data from probabilistic models. pgmpy supports multiple file formats and provides comprehensive sampling methods.

3

4

## Capabilities

5

6

### File Format Readers and Writers

7

8

#### BIF Format (Bayesian Interchange Format)

9

10

```python { .api }

11

class BIFReader:

12

def __init__(self, path):

13

"""

14

Read Bayesian networks from BIF format.

15

16

Parameters:

17

- path: file path to BIF file

18

"""

19

20

def get_model(self):

21

"""

22

Parse BIF file and create model.

23

24

Returns:

25

DiscreteBayesianNetwork: Parsed model

26

"""

27

28

class BIFWriter:

29

def __init__(self, model):

30

"""

31

Write Bayesian networks to BIF format.

32

33

Parameters:

34

- model: DiscreteBayesianNetwork to write

35

"""

36

37

def write_bif(self, filename):

38

"""

39

Write model to BIF file.

40

41

Parameters:

42

- filename: output file path

43

"""

44

```

45

46

#### XML-BIF Format

47

48

```python { .api }

49

class XMLBIFReader:

50

def __init__(self, path):

51

"""Read XML BIF format files."""

52

53

def get_model(self):

54

"""Parse XML BIF and create model."""

55

56

class XMLBIFWriter:

57

def __init__(self, model):

58

"""Write XML BIF format files."""

59

60

def write_xmlbif(self, filename):

61

"""Write model in XML BIF format."""

62

```

63

64

#### Other Supported Formats

65

66

```python { .api }

67

# XDSL (GeNIe format)

68

class XDSLReader:

69

def __init__(self, path):

70

"""Read GeNIe XDSL format."""

71

72

class XDSLWriter:

73

def __init__(self, model):

74

"""Write GeNIe XDSL format."""

75

76

# NET (HUGIN format)

77

class NETReader:

78

def __init__(self, path):

79

"""Read HUGIN NET format."""

80

81

class NETWriter:

82

def __init__(self, model):

83

"""Write HUGIN NET format."""

84

85

# UAI format

86

class UAIReader:

87

def __init__(self, path):

88

"""Read UAI competition format."""

89

90

class UAIWriter:

91

def __init__(self, model):

92

"""Write UAI competition format."""

93

```

94

95

#### XBN Format

96

97

```python { .api }

98

class XBNReader:

99

def __init__(self, path):

100

"""

101

Read Bayesian networks from XBN format.

102

103

Parameters:

104

- path: file path to XBN file

105

"""

106

107

def get_model(self):

108

"""Parse XBN file and create model."""

109

110

class XBNWriter:

111

def __init__(self, model):

112

"""

113

Write Bayesian networks to XBN format.

114

115

Parameters:

116

- model: DiscreteBayesianNetwork to write

117

"""

118

119

def write_xbn(self, filename):

120

"""Write model to XBN file."""

121

```

122

123

#### PomdpX Format

124

125

```python { .api }

126

class PomdpXReader:

127

def __init__(self, path):

128

"""

129

Read models from PomdpX format.

130

131

Parameters:

132

- path: file path to PomdpX file

133

"""

134

135

def get_model(self):

136

"""Parse PomdpX file and create model."""

137

138

class PomdpXWriter:

139

def __init__(self, model):

140

"""

141

Write models to PomdpX format.

142

143

Parameters:

144

- model: model to write

145

"""

146

147

def write_pomdpx(self, filename):

148

"""Write model to PomdpX file."""

149

```

150

151

### Sampling Algorithms

152

153

#### Forward Sampling

154

155

```python { .api }

156

class BayesianModelSampling:

157

def __init__(self, model):

158

"""

159

Sampling algorithms for Bayesian networks.

160

161

Parameters:

162

- model: DiscreteBayesianNetwork to sample from

163

"""

164

165

def forward_sample(self, size=1, seed=None, include_latents=False,

166

partial_samples=None, show_progress=True):

167

"""

168

Generate samples using forward sampling.

169

170

Parameters:

171

- size: number of samples to generate

172

- seed: random seed for reproducibility

173

- include_latents: whether to include latent variables

174

- partial_samples: DataFrame with partial variable assignments

175

- show_progress: whether to show progress bar

176

177

Returns:

178

pandas.DataFrame: Generated samples

179

"""

180

181

def rejection_sample(self, evidence=[], size=1, seed=None,

182

include_latents=False, show_progress=True):

183

"""

184

Generate samples using rejection sampling.

185

186

Parameters:

187

- evidence: list of State objects representing evidence

188

- size: number of samples to generate

189

- seed: random seed

190

- include_latents: whether to include latent variables

191

- show_progress: whether to show progress bar

192

193

Returns:

194

pandas.DataFrame: Samples consistent with evidence

195

"""

196

197

def likelihood_weighted_sample(self, evidence=[], size=1, seed=None,

198

include_latents=False, show_progress=True):

199

"""

200

Generate weighted samples using likelihood weighting.

201

202

Parameters:

203

- evidence: list of evidence State objects

204

- size: number of samples

205

- seed: random seed

206

- include_latents: whether to include latents

207

- show_progress: whether to show progress bar

208

209

Returns:

210

pandas.DataFrame: Weighted samples with 'weight' column

211

"""

212

```

213

214

#### MCMC Sampling

215

216

```python { .api }

217

class GibbsSampling:

218

def __init__(self, model=None):

219

"""

220

Gibbs sampling for MCMC-based inference.

221

222

Parameters:

223

- model: DiscreteBayesianNetwork or DiscreteMarkovNetwork

224

"""

225

226

def sample(self, start_state=None, size=1, seed=None, include_latents=False):

227

"""

228

Generate samples using Gibbs sampling MCMC.

229

230

Parameters:

231

- start_state: initial state for Markov chain

232

- size: number of samples to generate

233

- seed: random seed

234

- include_latents: whether to include latent variables

235

236

Returns:

237

pandas.DataFrame: MCMC samples from posterior

238

"""

239

240

def generate_sample(self, start_state=None, size=1, seed=None, include_latents=False):

241

"""Generator version of sample(): yields the samples one at a time instead of returning a DataFrame."""

242

```

243

244

### Utility Functions

245

246

```python { .api }

247

def _return_samples(samples, return_type='dataframe'):

248

"""

249

Utility function for formatting sample output.

250

251

Parameters:

252

- samples: raw sample data

253

- return_type: format for returned samples

254

255

Returns:

256

pandas.DataFrame or dict: Formatted samples

257

"""

258

259

# Data processing utilities

260

def discretize(data, cardinality, labels=dict(), method="rounding"):

261

"""

262

Discretize continuous data into discrete bins.

263

264

Parameters:

265

- data: pandas.DataFrame with continuous variables

266

- cardinality: dict of variable cardinalities {var: n_bins}

267

- labels: dict of bin labels {var: [label1, label2, ...]}

268

- method: discretization method ('rounding' for equal-width bins, 'quantile' for equal-frequency bins)

269

270

Returns:

271

pandas.DataFrame: Discretized data

272

"""

273

274

def preprocess_data(df):

275

"""

276

Preprocess data for use with pgmpy models.

277

278

Parameters:

279

- df: pandas.DataFrame with raw data

280

281

Returns:

282

pandas.DataFrame: Preprocessed data ready for modeling

283

"""

284

285

def get_example_model(model):

286

"""

287

Get predefined example model by name.

288

289

Parameters:

290

- model: string name of example model

291

292

Returns:

293

DiscreteBayesianNetwork: Example model

294

"""

295

```

296

297

## Usage Examples

298

299

### Loading and Saving Models

300

301

```python

302

from pgmpy.readwrite import BIFReader, BIFWriter

303

from pgmpy.models import DiscreteBayesianNetwork

304

305

# Load model from BIF file

306

reader = BIFReader('model.bif')

307

model = reader.get_model()

308

309

# Save model to BIF file

310

writer = BIFWriter(model)

311

writer.write_bif('output_model.bif')

312

313

# Using model's built-in save/load methods

314

model.save('model.bif', filetype='bif')

315

loaded_model = DiscreteBayesianNetwork.load('model.bif', filetype='bif')

316

```

317

318

### Generating Samples

319

320

```python

321

from pgmpy.sampling import BayesianModelSampling

322

from pgmpy.factors.discrete import State

323

324

# Initialize sampler

325

sampler = BayesianModelSampling(model)

326

327

# Forward sampling

328

samples = sampler.forward_sample(size=1000, seed=42)

329

print(samples.head())

330

331

# Rejection sampling with evidence

332

evidence = [State('A', 1)]

333

conditional_samples = sampler.rejection_sample(

334

evidence=evidence,

335

size=500,

336

seed=42

337

)

338

339

# Likelihood weighted sampling

340

weighted_samples = sampler.likelihood_weighted_sample(

341

evidence=evidence,

342

size=1000,

343

seed=42

344

)

345

print("Weights:", weighted_samples['weight'].describe())

346

```

347

348

### MCMC Sampling

349

350

```python

351

from pgmpy.sampling import GibbsSampling

352

353

# Initialize Gibbs sampler

354

gibbs = GibbsSampling(model)

355

356

# Generate MCMC samples

357

mcmc_samples = gibbs.sample(

358

start_state={'A': 0, 'B': 1, 'C': 0},

359

size=10000,

360

seed=42

361

)

362

363

# Check convergence (simplified)

364

print("Sample means:", mcmc_samples.mean())

365

print("Sample variance:", mcmc_samples.var())

366

```

367

368

### Data Preprocessing

369

370

```python

371

from pgmpy.utils import discretize, preprocess_data

372

import pandas as pd

373

import numpy as np

374

375

# Create continuous data

376

continuous_data = pd.DataFrame({

377

'height': np.random.normal(170, 10, 1000),

378

'weight': np.random.normal(70, 15, 1000),

379

'age': np.random.uniform(18, 80, 1000)

380

})

381

382

# Discretize continuous variables

383

discrete_data = discretize(

384

continuous_data,

385

cardinality={'height': 3, 'weight': 3, 'age': 4},

386

labels={

387

'height': ['short', 'medium', 'tall'],

388

'weight': ['light', 'medium', 'heavy'],

389

'age': ['young', 'adult', 'middle', 'senior']

390

},

391

method='quantile'

392

)

393

394

# Preprocess for modeling

395

processed_data = preprocess_data(discrete_data)

396

```