0
# Data I/O and Sampling
1
2
This section covers pgmpy's file I/O support for several model-interchange formats and its sampling algorithms for generating data from probabilistic models.
3
4
## Capabilities
5
6
### File Format Readers and Writers
7
8
#### BIF Format (Bayesian Interchange Format)
9
10
```python { .api }
11
class BIFReader:
12
def __init__(self, path):
13
"""
14
Read Bayesian networks from BIF format.
15
16
Parameters:
17
- path: file path to BIF file
18
"""
19
20
def get_model(self):
21
"""
22
Parse BIF file and create model.
23
24
Returns:
25
DiscreteBayesianNetwork: Parsed model
26
"""
27
28
class BIFWriter:
29
def __init__(self, model):
30
"""
31
Write Bayesian networks to BIF format.
32
33
Parameters:
34
- model: DiscreteBayesianNetwork to write
35
"""
36
37
def write_bif(self, filename):
38
"""
39
Write model to BIF file.
40
41
Parameters:
42
- filename: output file path
43
"""
44
```
45
46
#### XML-BIF Format
47
48
```python { .api }
49
class XMLBIFReader:
50
def __init__(self, path):
51
"""Read XML BIF format files."""
52
53
def get_model(self):
54
"""Parse XML BIF and create model."""
55
56
class XMLBIFWriter:
57
def __init__(self, model):
58
"""Write XML BIF format files."""
59
60
def write_xmlbif(self, filename):
61
"""Write model in XML BIF format."""
62
```
63
64
#### Other Supported Formats
65
66
```python { .api }
67
# XDSL (GeNIe format)
68
class XDSLReader:
69
def __init__(self, path):
70
"""Read GeNIe XDSL format."""
71
72
class XDSLWriter:
73
def __init__(self, model):
74
"""Write GeNIe XDSL format."""
75
76
# NET (HUGIN format)
76
class NETReader:
77
def __init__(self, path):
78
"""Read HUGIN NET format."""
79
80
class NETWriter:
81
def __init__(self, model):
82
"""Write HUGIN NET format."""
84
85
# UAI format
86
class UAIReader:
87
def __init__(self, path):
88
"""Read UAI competition format."""
89
90
class UAIWriter:
91
def __init__(self, model):
92
"""Write UAI competition format."""
93
```
94
95
#### XBN Format
96
97
```python { .api }
98
class XBNReader:
99
def __init__(self, path):
100
"""
101
Read Bayesian networks from XBN format.
102
103
Parameters:
104
- path: file path to XBN file
105
"""
106
107
def get_model(self):
108
"""Parse XBN file and create model."""
109
110
class XBNWriter:
111
def __init__(self, model):
112
"""
113
Write Bayesian networks to XBN format.
114
115
Parameters:
116
- model: DiscreteBayesianNetwork to write
117
"""
118
119
def write_xbn(self, filename):
120
"""Write model to XBN file."""
121
```
122
123
#### PomdpX Format
124
125
```python { .api }
126
class PomdpXReader:
127
def __init__(self, path):
128
"""
129
Read models from PomdpX format.
130
131
Parameters:
132
- path: file path to PomdpX file
133
"""
134
135
def get_model(self):
136
"""Parse PomdpX file and create model."""
137
138
class PomdpXWriter:
139
def __init__(self, model):
140
"""
141
Write models to PomdpX format.
142
143
Parameters:
144
- model: model to write
145
"""
146
147
def write_pomdpx(self, filename):
148
"""Write model to PomdpX file."""
149
```
150
151
### Sampling Algorithms
152
153
#### Forward Sampling
154
155
```python { .api }
156
class BayesianModelSampling:
157
def __init__(self, model):
158
"""
159
Sampling algorithms for Bayesian networks.
160
161
Parameters:
162
- model: DiscreteBayesianNetwork to sample from
163
"""
164
165
def forward_sample(self, size=1, seed=None, include_latents=False,
166
partial_samples=None, show_progress=True):
167
"""
168
Generate samples using forward sampling.
169
170
Parameters:
171
- size: number of samples to generate
172
- seed: random seed for reproducibility
173
- include_latents: whether to include latent variables
174
- partial_samples: DataFrame with partial variable assignments
175
- show_progress: whether to show progress bar
176
177
Returns:
178
pandas.DataFrame: Generated samples
179
"""
180
181
def rejection_sample(self, evidence=[], size=1, seed=None,
182
include_latents=False, show_progress=True):
183
"""
184
Generate samples using rejection sampling.
185
186
Parameters:
187
- evidence: list of State objects representing evidence
188
- size: number of samples to generate
189
- seed: random seed
190
- include_latents: whether to include latent variables
191
- show_progress: whether to show progress bar
192
193
Returns:
194
pandas.DataFrame: Samples consistent with evidence
195
"""
196
197
def likelihood_weighted_sample(self, evidence=[], size=1, seed=None,
198
include_latents=False, show_progress=True):
199
"""
200
Generate weighted samples using likelihood weighting.
201
202
Parameters:
203
- evidence: list of evidence State objects
204
- size: number of samples
205
- seed: random seed
206
- include_latents: whether to include latents
207
- show_progress: whether to show progress bar
208
209
Returns:
210
pandas.DataFrame: Weighted samples with '_weight' column
211
"""
212
```
213
214
#### MCMC Sampling
215
216
```python { .api }
217
class GibbsSampling:
218
def __init__(self, model=None):
219
"""
220
Gibbs sampling for MCMC-based inference.
221
222
Parameters:
223
- model: DiscreteBayesianNetwork or MarkovNetwork
224
"""
225
226
def sample(self, start_state=None, size=1, seed=None, include_latents=False):
227
"""
228
Generate samples using Gibbs sampling MCMC.
229
230
Parameters:
231
- start_state: initial state for Markov chain
232
- size: number of samples to generate
233
- seed: random seed
234
- include_latents: whether to include latent variables
235
236
Returns:
237
pandas.DataFrame: MCMC samples from posterior
238
"""
239
240
def generate_sample(self, start_state=None, size=1, seed=None, include_latents=False):
241
"""Generator version of sample(): lazily yields the chain's samples one at a time."""
242
```
243
244
### Utility Functions
245
246
```python { .api }
247
def _return_samples(samples, return_type='dataframe'):
248
"""
249
Utility function for formatting sample output.
250
251
Parameters:
252
- samples: raw sample data
253
- return_type: format for returned samples
254
255
Returns:
256
pandas.DataFrame or dict: Formatted samples
257
"""
258
259
# Data processing utilities
260
def discretize(data, cardinality, labels=dict(), method="rounding"):
261
"""
262
Discretize continuous data into discrete bins.
263
264
Parameters:
265
- data: pandas.DataFrame with continuous variables
266
- cardinality: dict of variable cardinalities {var: n_bins}
267
- labels: dict of bin labels {var: [label1, label2, ...]}
268
- method: discretization method: 'rounding' (equal-width bins, as in pandas.cut) or 'quantile' (equal-frequency bins, as in pandas.qcut)
269
270
Returns:
271
pandas.DataFrame: Discretized data
272
"""
273
274
def preprocess_data(df):
275
"""
276
Preprocess data for use with pgmpy models.
277
278
Parameters:
279
- df: pandas.DataFrame with raw data
280
281
Returns:
282
(pandas.DataFrame, dict): Transformed data and a dict with the inferred data type of each column
283
"""
284
285
def get_example_model(model):
286
"""
287
Get predefined example model by name.
288
289
Parameters:
290
- model: string name of example model
291
292
Returns:
293
DiscreteBayesianNetwork: Example model
294
"""
295
```
296
297
## Usage Examples
298
299
### Loading and Saving Models
300
301
```python
302
from pgmpy.readwrite import BIFReader, BIFWriter
303
from pgmpy.models import DiscreteBayesianNetwork
304
305
# Load model from BIF file
306
reader = BIFReader('model.bif')
307
model = reader.get_model()
308
309
# Save model to BIF file
310
writer = BIFWriter(model)
311
writer.write_bif('output_model.bif')
312
313
# Using model's built-in save/load methods
314
model.save('model.bif', filetype='bif')
315
loaded_model = DiscreteBayesianNetwork.load('model.bif', filetype='bif')
316
```
317
318
### Generating Samples
319
320
```python
321
from pgmpy.sampling import BayesianModelSampling
322
from pgmpy.factors.discrete import State
323
324
# Initialize sampler
325
sampler = BayesianModelSampling(model)
326
327
# Forward sampling
328
samples = sampler.forward_sample(size=1000, seed=42)
329
print(samples.head())
330
331
# Rejection sampling with evidence
332
evidence = [State('A', 1)]
333
conditional_samples = sampler.rejection_sample(
334
evidence=evidence,
335
size=500,
336
seed=42
337
)
338
339
# Likelihood weighted sampling
340
weighted_samples = sampler.likelihood_weighted_sample(
341
evidence=evidence,
342
size=1000,
343
seed=42
344
)
344
print("Weights:", weighted_samples['_weight'].describe())
346
```
347
348
### MCMC Sampling
349
350
```python
351
from pgmpy.sampling import GibbsSampling
352
353
# Initialize Gibbs sampler
354
gibbs = GibbsSampling(model)
355
356
# Generate MCMC samples
357
mcmc_samples = gibbs.sample(
358
start_state={'A': 0, 'B': 1, 'C': 0},
359
size=10000,
360
seed=42
361
)
362
363
# Check convergence (simplified)
364
print("Sample means:", mcmc_samples.mean())
365
print("Sample variance:", mcmc_samples.var())
366
```
367
368
### Data Preprocessing
369
370
```python
371
from pgmpy.utils import discretize, preprocess_data
372
import pandas as pd
373
import numpy as np
374
375
# Create continuous data
376
continuous_data = pd.DataFrame({
377
'height': np.random.normal(170, 10, 1000),
378
'weight': np.random.normal(70, 15, 1000),
379
'age': np.random.uniform(18, 80, 1000)
380
})
381
382
# Discretize continuous variables
383
discrete_data = discretize(
384
continuous_data,
385
cardinality={'height': 3, 'weight': 3, 'age': 4},
386
labels={
387
'height': ['short', 'medium', 'tall'],
388
'weight': ['light', 'medium', 'heavy'],
389
'age': ['young', 'adult', 'middle', 'senior']
390
},
391
method='quantile'
392
)
393
394
# Preprocess for modeling
395
processed_data, inferred_dtypes = preprocess_data(discrete_data)
396
```