Tessl Tile for pypi/pgmpy@1.0.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

data-io.md evaluation.md factors.md index.md inference.md learning.md models.md

data-io.mddocs/

0
# Data I/O and Sampling
1

2
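Readers and writers for common probabilistic-model file formats, plus sampling algorithms for generating data from probabilistic models. pgmpy supports multiple file formats (BIF, XML-BIF, XDSL, NET, UAI, XBN, PomdpX) and several sampling methods (forward, rejection, likelihood-weighted, and Gibbs sampling).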
3

4
## Capabilities
5

6
### File Format Readers and Writers
7

8
#### BIF Format (Bayesian Interchange Format)
9

10
```python { .api }
11
class BIFReader:
12
    def __init__(self, path):
13
        """
14
        Read Bayesian networks from BIF format.
15
        
16
        Parameters:
17
        - path: file path to BIF file
18
        """
19
    
20
    def get_model(self):
21
        """
22
        Parse BIF file and create model.
23
        
24
        Returns:
25
        DiscreteBayesianNetwork: Parsed model
26
        """
27

28
class BIFWriter:
29
    def __init__(self, model):
30
        """
31
        Write Bayesian networks to BIF format.
32
        
33
        Parameters:
34
        - model: DiscreteBayesianNetwork to write
35
        """
36
    
37
    def write_bif(self, filename):
38
        """
39
        Write model to BIF file.
40
        
41
        Parameters:
42
        - filename: output file path
43
        """
44
```
45

46
#### XML-BIF Format
47

48
```python { .api }
49
class XMLBIFReader:
50
    def __init__(self, path):
51
        """Read XML BIF format files."""
52
    
53
    def get_model(self):
54
        """Parse XML BIF and create model."""
55

56
class XMLBIFWriter:
57
    def __init__(self, model):
58
        """Write XML BIF format files."""
59
    
60
    def write_xmlbif(self, filename):
61
        """Write model in XML BIF format."""
62
```
63

64
#### Other Supported Formats
65

66
```python { .api }
67
# XDSL (GeNIe format)
68
class XDSLReader:
69
    def __init__(self, path):
70
        """Read GeNIe XDSL format."""
71

72
class XDSLWriter:
73
    def __init__(self, model):
74
        """Write GeNIe XDSL format."""
75

76
# NET (HUGIN format)
77
class NETReader:
78
    def __init__(self, path):
79
        """Read HUGIN NET format."""
80

81
class NETWriter:
82
    def __init__(self, model):
83
        """Write HUGIN NET format."""
84

85
# UAI format
86
class UAIReader:
87
    def __init__(self, path):
88
        """Read UAI competition format."""
89

90
class UAIWriter:
91
    def __init__(self, model):
92
        """Write UAI competition format."""
93
```
94

95
#### XBN Format
96

97
```python { .api }
98
class XBNReader:
99
    def __init__(self, path):
100
        """
101
        Read Bayesian networks from XBN format.
102
        
103
        Parameters:
104
        - path: file path to XBN file
105
        """
106
    
107
    def get_model(self):
108
        """Parse XBN file and create model."""
109

110
class XBNWriter:
111
    def __init__(self, model):
112
        """
113
        Write Bayesian networks to XBN format.
114
        
115
        Parameters:
116
        - model: DiscreteBayesianNetwork to write
117
        """
118
    
119
    def write_xbn(self, filename):
120
        """Write model to XBN file."""
121
```
122

123
#### PomdpX Format
124

125
```python { .api }
126
class PomdpXReader:
127
    def __init__(self, path):
128
        """
129
        Read models from PomdpX format.
130
        
131
        Parameters:
132
        - path: file path to PomdpX file
133
        """
134
    
135
    def get_model(self):
136
        """Parse PomdpX file and create model."""
137

138
class PomdpXWriter:
139
    def __init__(self, model):
140
        """
141
        Write models to PomdpX format.
142
        
143
        Parameters:
144
        - model: model to write
145
        """
146
    
147
    def write_pomdpx(self, filename):
148
        """Write model to PomdpX file."""
149
```
150

151
### Sampling Algorithms
152

153
#### Forward Sampling
154

155
```python { .api }
156
class BayesianModelSampling:
157
    def __init__(self, model):
158
        """
159
        Sampling algorithms for Bayesian networks.
160
        
161
        Parameters:
162
        - model: DiscreteBayesianNetwork to sample from
163
        """
164
    
165
    def forward_sample(self, size=1, seed=None, include_latents=False, 
166
                      partial_samples=None, show_progress=True):
167
        """
168
        Generate samples using forward sampling.
169
        
170
        Parameters:
171
        - size: number of samples to generate
172
        - seed: random seed for reproducibility
173
        - include_latents: whether to include latent variables
174
        - partial_samples: DataFrame with partial variable assignments
175
        - show_progress: whether to show progress bar
176
        
177
        Returns:
178
        pandas.DataFrame: Generated samples
179
        """
180
    
181
    def rejection_sample(self, evidence=[], size=1, seed=None, 
182
                        include_latents=False, show_progress=True):
183
        """
184
        Generate samples using rejection sampling.
185
        
186
        Parameters:
187
        - evidence: list of State objects representing evidence
188
        - size: number of samples to generate
189
        - seed: random seed
190
        - include_latents: whether to include latent variables
191
        - show_progress: whether to show progress bar
192
        
193
        Returns:
194
        pandas.DataFrame: Samples consistent with evidence
195
        """
196
    
197
    def likelihood_weighted_sample(self, evidence=[], size=1, seed=None,
198
                                  include_latents=False, show_progress=True):
199
        """
200
        Generate weighted samples using likelihood weighting.
201
        
202
        Parameters:
203
        - evidence: list of evidence State objects
204
        - size: number of samples
205
        - seed: random seed
206
        - include_latents: whether to include latents
207
        - show_progress: whether to show progress bar
208
        
209
        Returns:
210
        pandas.DataFrame: Weighted samples with '_weight' column
211
        """
212
```
213

214
#### MCMC Sampling
215

216
```python { .api }
217
class GibbsSampling:
218
    def __init__(self, model=None):
219
        """
220
        Gibbs sampling for MCMC-based inference.
221
        
222
        Parameters:
223
        - model: DiscreteBayesianNetwork or MarkovNetwork
224
        """
225
    
226
    def sample(self, start_state=None, size=1, seed=None, include_latents=False):
227
        """
228
        Generate samples using Gibbs sampling MCMC.
229
        
230
        Parameters:
231
        - start_state: initial state for Markov chain
232
        - size: number of samples to generate
233
        - seed: random seed
234
        - include_latents: whether to include latent variables
235
        
236
        Returns:
237
        pandas.DataFrame: MCMC samples from posterior
238
        """
239
    
240
    def generate_sample(self, start_state=None, size=1, seed=None, include_latents=False):
240
        """Generator version of sample(): yields samples one at a time, `size` samples in total."""
242
```
243

244
### Utility Functions
245

246
```python { .api }
247
def _return_samples(samples, return_type='dataframe'):
248
    """
249
    Utility function for formatting sample output.
250
    
251
    Parameters:
252
    - samples: raw sample data
253
    - return_type: format for returned samples
254
    
255
    Returns:
256
    pandas.DataFrame or dict: Formatted samples
257
    """
258

259
# Data processing utilities
260
def discretize(data, cardinality, labels=dict(), method="rounding"):
261
    """
262
    Discretize continuous data into discrete bins.
263
    
264
    Parameters:
265
    - data: pandas.DataFrame with continuous variables
266
    - cardinality: dict of variable cardinalities {var: n_bins}
267
    - labels: dict of bin labels {var: [label1, label2, ...]}
268
    - method: discretization method ('rounding', 'uniform', 'quantile')
269
    
270
    Returns:
271
    pandas.DataFrame: Discretized data
272
    """
273

274
def preprocess_data(df):
275
    """
276
    Preprocess data for use with pgmpy models.
277
    
278
    Parameters:
279
    - df: pandas.DataFrame with raw data
280
    
281
    Returns:
282
    pandas.DataFrame: Preprocessed data ready for modeling
283
    """
284

285
def get_example_model(model):
286
    """
287
    Get predefined example model by name.
288
    
289
    Parameters:
290
    - model: string name of example model
291
    
292
    Returns:
293
    DiscreteBayesianNetwork: Example model
294
    """
295
```
296

297
## Usage Examples
298

299
### Loading and Saving Models
300

301
```python
302
from pgmpy.readwrite import BIFReader, BIFWriter
303
from pgmpy.models import DiscreteBayesianNetwork
304

305
# Load model from BIF file
306
reader = BIFReader('model.bif')
307
model = reader.get_model()
308

309
# Save model to BIF file
310
writer = BIFWriter(model)
311
writer.write_bif('output_model.bif')
312

313
# Using model's built-in save/load methods
314
model.save('model.bif', filetype='bif')
315
loaded_model = DiscreteBayesianNetwork.load('model.bif', filetype='bif')
316
```
317

318
### Generating Samples
319

320
```python
321
from pgmpy.sampling import BayesianModelSampling
322
from pgmpy.factors.discrete import State
323

324
# Initialize sampler
325
sampler = BayesianModelSampling(model)
326

327
# Forward sampling
328
samples = sampler.forward_sample(size=1000, seed=42)
329
print(samples.head())
330

331
# Rejection sampling with evidence
332
evidence = [State('A', 1)]
333
conditional_samples = sampler.rejection_sample(
334
    evidence=evidence, 
335
    size=500,
336
    seed=42
337
)
338

339
# Likelihood weighted sampling
340
weighted_samples = sampler.likelihood_weighted_sample(
341
    evidence=evidence,
342
    size=1000,
343
    seed=42
344
)
345
print("Weights:", weighted_samples['_weight'].describe())
346
```
347

348
### MCMC Sampling
349

350
```python
351
from pgmpy.sampling import GibbsSampling
352

353
# Initialize Gibbs sampler
354
gibbs = GibbsSampling(model)
355

356
# Generate MCMC samples
357
mcmc_samples = gibbs.sample(
358
    start_state={'A': 0, 'B': 1, 'C': 0},
359
    size=10000,
360
    seed=42
361
)
362

363
# Check convergence (simplified)
364
print("Sample means:", mcmc_samples.mean())
365
print("Sample variance:", mcmc_samples.var())
366
```
367

368
### Data Preprocessing
369

370
```python
371
from pgmpy.utils import discretize, preprocess_data
372
import pandas as pd
373
import numpy as np
374

375
# Create continuous data
376
continuous_data = pd.DataFrame({
377
    'height': np.random.normal(170, 10, 1000),
378
    'weight': np.random.normal(70, 15, 1000),
379
    'age': np.random.uniform(18, 80, 1000)
380
})
381

382
# Discretize continuous variables
383
discrete_data = discretize(
384
    continuous_data,
385
    cardinality={'height': 3, 'weight': 3, 'age': 4},
386
    labels={
387
        'height': ['short', 'medium', 'tall'],
388
        'weight': ['light', 'medium', 'heavy'],
389
        'age': ['young', 'adult', 'middle', 'senior']
390
    },
391
    method='quantile'
392
)
393

394
# Preprocess for modeling
395
processed_data = preprocess_data(discrete_data)
396
```

Version

Tile

Files

data-io.md docs/

data-io.mddocs/