pgmpy — a library for Probabilistic Graphical Models

File I/O and sampling: pgmpy provides readers and writers for multiple Bayesian-network file formats (BIF, XMLBIF, XDSL, NET, UAI, XBN, PomdpX) and comprehensive sampling algorithms for generating data from probabilistic models.
class BIFReader:
def __init__(self, path):
"""
Read Bayesian networks from BIF format.
Parameters:
- path: file path to BIF file
"""
def get_model(self):
"""
Parse BIF file and create model.
Returns:
DiscreteBayesianNetwork: Parsed model
"""
class BIFWriter:
def __init__(self, model):
"""
Write Bayesian networks to BIF format.
Parameters:
- model: DiscreteBayesianNetwork to write
"""
def write_bif(self, filename):
"""
Write model to BIF file.
Parameters:
- filename: output file path
"""

class XMLBIFReader:
def __init__(self, path):
"""Read XML BIF format files."""
def get_model(self):
"""Parse XML BIF and create model."""
class XMLBIFWriter:
def __init__(self, model):
"""Write XML BIF format files."""
def write_xmlbif(self, filename):
"""Write model in XML BIF format."""

# XDSL (GeNIe format)
class XDSLReader:
def __init__(self, path):
"""Read GeNIe XDSL format."""
class XDSLWriter:
def __init__(self, model):
"""Write GeNIe XDSL format."""
# NET (Hugin format)
class NETReader:
def __init__(self, path):
"""Read Hugin NET format."""
class NETWriter:
def __init__(self, model):
"""Write Hugin NET format."""
# UAI format
class UAIReader:
def __init__(self, path):
"""Read UAI competition format."""
class UAIWriter:
def __init__(self, model):
"""Write UAI competition format."""

class XBNReader:
def __init__(self, path):
"""
Read Bayesian networks from XBN format.
Parameters:
- path: file path to XBN file
"""
def get_model(self):
"""Parse XBN file and create model."""
class XBNWriter:
def __init__(self, model):
"""
Write Bayesian networks to XBN format.
Parameters:
- model: DiscreteBayesianNetwork to write
"""
def write_xbn(self, filename):
"""Write model to XBN file."""

class PomdpXReader:
def __init__(self, path):
"""
Read models from PomdpX format.
Parameters:
- path: file path to PomdpX file
"""
def get_model(self):
"""Parse PomdpX file and create model."""
class PomdpXWriter:
def __init__(self, model):
"""
Write models to PomdpX format.
Parameters:
- model: model to write
"""
def write_pomdpx(self, filename):
"""Write model to PomdpX file."""

class BayesianModelSampling:
def __init__(self, model):
"""
Sampling algorithms for Bayesian networks.
Parameters:
- model: DiscreteBayesianNetwork to sample from
"""
def forward_sample(self, size=1, seed=None, include_latents=False,
partial_samples=None, show_progress=True):
"""
Generate samples using forward sampling.
Parameters:
- size: number of samples to generate
- seed: random seed for reproducibility
- include_latents: whether to include latent variables
- partial_samples: DataFrame with partial variable assignments
- show_progress: whether to show progress bar
Returns:
pandas.DataFrame: Generated samples
"""
def rejection_sample(self, evidence=[], size=1, seed=None,
include_latents=False, show_progress=True):
"""
Generate samples using rejection sampling.
Parameters:
- evidence: list of State objects representing evidence
- size: number of samples to generate
- seed: random seed
- include_latents: whether to include latent variables
- show_progress: whether to show progress bar
Returns:
pandas.DataFrame: Samples consistent with evidence
"""
def likelihood_weighted_sample(self, evidence=[], size=1, seed=None,
include_latents=False, show_progress=True):
"""
Generate weighted samples using likelihood weighting.
Parameters:
- evidence: list of evidence State objects
- size: number of samples
- seed: random seed
- include_latents: whether to include latents
- show_progress: whether to show progress bar
Returns:
pandas.DataFrame: Weighted samples with an additional '_weight' column
"""

class GibbsSampling:
def __init__(self, model=None):
"""
Gibbs sampling for MCMC-based inference.
Parameters:
- model: DiscreteBayesianNetwork or MarkovNetwork
"""
def sample(self, start_state=None, size=1, seed=None, include_latents=False):
"""
Generate samples using Gibbs sampling MCMC.
Parameters:
- start_state: initial state for Markov chain
- size: number of samples to generate
- seed: random seed
- include_latents: whether to include latent variables
Returns:
pandas.DataFrame: MCMC samples from posterior
"""
def generate_sample(self, start_state=None, size=1, seed=None, include_latents=False):
"""Generator version of sample(): yields the chain's samples one at a time."""

def _return_samples(samples, return_type='dataframe'):
"""
Utility function for formatting sample output.
Parameters:
- samples: raw sample data
- return_type: format for returned samples
Returns:
pandas.DataFrame or dict: Formatted samples
"""
# Data processing utilities
def discretize(data, cardinality, labels=dict(), method="rounding"):
"""
Discretize continuous data into discrete bins.
Parameters:
- data: pandas.DataFrame with continuous variables
- cardinality: dict of variable cardinalities {var: n_bins}
- labels: dict of bin labels {var: [label1, label2, ...]}
- method: discretization method ('rounding', 'uniform', 'quantile')
Returns:
pandas.DataFrame: Discretized data
"""
def preprocess_data(df):
"""
Preprocess data for use with pgmpy models.
Parameters:
- df: pandas.DataFrame with raw data
Returns:
pandas.DataFrame: Preprocessed data ready for modeling
"""
def get_example_model(model):
"""
Get predefined example model by name.
Parameters:
- model: string name of example model
Returns:
DiscreteBayesianNetwork: Example model
"""

from pgmpy.readwrite import BIFReader, BIFWriter
from pgmpy.models import DiscreteBayesianNetwork
# Load model from BIF file
reader = BIFReader('model.bif')
model = reader.get_model()
# Save model to BIF file
writer = BIFWriter(model)
writer.write_bif('output_model.bif')
# Using model's built-in save/load methods
model.save('model.bif', filetype='bif')
loaded_model = DiscreteBayesianNetwork.load('model.bif', filetype='bif')

from pgmpy.sampling import BayesianModelSampling
from pgmpy.factors.discrete import State
# Initialize sampler
sampler = BayesianModelSampling(model)
# Forward sampling
samples = sampler.forward_sample(size=1000, seed=42)
print(samples.head())
# Rejection sampling with evidence
evidence = [State('A', 1)]
conditional_samples = sampler.rejection_sample(
evidence=evidence,
size=500,
seed=42
)
# Likelihood weighted sampling
weighted_samples = sampler.likelihood_weighted_sample(
evidence=evidence,
size=1000,
seed=42
)
print("Weights:", weighted_samples['_weight'].describe())

from pgmpy.sampling import GibbsSampling
# Initialize Gibbs sampler
gibbs = GibbsSampling(model)
# Generate MCMC samples
mcmc_samples = gibbs.sample(
start_state={'A': 0, 'B': 1, 'C': 0},
size=10000,
seed=42
)
# Inspect summary statistics (a rough sanity check, not a formal convergence diagnostic)
print("Sample means:", mcmc_samples.mean())
print("Sample variance:", mcmc_samples.var())

from pgmpy.utils import discretize, preprocess_data
import pandas as pd
import numpy as np
# Create continuous data
continuous_data = pd.DataFrame({
'height': np.random.normal(170, 10, 1000),
'weight': np.random.normal(70, 15, 1000),
'age': np.random.uniform(18, 80, 1000)
})
# Discretize continuous variables
discrete_data = discretize(
continuous_data,
cardinality={'height': 3, 'weight': 3, 'age': 4},
labels={
'height': ['short', 'medium', 'tall'],
'weight': ['light', 'medium', 'heavy'],
'age': ['young', 'adult', 'middle', 'senior']
},
method='quantile'
)
# Preprocess for modeling
processed_data = preprocess_data(discrete_data)

Install with Tessl CLI
npx tessl i tessl/pypi-pgmpy