CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pgmpy

A library for Probabilistic Graphical Models

Pending
Overview
Eval results
Files

docs/data-io.md

Data I/O and Sampling

This page covers file I/O for various model file formats and sampling algorithms for generating data from probabilistic models. pgmpy supports multiple file formats and provides comprehensive sampling methods.

Capabilities

File Format Readers and Writers

BIF Format (Bayesian Interchange Format)

class BIFReader:
    """Reader that loads a Bayesian network from a BIF file."""

    def __init__(self, path) -> None:
        """
        Read Bayesian networks from BIF format.

        Parameters:
        - path: file path to the BIF file to read
        """

    def get_model(self):
        """
        Parse the BIF file and create the model.

        Returns:
        DiscreteBayesianNetwork: the parsed model
        """

class BIFWriter:
    """Writer that saves a Bayesian network to a BIF file."""

    def __init__(self, model) -> None:
        """
        Write Bayesian networks to BIF format.

        Parameters:
        - model: DiscreteBayesianNetwork to write
        """

    def write_bif(self, filename):
        """
        Write the model to a BIF file.

        Parameters:
        - filename: output file path
        """

XML-BIF Format

class XMLBIFReader:
    """Reader for models stored in the XML BIF format."""

    def __init__(self, path) -> None:
        """
        Read XML BIF format files.

        Parameters:
        - path: presumably the path of the XML BIF file to read, as for
          BIFReader (TODO confirm)
        """

    def get_model(self):
        """
        Parse XML BIF and create model.

        Returns:
        The parsed model. NOTE(review): the return type is not stated here;
        BIFReader.get_model documents DiscreteBayesianNetwork - confirm the
        same applies.
        """

class XMLBIFWriter:
    """Writer for models in the XML BIF format."""

    def __init__(self, model) -> None:
        """
        Write XML BIF format files.

        Parameters:
        - model: the model to write (presumably a DiscreteBayesianNetwork,
          as for BIFWriter - TODO confirm)
        """

    def write_xmlbif(self, filename):
        """
        Write model in XML BIF format.

        Parameters:
        - filename: presumably the output file path (TODO confirm)
        """

Other Supported Formats

# XDSL (GeNIe format)
class XDSLReader:
    """Reader for the XDSL format used by GeNIe."""

    def __init__(self, path) -> None:
        """
        Read GeNIe XDSL format.

        Parameters:
        - path: presumably the XDSL file to read (TODO confirm)

        NOTE(review): no parsing method is listed for this reader here,
        unlike BIFReader.get_model - confirm against the library.
        """

class XDSLWriter:
    """Writer for the XDSL format used by GeNIe."""

    def __init__(self, model) -> None:
        """
        Write GeNIe XDSL format.

        Parameters:
        - model: the model to write (type not stated here - TODO confirm)

        NOTE(review): no write method is listed for this writer here,
        unlike BIFWriter.write_bif - confirm against the library.
        """

# NET (HUGIN format)
class NETReader:
    """Reader for networks stored in the HUGIN NET (.net) format."""

    def __init__(self, path):
        """
        Read HUGIN NET format.

        Parameters:
        - path: presumably the NET file to read (TODO confirm)
        """

class NETWriter:
    """Writer for networks in the HUGIN NET (.net) format."""

    def __init__(self, model):
        """
        Write HUGIN NET format.

        Parameters:
        - model: the model to write (type not stated here - TODO confirm)
        """

# UAI format
class UAIReader:
    """Reader for the UAI competition format."""

    def __init__(self, path) -> None:
        """
        Read UAI competition format.

        Parameters:
        - path: presumably the UAI file to read (TODO confirm)

        NOTE(review): no parsing method is listed for this reader here,
        unlike BIFReader.get_model - confirm against the library.
        """

class UAIWriter:
    """Writer for the UAI competition format."""

    def __init__(self, model) -> None:
        """
        Write UAI competition format.

        Parameters:
        - model: the model to write (type not stated here - TODO confirm)

        NOTE(review): no write method is listed for this writer here,
        unlike BIFWriter.write_bif - confirm against the library.
        """

XBN Format

class XBNReader:
    """Reader that loads a Bayesian network from an XBN file."""

    def __init__(self, path) -> None:
        """
        Read Bayesian networks from XBN format.

        Parameters:
        - path: file path to the XBN file to read
        """

    def get_model(self):
        """
        Parse the XBN file and create the model.

        Returns:
        The parsed model. NOTE(review): the return type is not stated here -
        confirm whether it is a DiscreteBayesianNetwork as for BIFReader.
        """

class XBNWriter:
    """Writer that saves a Bayesian network to an XBN file."""

    def __init__(self, model) -> None:
        """
        Write Bayesian networks to XBN format.

        Parameters:
        - model: DiscreteBayesianNetwork to write
        """

    def write_xbn(self, filename):
        """
        Write the model to an XBN file.

        Parameters:
        - filename: presumably the output file path (TODO confirm)
        """

PomdpX Format

class PomdpXReader:
    """Reader that loads a model from a PomdpX file."""

    def __init__(self, path) -> None:
        """
        Read models from PomdpX format.

        Parameters:
        - path: file path to the PomdpX file to read
        """

    def get_model(self):
        """
        Parse the PomdpX file and create the model.

        Returns:
        The parsed model. NOTE(review): the model type is not stated here -
        confirm against the library.
        """

class PomdpXWriter:
    """Writer that saves a model to a PomdpX file."""

    def __init__(self, model) -> None:
        """
        Write models to PomdpX format.

        Parameters:
        - model: model to write (type not stated here - TODO confirm)
        """

    def write_pomdpx(self, filename):
        """
        Write the model to a PomdpX file.

        Parameters:
        - filename: presumably the output file path (TODO confirm)
        """

Sampling Algorithms

Forward Sampling

class BayesianModelSampling:
    """Forward, rejection and likelihood-weighted sampling for Bayesian networks."""

    def __init__(self, model):
        """
        Sampling algorithms for Bayesian networks.

        Parameters:
        - model: DiscreteBayesianNetwork to sample from
        """

    def forward_sample(self, size=1, seed=None, include_latents=False,
                       partial_samples=None, show_progress=True):
        """
        Generate samples using forward sampling.

        Parameters:
        - size: number of samples to generate
        - seed: random seed for reproducibility
        - include_latents: whether to include latent variables
        - partial_samples: DataFrame with partial variable assignments
        - show_progress: whether to show progress bar

        Returns:
        pandas.DataFrame: Generated samples
        """

    # NOTE: `evidence=[]` mirrors the documented signature. The default list
    # is created once and shared across calls, so it must never be mutated.
    def rejection_sample(self, evidence=[], size=1, seed=None,
                         include_latents=False, show_progress=True):
        """
        Generate samples using rejection sampling.

        Parameters:
        - evidence: list of State objects representing evidence
        - size: number of samples to generate
        - seed: random seed for reproducibility
        - include_latents: whether to include latent variables
        - show_progress: whether to show progress bar

        Returns:
        pandas.DataFrame: Samples consistent with evidence
        """

    # NOTE: same shared-default caveat as rejection_sample for `evidence=[]`.
    def likelihood_weighted_sample(self, evidence=[], size=1, seed=None,
                                   include_latents=False, show_progress=True):
        """
        Generate weighted samples using likelihood weighting.

        Parameters:
        - evidence: list of State objects representing evidence
        - size: number of samples to generate
        - seed: random seed for reproducibility
        - include_latents: whether to include latent variables
        - show_progress: whether to show progress bar

        Returns:
        pandas.DataFrame: Weighted samples; the weight of each sample is
        stored in the '_weight' column (note the leading underscore)
        """

MCMC Sampling

class GibbsSampling:
    """Gibbs sampler (MCMC) over a Bayesian or Markov network."""

    def __init__(self, model=None):
        """
        Gibbs sampling for MCMC-based inference.

        Parameters:
        - model: DiscreteBayesianNetwork or MarkovNetwork
        """

    def sample(self, start_state=None, size=1, seed=None, include_latents=False):
        """
        Generate samples using Gibbs sampling MCMC.

        Parameters:
        - start_state: initial state for the Markov chain
        - size: number of samples to generate
        - seed: random seed
        - include_latents: whether to include latent variables

        Returns:
        pandas.DataFrame: MCMC samples drawn from the chain
        """

    def generate_sample(self, start_state=None, size=1, seed=None, include_latents=False):
        """
        Generator counterpart of sample().

        Yields the chain's successive states one at a time instead of
        collecting them into a DataFrame.

        Parameters:
        - start_state: initial state for the Markov chain
        - size: number of samples to generate
        - seed: random seed
        - include_latents: whether to include latent variables
        """

Utility Functions

def _return_samples(samples, return_type='dataframe'):
    """
    Utility function for formatting sample output.

    The leading underscore marks this as a private helper rather than part
    of the public API.

    Parameters:
    - samples: raw sample data
    - return_type: format for returned samples (default 'dataframe';
      presumably another value selects the dict form - TODO confirm)

    Returns:
    pandas.DataFrame or dict: Formatted samples
    """

# Data processing utilities
# NOTE: `labels=dict()` is evaluated once at definition time, so the default
# dict is shared across calls; this is only safe if it is never mutated.
def discretize(data, cardinality, labels=dict(), method="rounding"):
    """
    Discretize continuous data into discrete bins.

    Parameters:
    - data: pandas.DataFrame with continuous variables
    - cardinality: dict of variable cardinalities {var: n_bins}
    - labels: dict of bin labels {var: [label1, label2, ...]}; defaults to
      an empty dict (no custom labels)
    - method: discretization method ('rounding', 'uniform', 'quantile');
      defaults to 'rounding'

    Returns:
    pandas.DataFrame: Discretized data
    """

def preprocess_data(df):
    """
    Preprocess data for use with pgmpy models.

    NOTE(review): the specific preprocessing steps are not described here -
    confirm against the library before relying on them.

    Parameters:
    - df: pandas.DataFrame with raw data

    Returns:
    pandas.DataFrame: Preprocessed data ready for modeling
    """

def get_example_model(model: str):
    """
    Get a predefined example model by name.

    Parameters:
    - model: string name of the example model to fetch (despite the
      parameter name, this is the model's name, not a model object)

    Returns:
    DiscreteBayesianNetwork: Example model
    """

Usage Examples

Loading and Saving Models

from pgmpy.readwrite import BIFReader, BIFWriter
from pgmpy.models import DiscreteBayesianNetwork

# Read a network from a BIF file in a single expression
model = BIFReader('model.bif').get_model()

# Write it back out through an explicit BIF writer
BIFWriter(model).write_bif('output_model.bif')

# The same round trip via the model's own save/load helpers
model.save('model.bif', filetype='bif')
loaded_model = DiscreteBayesianNetwork.load('model.bif', filetype='bif')

Generating Samples

from pgmpy.sampling import BayesianModelSampling
from pgmpy.factors.discrete import State

# Initialize sampler
sampler = BayesianModelSampling(model)

# Forward sampling
samples = sampler.forward_sample(size=1000, seed=42)
print(samples.head())

# Rejection sampling with evidence
evidence = [State('A', 1)]
conditional_samples = sampler.rejection_sample(
    evidence=evidence,
    size=500,
    seed=42
)

# Likelihood weighted sampling
weighted_samples = sampler.likelihood_weighted_sample(
    evidence=evidence,
    size=1000,
    seed=42
)
# The per-sample weights are stored in the '_weight' column (leading underscore)
print("Weights:", weighted_samples['_weight'].describe())

MCMC Sampling

from pgmpy.factors.discrete import State
from pgmpy.sampling import GibbsSampling

# Initialize Gibbs sampler
gibbs = GibbsSampling(model)

# Generate MCMC samples.
# The start state is given as State(variable, value) pairs, one per model
# variable; a plain dict would iterate as bare keys rather than pairs.
mcmc_samples = gibbs.sample(
    start_state=[State('A', 0), State('B', 1), State('C', 0)],
    size=10000,
    seed=42
)

# Check convergence (simplified)
print("Sample means:", mcmc_samples.mean())
print("Sample variance:", mcmc_samples.var())

Data Preprocessing

from pgmpy.utils import discretize, preprocess_data
import pandas as pd
import numpy as np

# Build a synthetic table of continuous measurements
n_rows = 1000
continuous_data = pd.DataFrame({
    'height': np.random.normal(170, 10, n_rows),
    'weight': np.random.normal(70, 15, n_rows),
    'age': np.random.uniform(18, 80, n_rows)
})

# Number of bins per variable, and the label for each bin
bin_counts = {'height': 3, 'weight': 3, 'age': 4}
bin_labels = {
    'height': ['short', 'medium', 'tall'],
    'weight': ['light', 'medium', 'heavy'],
    'age': ['young', 'adult', 'middle', 'senior']
}

# Bin every continuous column using quantiles
discrete_data = discretize(
    continuous_data,
    cardinality=bin_counts,
    labels=bin_labels,
    method='quantile'
)

# Final preparation step before fitting a model
processed_data = preprocess_data(discrete_data)

Install with Tessl CLI

npx tessl i tessl/pypi-pgmpy

docs

data-io.md

evaluation.md

factors.md

index.md

inference.md

learning.md

models.md

tile.json