A library for Probabilistic Graphical Models
npx @tessl/cli install tessl/pypi-pgmpy@1.0.0

A comprehensive Python library for working with Probabilistic Graphical Models, specifically Bayesian Networks and related models including Directed Acyclic Graphs, Dynamic Bayesian Networks, and Structural Equation Models. The library combines features from the causal inference and probabilistic inference literature, enabling users to move seamlessly between the two domains. It implements algorithms for structure learning, causal discovery, parameter estimation, probabilistic and causal inference, and simulation.
pip install pgmpy

import pgmpy

Common imports for specific functionality:
# Core models
from pgmpy.models import DiscreteBayesianNetwork, BayesianNetwork
from pgmpy.models import MarkovNetwork, FactorGraph, JunctionTree
from pgmpy.models import DynamicBayesianNetwork, NaiveBayes
# Factors and distributions
from pgmpy.factors.discrete import TabularCPD, DiscreteFactor
from pgmpy.factors import FactorSet, FactorDict
# Inference algorithms
from pgmpy.inference import VariableElimination, BeliefPropagation
from pgmpy.inference import CausalInference
# Learning and estimation
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator
from pgmpy.estimators import HillClimbSearch, PC
# Sampling
from pgmpy.sampling import BayesianModelSampling, GibbsSampling
# File I/O
from pgmpy.readwrite import BIFReader, BIFWriter
# Independence and utilities
from pgmpy.independencies import Independencies

A minimal end-to-end example:

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
# Create a simple Bayesian Network
model = DiscreteBayesianNetwork([('A', 'C'), ('B', 'C')])
# Define conditional probability distributions
cpd_a = TabularCPD(variable='A', variable_card=2, values=[[0.7], [0.3]])
cpd_b = TabularCPD(variable='B', variable_card=2, values=[[0.6], [0.4]])
cpd_c = TabularCPD(variable='C', variable_card=2,
values=[[0.9, 0.8, 0.2, 0.1],
[0.1, 0.2, 0.8, 0.9]],
evidence=['A', 'B'], evidence_card=[2, 2])
# Add CPDs to the model
model.add_cpds(cpd_a, cpd_b, cpd_c)
# Check model validity
assert model.check_model()
# Perform inference
inference = VariableElimination(model)
result = inference.query(variables=['C'], evidence={'A': 1})
print(result)

Global configuration for compute backend and performance settings:
import pgmpy
# Access global configuration
config = pgmpy.config
# Set compute backend (numpy or torch)
config.set_backend('torch', device='cuda')
# Control progress bars
config.set_show_progress(True)
# Set data type precision
config.set_dtype('float64')

pgmpy is organized around several key concepts, described in the sections that follow.
The library supports both discrete and continuous variables as well as exact and approximate inference methods, and provides extensive functionality for model evaluation and validation.
Core model classes for creating and managing probabilistic graphical models, including Bayesian networks, Markov networks, and factor graphs.
class DiscreteBayesianNetwork:
def __init__(self, ebunch=None, latents=set(), lavaan_str=None, dagitty_str=None): ...
def add_edge(self, u, v, **kwargs): ...
def add_cpds(self, *cpds): ...
def check_model(self): ...
def predict(self, data, variables=None, n_jobs=1): ...
def simulate(self, n_samples, do=None, evidence=None): ...
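
A minimal usage sketch of this model API, using an illustrative two-variable Rain/WetGrass network (the variable names are not part of pgmpy):

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD

# Illustrative two-variable network: Rain -> WetGrass
rain_model = DiscreteBayesianNetwork([('Rain', 'WetGrass')])
rain_model.add_cpds(
    TabularCPD('Rain', 2, [[0.8], [0.2]]),
    TabularCPD('WetGrass', 2, [[0.9, 0.1], [0.1, 0.9]],
               evidence=['Rain'], evidence_card=[2]),
)
assert rain_model.check_model()

# Draw synthetic samples from the joint distribution
samples = rain_model.simulate(n_samples=500)

# Predict the unobserved variable for partially observed rows
print(rain_model.predict(samples[['Rain']]).head())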

Representations of probability distributions including discrete factors, conditional probability distributions, and continuous distributions.

class TabularCPD:
def __init__(self, variable, variable_card, values, evidence=None, evidence_card=None): ...
def normalize(self, inplace=True): ...
def marginalize(self, variables, inplace=True): ...
def to_factor(self): ...
class DiscreteFactor:
def __init__(self, variables, cardinality, values): ...
def product(self, phi1, inplace=True): ...
def marginalize(self, variables, inplace=True): ...
def reduce(self, values, inplace=True): ...
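
A sketch of the factor operations listed above; the factor values are arbitrary and purely illustrative:

from pgmpy.factors.discrete import DiscreteFactor

# Two small factors over binary variables
phi_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
phi_bc = DiscreteFactor(['B', 'C'], [2, 2], [5, 6, 7, 8])

# Factor product over the shared variable B
phi_abc = phi_ab.product(phi_bc, inplace=False)

# Sum out A, then condition on C = 0 and renormalize
phi_bc_only = phi_abc.marginalize(['A'], inplace=False)
phi_b = phi_bc_only.reduce([('C', 0)], inplace=False)
phi_b.normalize()
print(phi_b)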

Exact and approximate inference algorithms for computing marginal probabilities, MAP queries, and causal inference.

class VariableElimination:
def __init__(self, model): ...
def query(self, variables, evidence=None, elimination_order="MinFill"): ...
def map_query(self, variables=None, evidence=None): ...
class BeliefPropagation:
def __init__(self, model): ...
def calibrate(self): ...
def query(self, variables, evidence=None): ...
class CausalInference:
def __init__(self, model): ...
def estimate_ate(self, treatment, outcome, common_causes=None): ...
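
A sketch of MAP and interventional queries on the same small A/B/C network used in the quick-start example; the do-style query assumes a pgmpy version in which CausalInference supports query with a do argument:

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination, CausalInference

bn = DiscreteBayesianNetwork([('A', 'C'), ('B', 'C')])
bn.add_cpds(
    TabularCPD('A', 2, [[0.7], [0.3]]),
    TabularCPD('B', 2, [[0.6], [0.4]]),
    TabularCPD('C', 2, [[0.9, 0.8, 0.2, 0.1], [0.1, 0.2, 0.8, 0.9]],
               evidence=['A', 'B'], evidence_card=[2, 2]),
)

# MAP query: most probable state of C given evidence on B
infer = VariableElimination(bn)
print(infer.map_query(variables=['C'], evidence={'B': 0}))

# Interventional query P(C | do(A=1))
causal = CausalInference(bn)
print(causal.query(variables=['C'], do={'A': 1}))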

Algorithms for learning model structure from data and estimating parameters, including constraint-based, score-based, and hybrid approaches.

class MaximumLikelihoodEstimator:
def __init__(self, model, data): ...
def get_parameters(self, n_jobs=1): ...
def estimate_cpd(self, node): ...
class HillClimbSearch:
def __init__(self, data, use_cache=True): ...
def estimate(self, start=None, max_indegree=None): ...
class PC:
def __init__(self, data): ...
def estimate(self, variant="stable", ci_test="chi_square"): ...
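
A learning sketch that simulates training data from the bundled 'asia' example network (assumed to be available through get_example_model), recovers a structure by hill climbing, and fits parameters by maximum likelihood:

from pgmpy.estimators import HillClimbSearch, MaximumLikelihoodEstimator
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.utils import get_example_model

# Simulate training data from a known benchmark network
data = get_example_model('asia').simulate(n_samples=2000)

# Score-based structure learning with the default scoring method
dag = HillClimbSearch(data).estimate()

# Fit CPDs on the learned structure by maximum likelihood
learned = DiscreteBayesianNetwork(dag.edges())
learned.add_nodes_from(data.columns)
learned.fit(data, estimator=MaximumLikelihoodEstimator)
print(learned.get_cpds('smoke'))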

File I/O capabilities for various formats and sampling algorithms for generating data from probabilistic models.

class BayesianModelSampling:
def __init__(self, model): ...
def forward_sample(self, size=1, seed=None): ...
def rejection_sample(self, evidence=[], size=1): ...
# File format readers/writers
class BIFReader:
def __init__(self, path): ...
def get_model(self): ...
class BIFWriter:
def __init__(self, model): ...
def write_bif(self, filename): ...
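
A sketch of forward sampling and a BIF round trip; it assumes the bundled 'cancer' example network is available, and 'cancer_copy.bif' is an illustrative output path:

from pgmpy.readwrite import BIFReader, BIFWriter
from pgmpy.sampling import BayesianModelSampling
from pgmpy.utils import get_example_model

cancer = get_example_model('cancer')

# Forward (ancestral) sampling from the model
sampler = BayesianModelSampling(cancer)
samples = sampler.forward_sample(size=100, seed=42)
print(samples.head())

# Round-trip the model through the BIF format
BIFWriter(cancer).write_bif('cancer_copy.bif')
restored = BIFReader('cancer_copy.bif').get_model()
assert set(restored.nodes()) == set(cancer.nodes())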

Functions for evaluating model quality, computing metrics, and validating learned structures.

def log_likelihood_score(model, data): ...
def structure_score(model, data, scoring_method="bic-g"): ...
def correlation_score(model, data, test="chi_square"): ...
def SHD(true_model, est_model): ...
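
An evaluation sketch on data simulated from the bundled 'cancer' example network; note that the set of accepted scoring_method names varies across pgmpy versions:

from pgmpy.metrics import correlation_score, log_likelihood_score, structure_score
from pgmpy.utils import get_example_model

cancer = get_example_model('cancer')
data = cancer.simulate(n_samples=1000)

# Log-likelihood of the data under the model (higher is better)
print(log_likelihood_score(cancer, data))

# Structure score; 'bic-d' denotes BIC for discrete data in recent releases
print(structure_score(cancer, data, scoring_method='bic-d'))

# Agreement between model-implied and data-supported dependencies
print(correlation_score(cancer, data, test='chi_square'))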

Classes for representing conditional independence relationships and graph structures used as foundations for probabilistic models.

class Independencies:
def __init__(self, assertions=None): ...
def add_assertions(self, *assertions): ...
def get_assertions(self): ...
class IndependenceAssertion:
def __init__(self, event1, event2, event3=[]): ...
# Base graph structures
class DAG:
def __init__(self, ebunch=None): ...
def add_edges_from(self, ebunch): ...
def is_dag(self): ...
class UndirectedGraph:
def __init__(self, ebunch=None): ...
def add_edge(self, u, v): ...
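
A sketch of d-separation checks and explicitly declared independencies; the chain X -> Y -> Z is illustrative:

from pgmpy.base import DAG
from pgmpy.independencies import Independencies

chain = DAG([('X', 'Y'), ('Y', 'Z')])

# X and Z are d-connected marginally, but d-separated given Y
print(chain.is_dconnected('X', 'Z'))                  # True
print(chain.is_dconnected('X', 'Z', observed=['Y']))  # False

# All conditional independencies implied by the DAG
print(chain.get_independencies())

# Independencies can also be declared directly: X _|_ Z | Y
ind = Independencies(['X', 'Z', 'Y'])
print(ind.get_assertions())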

Helper functions and classes for data processing, mathematical operations, and state management.

# Math and data utilities
def cartesian(*arrays):
"""Cartesian product of input arrays."""
def sample_discrete(distribution, size=1, seed=None):
"""Sample from discrete probability distribution."""
def discretize(data, cardinality, labels=dict(), method="rounding"):
"""Discretize continuous data into discrete bins."""
def preprocess_data(df):
"""Preprocess data for pgmpy models."""
def get_example_model(model):
"""Get predefined example model by name."""
# Optimization utilities
def optimize(func, x0, method='L-BFGS-B'):
"""Optimization wrapper function."""
def pinverse(a, rcond=1e-15):
"""Compute Moore-Penrose pseudoinverse."""
# State name handling
class StateNameMixin:
"""Mixin class for handling variable state names."""
# External utilities
def tabulate(data, headers=None):
"""Format data as a table."""

# Configuration class
class Config:
def set_backend(self, backend: str, device: str = None, dtype = None): ...
def get_backend(self) -> str: ...
def set_device(self, device: str = None): ...
def get_device(self): ...
def set_dtype(self, dtype = None): ...
def get_dtype(self): ...
def set_show_progress(self, show_progress: bool): ...
def get_show_progress(self) -> bool: ...
# Common data structures
StateNameType = Dict[str, List[str]]
EvidenceType = Dict[str, int]
VariableCardType = Dict[str, int]
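
A short sketch of how these structures appear in practice: a state-name mapping attaches human-readable states to a variable via TabularCPD's state_names argument; the 'Weather' variable is illustrative:

from pgmpy.factors.discrete import TabularCPD

# StateNameType-style mapping: variable -> list of named states
weather_states = {'Weather': ['sunny', 'rainy']}

cpd = TabularCPD(variable='Weather', variable_card=2,
                 values=[[0.7], [0.3]],
                 state_names=weather_states)
print(cpd)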