pgmpy — A library for Probabilistic Graphical Models

Algorithms for learning model structure from data and for estimating model parameters, covering constraint-based, score-based, and hybrid approaches. pgmpy provides comprehensive learning capabilities for both structure discovery and parameter estimation.
class MaximumLikelihoodEstimator:
    """API stub: maximum likelihood parameter estimation for a Bayesian network."""

    def __init__(self, model, data, **kwargs):
        """
        Maximum likelihood parameter estimation.

        Parameters:
        - model: DiscreteBayesianNetwork with structure
        - data: pandas.DataFrame with observed data
        """

    def get_parameters(self, n_jobs=1, weighted=False):
        """
        Estimate all model parameters.

        Parameters:
        - n_jobs: number of parallel jobs
        - weighted: whether to use weighted estimation

        Returns:
        list: List of estimated TabularCPDs
        """

    def estimate_cpd(self, node, weighted=False):
        """
        Estimate CPD for a single node.

        Parameters:
        - node: variable name
        - weighted: whether to use weighted estimation

        Returns:
        TabularCPD: Estimated conditional probability distribution
        """


class BayesianEstimator:
    ...
class BayesianEstimator:
    """API stub: Bayesian (prior-based) parameter estimation."""

    def __init__(self, model, data, **kwargs):
        """Bayesian parameter estimation with priors."""

    def get_parameters(self, prior_type='BDeu', equivalent_sample_size=10):
        """
        Estimate parameters using Bayesian approach.

        Parameters:
        - prior_type: type of prior ('BDeu', 'K2', 'dirichlet')
        - equivalent_sample_size: strength of prior belief

        Returns:
        list: List of posterior CPDs
        """


class ExpectationMaximization:
    ...
class ExpectationMaximization:
    """API stub: EM parameter estimation for incomplete (partially observed) data."""

    def __init__(self, model, data, **kwargs):
        """EM algorithm for incomplete data."""

    def get_parameters(self, max_iter=100, tol=1e-4):
        """
        Estimate parameters using EM algorithm.

        Parameters:
        - max_iter: maximum iterations
        - tol: convergence tolerance

        Returns:
        list: Estimated CPDs
        """


class HillClimbSearch:
    ...
class HillClimbSearch:
    """API stub: greedy hill-climbing structure search."""

    def __init__(self, data, use_cache=True, **kwargs):
        """
        Hill climbing structure search algorithm.

        Parameters:
        - data: pandas.DataFrame with observed data
        - use_cache: whether to cache scoring computations
        """

    def estimate(self, start=None, tabu_length=0, max_indegree=None,
                 show_progress=True):
        """
        Learn structure using hill climbing.

        Parameters:
        - start: initial graph structure
        - tabu_length: length of tabu list
        - max_indegree: maximum parent set size
        - show_progress: whether to show progress

        Returns:
        DiscreteBayesianNetwork: Learned structure
        """


class PC:
    ...
class PC:
    """API stub: constraint-based causal discovery (PC algorithm)."""

    def __init__(self, data):
        """
        PC algorithm for causal discovery.

        Parameters:
        - data: pandas.DataFrame with observed data
        """

    def estimate(self, variant="stable", ci_test="chi_square",
                 significance_level=0.05, show_progress=True):
        """
        Learn structure using PC algorithm.

        Parameters:
        - variant: PC variant ('orig', 'stable', 'parallel')
        - ci_test: conditional independence test
        - significance_level: significance threshold
        - show_progress: whether to show progress

        Returns:
        DiscreteBayesianNetwork: Learned causal structure
        """


class GES:
    ...
class GES:
    """API stub: Greedy Equivalence Search structure learning."""

    def __init__(self, data):
        """Greedy Equivalence Search algorithm."""

    def estimate(self, scoring_method='bic', phase1=True, phase2=True):
        """
        Learn structure using GES.

        Parameters:
        - scoring_method: scoring function to use
        - phase1: whether to perform forward phase
        - phase2: whether to perform backward phase

        Returns:
        DiscreteBayesianNetwork: Learned structure
        """


class StructureScore:
    ...
class StructureScore:
    """API stub: base class for structure scoring methods."""

    def __init__(self, data):
        """Base class for structure scoring methods."""

    def score(self, model):
        """
        Compute structure score.

        Parameters:
        - model: DiscreteBayesianNetwork to score

        Returns:
        float: Structure score
        """


# Bayesian scores
class K2:
    def __init__(self, data):
        """K2 score for structure evaluation."""


class BDeu:
    def __init__(self, data, equivalent_sample_size=10):
        """BDeu score with equivalent sample size."""


class BDs:
    def __init__(self, data):
        """BDs score for structure evaluation."""


# Information criterion scores
class BIC:
    def __init__(self, data):
        """Bayesian Information Criterion."""


class AIC:
    def __init__(self, data):
        """Akaike Information Criterion."""


# Gaussian scores
class BICGauss:
    def __init__(self, data):
        """BIC for Gaussian data."""


class AICGauss:
    def __init__(self, data):
        """AIC for Gaussian data."""


class MmhcEstimator:
    ...
class MmhcEstimator:
    """API stub: Max-Min Hill Climbing hybrid structure learning."""

    def __init__(self, data):
        """Max-Min Hill Climbing estimator."""

    def estimate(self, significance_level=0.05):
        """Learn structure using MMHC algorithm."""


class SEMEstimator:
    """API stub: structural equation model estimation."""

    def __init__(self, data):
        """Structural Equation Model estimator."""

    def estimate(self, method='2sls'):
        """
        Estimate SEM parameters.

        Parameters:
        - method: estimation method ('2sls', 'fiml')

        Returns:
        SEM: Estimated structural equation model
        """


class IVEstimator:
    """API stub: instrumental-variable causal effect estimation."""

    def __init__(self, data):
        """Instrumental Variable estimator."""

    def estimate(self, instrument, treatment, outcome):
        """Estimate causal effect using IV method."""


class ExpertInLoop:
    """API stub: interactive structure learning with expert guidance."""

    def __init__(self, data):
        """Interactive structure learning with expert guidance."""

    def estimate(self, expert_knowledge=None):
        """Learn structure with expert input."""


# Advanced and specialized structure learning methods.
class TreeSearch:
    """API stub: tree-structured (e.g. Chow-Liu style) structure search."""

    def __init__(self, data, use_cache=True):
        """
        Tree-based structure search algorithm.

        Parameters:
        - data: pandas.DataFrame containing the data
        - use_cache: whether to use caching for efficiency
        """

    def estimate(self, start=None, max_indegree=None):
        """Estimate optimal tree structure."""


class ExhaustiveSearch:
    """API stub: exhaustive enumeration of all candidate structures."""

    def __init__(self, data, use_cache=True):
        """
        Exhaustive search over all possible structures.

        Parameters:
        - data: pandas.DataFrame containing the data
        - use_cache: whether to use caching for efficiency
        """

    def estimate(self, scoring_method="bic", max_indegree=None):
        """Perform exhaustive structure search."""


class MirrorDescentEstimator:
    """API stub: mirror-descent optimization for parameter estimation."""

    def __init__(self, model, data):
        """
        Mirror descent optimization for parameter estimation.

        Parameters:
        - model: model structure
        - data: training data
        """

    def get_parameters(self, n_jobs=1):
        """Estimate parameters using mirror descent."""


class ExpertKnowledge:
    """API stub: structure learning constrained by expert domain knowledge."""

    def __init__(self, data):
        """
        Structure learning incorporating expert domain knowledge.

        Parameters:
        - data: pandas.DataFrame containing the data
        """

    # NOTE(review): mutable list defaults mirror the documented upstream
    # signature; safe here only because the stub never mutates them.
    def estimate(self, must_have_edges=[], forbidden_edges=[],
                 must_not_have_edges=[]):
        """
        Learn structure with expert constraints.

        Parameters:
        - must_have_edges: list of edges that must be present
        - forbidden_edges: list of edges that are forbidden
        - must_not_have_edges: alias for forbidden_edges

        Returns:
        Learned model structure
        """


# Extended scoring functions for different variable types and model assumptions.
class BICCondGauss:
    """API stub: BIC structure score for conditional Gaussian models."""

    def __init__(self, data):
        """BIC score for conditional Gaussian models."""

    def score(self, model):
        """Compute BIC score for conditional Gaussian model."""


class AICCondGauss:
    """API stub: AIC structure score for conditional Gaussian models."""

    def __init__(self, data):
        """AIC score for conditional Gaussian models."""

    def score(self, model):
        """Compute AIC score for conditional Gaussian model."""


class LogLikelihoodGauss:
    """API stub: log-likelihood structure score for Gaussian models."""

    def __init__(self, data):
        """Log-likelihood score for Gaussian models."""

    def score(self, model):
        """Compute log-likelihood score for Gaussian model."""
class LogLikelihoodCondGauss:
    """API stub: log-likelihood structure score for conditional Gaussian models."""

    def __init__(self, data):
        """Log-likelihood score for conditional Gaussian models."""

    def score(self, model):
        """Compute log-likelihood score for conditional Gaussian model."""


# Usage example: parameter learning (imports for the example below).
from pgmpy.models import DiscreteBayesianNetwork
# Example 1: maximum likelihood parameter learning on a small discrete network.
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
import pandas as pd

# Assume we have data and a model structure
data = pd.DataFrame({
    'A': [0, 1, 0, 1, 1],
    'B': [1, 0, 1, 0, 1],
    'C': [0, 1, 1, 0, 1]
})
model = DiscreteBayesianNetwork([('A', 'C'), ('B', 'C')])

# Learn parameters
estimator = MaximumLikelihoodEstimator(model, data)
cpds = estimator.get_parameters()

# Add learned CPDs to model
model.add_cpds(*cpds)

from pgmpy.estimators import HillClimbSearch, PC
# Example 2: structure learning and scoring.
from pgmpy.estimators import HillClimbSearch, PC
# BIC is the structure-score class exported by pgmpy ('BICScore' does not exist).
from pgmpy.estimators import BIC

# Score-based structure learning
hc = HillClimbSearch(data)
best_model = hc.estimate()

# Constraint-based structure learning
pc = PC(data)
causal_model = pc.estimate(ci_test="chi_square", significance_level=0.01)

# Structure scoring
scoring_method = BIC(data)
score = scoring_method.score(best_model)

# Install with Tessl CLI:
npx tessl i tessl/pypi-pgmpy