CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-pgmpy

A library for Probabilistic Graphical Models

Pending
Overview
Eval results
Files

learning.mddocs/

Learning Algorithms

Algorithms for learning model structure from data and estimating parameters, including constraint-based, score-based, and hybrid approaches. pgmpy provides comprehensive learning capabilities for both structure discovery and parameter estimation.

Capabilities

Parameter Estimation

Maximum Likelihood Estimation

class MaximumLikelihoodEstimator:
    """Maximum likelihood parameter estimation."""

    def __init__(self, model, data, **kwargs):
        """
        Set up the estimator.

        Args:
            model: DiscreteBayesianNetwork with structure.
            data: pandas.DataFrame with observed data.
            **kwargs: extra options (not described in this reference).
        """

    def get_parameters(self, n_jobs=1, weighted=False):
        """
        Estimate all model parameters.

        Args:
            n_jobs: number of parallel jobs.
            weighted: whether to use weighted estimation.

        Returns:
            list: the estimated TabularCPDs.
        """

    def estimate_cpd(self, node, weighted=False):
        """
        Estimate the CPD of a single node.

        Args:
            node: variable name.
            weighted: whether to use weighted estimation.

        Returns:
            TabularCPD: the estimated conditional probability distribution.
        """

Bayesian Parameter Estimation

class BayesianEstimator:
    """Bayesian parameter estimation with priors."""

    def __init__(self, model, data, **kwargs):
        """
        Set up the estimator.

        Args:
            model: model passed to the estimator.
            data: data passed to the estimator.
            **kwargs: extra options (not described in this reference).
        """

    def get_parameters(self, prior_type="BDeu", equivalent_sample_size=10):
        """
        Estimate parameters using the Bayesian approach.

        Args:
            prior_type: type of prior ('BDeu', 'K2', 'dirichlet').
            equivalent_sample_size: strength of prior belief.

        Returns:
            list: the posterior CPDs.
        """

Expectation-Maximization

class ExpectationMaximization:
    """EM algorithm for incomplete data."""

    def __init__(self, model, data, **kwargs):
        """
        Set up the estimator.

        Args:
            model: model passed to the estimator.
            data: data passed to the estimator.
            **kwargs: extra options (not described in this reference).
        """

    def get_parameters(self, max_iter=100, tol=1e-4):
        """
        Estimate parameters using the EM algorithm.

        Args:
            max_iter: maximum iterations.
            tol: convergence tolerance.

        Returns:
            list: the estimated CPDs.
        """

Structure Learning

Hill Climbing Search

class HillClimbSearch:
    """Hill climbing structure search algorithm."""

    def __init__(self, data, use_cache=True, **kwargs):
        """
        Set up the search.

        Args:
            data: pandas.DataFrame with observed data.
            use_cache: whether to cache scoring computations.
            **kwargs: extra options (not described in this reference).
        """

    def estimate(
        self,
        start=None,
        tabu_length=0,
        max_indegree=None,
        show_progress=True,
    ):
        """
        Learn a structure using hill climbing.

        Args:
            start: initial graph structure.
            tabu_length: length of tabu list.
            max_indegree: maximum parent set size.
            show_progress: whether to show progress.

        Returns:
            DiscreteBayesianNetwork: the learned structure.
            NOTE(review): confirm the exact return type against the installed
            pgmpy release.
        """

Constraint-Based Methods

class PC:
    """PC algorithm for causal discovery."""

    def __init__(self, data):
        """
        Set up the estimator.

        Args:
            data: pandas.DataFrame with observed data.
        """

    def estimate(
        self,
        variant="stable",
        ci_test="chi_square",
        significance_level=0.05,
        show_progress=True,
    ):
        """
        Learn a structure using the PC algorithm.

        Args:
            variant: PC variant ('orig', 'stable', 'parallel').
            ci_test: conditional independence test.
            significance_level: significance threshold.
            show_progress: whether to show progress.

        Returns:
            DiscreteBayesianNetwork: the learned causal structure.
            NOTE(review): confirm the exact return type against the installed
            pgmpy release.
        """

Greedy Equivalence Search

class GES:
    """Greedy Equivalence Search algorithm."""

    def __init__(self, data):
        """
        Set up the search.

        Args:
            data: data passed to the algorithm.
        """

    def estimate(self, scoring_method="bic", phase1=True, phase2=True):
        """
        Learn a structure using GES.

        Args:
            scoring_method: scoring function to use.
            phase1: whether to perform the forward phase.
            phase2: whether to perform the backward phase.

        Returns:
            DiscreteBayesianNetwork: the learned structure.
        """

Structure Scoring

class StructureScore:
    """Base class for structure scoring methods."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

    def score(self, model):
        """
        Compute the structure score.

        Args:
            model: DiscreteBayesianNetwork to score.

        Returns:
            float: the structure score.
        """

# Bayesian scores
class K2:
    """K2 score for structure evaluation."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

class BDeu:
    """BDeu score with equivalent sample size."""

    def __init__(self, data, equivalent_sample_size=10):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
            equivalent_sample_size: equivalent sample size of the score.
        """

class BDs:
    """BDs score for structure evaluation."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

# Information criterion scores  
class BIC:
    """Bayesian Information Criterion."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

class AIC:
    """Akaike Information Criterion."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

# Gaussian scores
class BICGauss:
    """BIC for Gaussian data."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

class AICGauss:
    """AIC for Gaussian data."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

Advanced Learning Methods

class MmhcEstimator:
    """Max-Min Hill Climbing estimator."""

    def __init__(self, data):
        """
        Set up the estimator.

        Args:
            data: data passed to the estimator.
        """

    def estimate(self, significance_level=0.05):
        """
        Learn a structure using the MMHC algorithm.

        Args:
            significance_level: significance level (defaults to 0.05).
        """

class SEMEstimator:
    """Structural Equation Model estimator."""

    def __init__(self, data):
        """
        Set up the estimator.

        Args:
            data: data passed to the estimator.
        """

    def estimate(self, method="2sls"):
        """
        Estimate the SEM parameters.

        Args:
            method: estimation method ('2sls', 'fiml').

        Returns:
            SEM: the estimated structural equation model.
        """

class IVEstimator:
    """Instrumental Variable estimator."""

    def __init__(self, data):
        """
        Set up the estimator.

        Args:
            data: data passed to the estimator.
        """

    def estimate(self, instrument, treatment, outcome):
        """
        Estimate a causal effect using the IV method.

        Args:
            instrument: the instrument.
            treatment: the treatment.
            outcome: the outcome.
        """

class ExpertInLoop:
    """Interactive structure learning with expert guidance."""

    def __init__(self, data):
        """
        Set up the learner.

        Args:
            data: data passed to the learner.
        """

    def estimate(self, expert_knowledge=None):
        """
        Learn a structure with expert input.

        Args:
            expert_knowledge: expert input to use (defaults to None).
        """

Additional Structure Learning Algorithms

Advanced and specialized structure learning methods, together with one additional parameter estimator (MirrorDescentEstimator).

class TreeSearch:
    """Tree-based structure search algorithm."""

    def __init__(self, data, use_cache=True):
        """
        Set up the search.

        Args:
            data: pandas.DataFrame containing the data.
            use_cache: whether to use caching for efficiency.
        """

    def estimate(self, start=None, max_indegree=None):
        """
        Estimate the optimal tree structure.

        Args:
            start: defaults to None (not described in this reference).
            max_indegree: defaults to None (not described in this reference).
        """

class ExhaustiveSearch:
    """Exhaustive search over all possible structures."""

    def __init__(self, data, use_cache=True):
        """
        Set up the search.

        Args:
            data: pandas.DataFrame containing the data.
            use_cache: whether to use caching for efficiency.
        """

    def estimate(self, scoring_method="bic", max_indegree=None):
        """
        Perform the exhaustive structure search.

        Args:
            scoring_method: defaults to "bic" (not described in this reference).
            max_indegree: defaults to None (not described in this reference).
        """

class MirrorDescentEstimator:
    """Mirror descent optimization for parameter estimation."""

    def __init__(self, model, data):
        """
        Set up the estimator.

        Args:
            model: model structure.
            data: training data.
        """

    def get_parameters(self, n_jobs=1):
        """
        Estimate parameters using mirror descent.

        Args:
            n_jobs: defaults to 1 (not described in this reference).
        """

class ExpertKnowledge:
    """Structure learning incorporating expert domain knowledge."""

    def __init__(self, data):
        """
        Set up the learner.

        Parameters:
        - data: pandas.DataFrame containing the data
        """

    def estimate(self, must_have_edges=None, forbidden_edges=None,
                 must_not_have_edges=None):
        """
        Learn structure with expert constraints.

        The edge-list parameters default to None rather than to a list
        literal, so no mutable default object is shared between calls.
        None stands for "no edges of this kind".

        Parameters:
        - must_have_edges: list of edges that must be present (default: None)
        - forbidden_edges: list of edges that are forbidden (default: None)
        - must_not_have_edges: alias for forbidden_edges (default: None)

        Returns:
        Learned model structure
        """

Additional Scoring Methods

Extended scoring functions for different variable types and model assumptions.

class BICCondGauss:
    """BIC score for conditional Gaussian models."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

    def score(self, model):
        """
        Compute the BIC score for a conditional Gaussian model.

        Args:
            model: model to score.
        """

class AICCondGauss:
    """AIC score for conditional Gaussian models."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

    def score(self, model):
        """
        Compute the AIC score for a conditional Gaussian model.

        Args:
            model: model to score.
        """

class LogLikelihoodGauss:
    """Log-likelihood score for Gaussian models."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

    def score(self, model):
        """
        Compute the log-likelihood score for a Gaussian model.

        Args:
            model: model to score.
        """

class LogLikelihoodCondGauss:
    """Log-likelihood score for conditional Gaussian models."""

    def __init__(self, data):
        """
        Set up the scorer.

        Args:
            data: data passed to the scorer.
        """

    def score(self, model):
        """
        Compute the log-likelihood score for a conditional Gaussian model.

        Args:
            model: model to score.
        """

Usage Examples

Parameter Learning

from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
import pandas as pd

# Toy data set: five observations of three 0/1-valued variables A, B and C.
data = pd.DataFrame({
    'A': [0, 1, 0, 1, 1],
    'B': [1, 0, 1, 0, 1],
    'C': [0, 1, 1, 0, 1]
})

# The structure is given up front: edges A -> C and B -> C.
model = DiscreteBayesianNetwork([('A', 'C'), ('B', 'C')])

# Learn parameters: get_parameters() returns a list of estimated TabularCPDs.
estimator = MaximumLikelihoodEstimator(model, data)
cpds = estimator.get_parameters()

# Attach the learned CPDs to the model (the list is unpacked into arguments).
model.add_cpds(*cpds)

Structure Learning

from pgmpy.estimators import BIC, PC, HillClimbSearch

# `data` is the pandas.DataFrame built in the Parameter Learning example.

# Score-based structure learning
hc = HillClimbSearch(data)
best_model = hc.estimate()

# Constraint-based structure learning
pc = PC(data)
causal_model = pc.estimate(ci_test="chi_square", significance_level=0.01)

# Structure scoring. The scoring class is named `BIC` in the Structure
# Scoring section of this page; `BICScore` is not defined there.
scoring_method = BIC(data)
score = scoring_method.score(best_model)

Install with Tessl CLI

npx tessl i tessl/pypi-pgmpy

docs

data-io.md

evaluation.md

factors.md

index.md

inference.md

learning.md

models.md

tile.json