0
# Models and Graph Structures
1
2
Core model classes for creating and managing probabilistic graphical models. pgmpy provides various model types to represent different kinds of probabilistic relationships and graph structures.
3
4
## Capabilities
5
6
### Discrete Bayesian Networks
7
8
The primary class for working with discrete Bayesian networks - directed acyclic graphs where each node represents a random variable and edges encode conditional dependencies.
9
10
```python { .api }
11
class DiscreteBayesianNetwork:
12
def __init__(self, ebunch=None, latents=set(), lavaan_str=None, dagitty_str=None):
13
"""
14
Create a discrete Bayesian network.
15
16
Parameters:
17
- ebunch: list of edges as tuples
18
- latents: set of latent variable names
19
- lavaan_str: lavaan model string
20
- dagitty_str: dagitty model string
21
"""
22
23
def add_edge(self, u, v, **kwargs):
24
"""Add directed edge from u to v."""
25
26
def remove_node(self, node):
27
"""Remove node and all associated CPDs."""
28
29
def add_cpds(self, *cpds):
30
"""Add conditional probability distributions to the model."""
31
32
def get_cpds(self, node=None):
33
"""Get CPDs for specified node or all CPDs if node is None."""
34
35
def remove_cpds(self, *cpds):
36
"""Remove specified CPDs from the model."""
37
38
def get_cardinality(self, node=None):
39
"""Get cardinality (number of states) for node or all nodes."""
40
41
def check_model(self):
42
"""
43
Validate model consistency.
44
45
Returns:
46
bool: True if model is valid
47
48
Raises:
49
ValueError: If model has issues
50
"""
51
52
def fit_update(self, data, n_prev_samples=None, n_jobs=1):
53
"""Update model parameters with new data."""
54
55
def predict(self, data, variables=None, n_jobs=1, show_progress=True):
56
"""
57
Predict missing values in data.
58
59
Parameters:
60
- data: pandas.DataFrame with missing values
61
- variables: list of variables to predict
62
- n_jobs: number of parallel jobs
63
- show_progress: whether to show progress bar
64
65
Returns:
66
pandas.DataFrame: Data with predicted values
67
"""
68
69
def predict_probability(self, data):
70
"""Predict probabilities for missing values."""
71
72
def get_state_probability(self, states):
73
"""Get probability of specified variable states."""
74
75
def to_markov_model(self):
76
"""Convert to equivalent Markov network."""
77
78
def to_junction_tree(self):
79
"""Create junction tree for exact inference."""
80
81
def simulate(self, n_samples, do=None, evidence=None, show_progress=True):
82
"""
83
Generate samples from the model.
84
85
Parameters:
86
- n_samples: number of samples to generate
87
- do: dict of interventions {variable: value}
88
- evidence: dict of evidence {variable: value}
89
- show_progress: whether to show progress bar
90
91
Returns:
92
pandas.DataFrame: Generated samples
93
"""
94
95
def save(self, filename, filetype="bif"):
96
"""Save model to file in specified format."""
97
98
@staticmethod
99
def load(filename, filetype="bif", **kwargs):
100
"""Load model from file."""
101
102
def copy(self):
103
"""Create deep copy of the model."""
104
105
def get_markov_blanket(self, node):
106
"""Get Markov blanket of specified node."""
107
108
def do(self, nodes, inplace=False):
109
"""
110
Apply the do-operator (intervention) to the given nodes.

Parameters:
- nodes: list of node names to intervene on
114
- inplace: whether to modify model in-place
115
116
Returns:
117
DiscreteBayesianNetwork: Model with interventions applied
118
"""
119
```
120
121
### Markov Networks
122
123
Undirected graphical models representing joint probability distributions through factors (clique potentials) defined over cliques of the graph.
124
125
```python { .api }
126
class MarkovNetwork:
127
def __init__(self, ebunch=None, latents=[]):
128
"""
129
Create a Markov network (undirected graphical model).
130
131
Parameters:
132
- ebunch: list of edges as tuples
133
- latents: list of latent variable names
134
"""
135
136
def add_edge(self, u, v, **kwargs):
137
"""Add undirected edge between u and v."""
138
139
def add_factors(self, *factors):
140
"""Add factors (clique potentials) to the model."""
141
142
def get_factors(self, factor=None):
143
"""Get specified factor or all factors."""
144
145
def remove_factors(self, *factors):
146
"""Remove factors from the model."""
147
148
def check_model(self):
149
"""Validate model consistency."""
150
151
def copy(self):
152
"""Create deep copy of the model."""
153
154
def to_factor_graph(self):
155
"""Convert to equivalent factor graph."""
156
157
def to_junction_tree(self):
158
"""Create junction tree for exact inference."""
159
```
160
161
### Factor Graphs
162
163
Bipartite graphs explicitly representing factorization of probability distributions.
164
165
```python { .api }
166
class FactorGraph:
167
def __init__(self, ebunch=None):
168
"""
169
Create a factor graph.
170
171
Parameters:
172
- ebunch: list of edges between variables and factors
173
"""
174
175
def add_edges_from(self, ebunch):
176
"""Add multiple edges from edge list."""
177
178
def add_factors(self, *factors):
179
"""Add factor nodes to the graph."""
180
181
def get_factors(self, factor=None):
182
"""Get specified factor or all factors."""
183
184
def check_model(self):
185
"""Validate factor graph consistency."""
186
187
def to_markov_model(self):
188
"""Convert to equivalent Markov network."""
189
```
190
191
### Junction Trees
192
193
Tree structures used for exact inference in graphical models.
194
195
```python { .api }
196
class JunctionTree:
197
def __init__(self, ebunch=None):
198
"""
199
Create a junction tree.
200
201
Parameters:
202
- ebunch: list of tree edges
203
"""
204
205
def add_edge(self, u, v, **kwargs):
206
"""Add edge between cliques."""
207
208
def add_factors(self, *factors):
209
"""Add factors to cliques."""
210
211
def check_model(self):
212
"""Validate junction tree properties."""
213
214
def copy(self):
215
"""Create deep copy of the junction tree."""
216
```
217
218
### Dynamic Bayesian Networks
219
220
Time-series models representing temporal probabilistic relationships.
221
222
```python { .api }
223
class DynamicBayesianNetwork:
224
def __init__(self, ebunch=None, latents=[]):
225
"""
226
Create a Dynamic Bayesian Network for time-series modeling.
227
228
Parameters:
229
- ebunch: list of edges including temporal connections
230
- latents: list of latent variable names
231
"""
232
233
def add_edge(self, u, v, **kwargs):
234
"""Add edge, supporting temporal connections."""
235
236
def add_cpds(self, *cpds):
237
"""Add CPDs including temporal dependencies."""
238
239
def initialize_initial_state(self):
"""Re-adjust the CPDs and edges added to the network; takes no arguments."""
241
242
def check_model(self):
243
"""Validate DBN structure and parameters."""
244
245
def simulate(self, n_samples, n_time_slices=1):
246
"""Generate temporal samples from the DBN."""
247
```
248
249
### Specialized Models
250
251
Additional model types for specific use cases.
252
253
```python { .api }
254
class NaiveBayes:
255
def __init__(self, feature_vars=None, dependent_var=None):
"""Create a Naive Bayes classifier model."""

def fit(self, data, parent_node=None, estimator=None):
"""Fit the model to training data (computes CPDs from data)."""
260
261
def predict(self, X):
262
"""Predict class labels for test data."""
263
264
def predict_proba(self, X):
265
"""Predict class probabilities for test data."""
266
267
class LinearGaussianBayesianNetwork:
268
def __init__(self, ebunch=None):
269
"""Create a Linear Gaussian Bayesian Network."""
270
271
def add_cpds(self, *cpds):
272
"""Add Linear Gaussian CPDs."""
273
274
def simulate(self, n_samples):
275
"""Generate samples from the continuous model."""
276
277
class MarkovChain:
278
def __init__(self, variables=None, card=None, start_state=None):
279
"""
280
Create a Markov Chain model.
281
282
Parameters:
283
- variables: list of variable names
284
- card: list of cardinalities
285
- start_state: initial state distribution
286
"""
287
288
def add_variable(self, variable, card):
289
"""Add variable to the chain."""
290
291
def add_transition_model(self, variable, transition_model):
"""Add the transition model for the given variable."""
293
294
def sample(self, start_state=None, size=1):
295
"""Generate samples from the Markov chain."""
296
297
# Structural Equation Models
298
class SEMGraph:
299
def __init__(self, ebunch=None, latents=[]):
300
"""Create a Structural Equation Model graph."""
301
302
class SEMAlg:
303
def __init__(self, model):
304
"""SEM algorithm implementation."""
305
306
class SEM:
307
def __init__(self, ebunch=None, latents=[]):
308
"""Complete Structural Equation Model."""
309
```
310
311
### Graph Base Classes
312
313
Fundamental graph structures used by all model types.
314
315
```python { .api }
316
class DAG:
317
def __init__(self, ebunch=None):
318
"""
319
Directed Acyclic Graph base class.
320
321
Parameters:
322
- ebunch: list of directed edges
323
"""
324
325
def add_edge(self, u, v, **kwargs):
326
"""Add directed edge ensuring acyclicity."""
327
328
def is_dag(self):
329
"""Check if graph is a valid DAG."""
330
331
def topological_sort(self):
332
"""Return topological ordering of nodes."""
333
334
def get_roots(self):
335
"""Get nodes with no parents."""
336
337
def get_leaves(self):
338
"""Get nodes with no children."""
339
340
class PDAG:
341
def __init__(self, ebunch=None):
342
"""Partially Directed Acyclic Graph."""
343
344
def add_edge(self, u, v, edge_type='directed'):
345
"""Add edge with specified type (directed/undirected)."""
346
347
class UndirectedGraph:
348
def __init__(self, ebunch=None):
349
"""Undirected graph base class."""
350
351
def add_edge(self, u, v, **kwargs):
352
"""Add undirected edge."""
353
354
def is_connected(self):
355
"""Check if graph is connected."""
356
```
357
358
### Markov Networks (Extended API)
359
360
Undirected graphical models representing joint probability distributions through clique potentials.
361
362
```python { .api }
363
class MarkovNetwork:
364
def __init__(self, ebunch=None, latents=[]):
365
"""
366
Create a Markov Network (undirected graphical model).
367
368
Parameters:
369
- ebunch: input graph data (edge list or NetworkX graph)
370
- latents: list of latent variables
371
"""
372
373
def add_edge(self, u, v, **kwargs):
374
"""Add undirected edge between u and v."""
375
376
def add_factors(self, *factors):
377
"""Associate factors (clique potentials) to the graph."""
378
379
def get_factors(self, node=None):
380
"""Returns factors containing the node or all factors."""
381
382
def remove_factors(self, *factors):
383
"""Removes given factors from the model."""
384
385
def get_cardinality(self, node=None):
386
"""Returns cardinality of node or all variables."""
387
388
def check_model(self):
389
"""Check the model for various errors."""
390
391
def to_factor_graph(self):
392
"""Converts the Markov Model into Factor Graph."""
393
394
def triangulate(self, heuristic="H6", order=None, inplace=False):
395
"""Triangulate the graph."""
396
397
def to_junction_tree(self):
398
"""Creates a junction tree for the markov model."""
399
400
def markov_blanket(self, node):
401
"""Returns markov blanket for a random variable."""
402
403
def to_bayesian_model(self):
"""Create a Bayesian model that is a minimal I-map of this Markov network."""
405
406
def get_partition_function(self):
407
"""Returns partition function for the graph."""
408
```
409
410
### Factor Graphs (Extended API)
411
412
Bipartite graphs representing factorization of functions with variable and factor nodes.
413
414
```python { .api }
415
class FactorGraph:
416
def __init__(self, ebunch=None):
417
"""
418
Create a Factor Graph (bipartite graph).
419
420
Parameters:
421
- ebunch: input graph data (edge list)
422
"""
423
424
def add_edge(self, u, v, **kwargs):
425
"""Add edge between variable_node and factor_node."""
426
427
def add_factors(self, *factors, replace=False):
428
"""Associate factors to the graph."""
429
430
def remove_factors(self, *factors):
431
"""Removes given factors."""
432
433
def get_variable_nodes(self):
434
"""Returns variable nodes in the graph."""
435
436
def get_factor_nodes(self):
437
"""Returns factor nodes in the graph."""
438
439
def to_markov_model(self):
440
"""Converts factor graph into markov model."""
441
442
def to_junction_tree(self):
443
"""Create junction tree for the factor graph."""
444
445
def get_partition_function(self):
446
"""Returns partition function."""
447
```
448
449
### Junction Trees and Cluster Graphs
450
451
Tree structures for efficient exact inference in probabilistic graphical models.
452
453
```python { .api }
454
class JunctionTree:
455
def __init__(self, ebunch=None):
456
"""
457
Create a Junction Tree.
458
459
Parameters:
460
- ebunch: input graph data (edge list)
461
"""
462
463
def add_edge(self, u, v, **kwargs):
464
"""Add edge between two clique nodes."""
465
466
def check_model(self):
467
"""Check the model for various errors."""
468
469
class ClusterGraph:
470
def __init__(self, ebunch=None):
471
"""
472
Create a Cluster Graph.
473
474
Parameters:
475
- ebunch: input graph data (edge list)
476
"""
477
478
def add_node(self, node, **kwargs):
479
"""Add single node (clique) to the cluster graph."""
480
481
def add_nodes_from(self, nodes, **kwargs):
482
"""Add multiple nodes."""
483
484
def add_edge(self, u, v, **kwargs):
485
"""Add edge between two clique nodes."""
486
487
def add_factors(self, *factors):
488
"""Associate factors to the graph."""
489
490
def get_factors(self, node=None):
491
"""Return factors for node or all factors."""
492
493
def remove_factors(self, *factors):
494
"""Removes given factors."""
495
496
def get_partition_function(self):
497
"""Returns partition function."""
498
```
499
500
### Dynamic and Temporal Models
501
502
Models for time-series and temporal probabilistic relationships.
503
504
```python { .api }
505
class DynamicBayesianNetwork:
506
def __init__(self, ebunch=None):
507
"""
508
Create a Dynamic Bayesian Network for temporal modeling.
509
510
Parameters:
511
- ebunch: input graph data (edge list)
512
"""
513
514
def add_edge(self, start, end, **kwargs):
515
"""Add edge between two nodes with time slices."""
516
517
def get_intra_edges(self, time_slice=0):
518
"""Returns intra slice edges."""
519
520
def get_inter_edges(self):
521
"""Returns inter-slice edges."""
522
523
def get_interface_nodes(self, time_slice=0):
524
"""Returns interface nodes."""
525
526
def get_slice_nodes(self, time_slice=0):
527
"""Returns nodes in particular timeslice."""
528
529
def add_cpds(self, *cpds):
530
"""Add CPDs to the network."""
531
532
def get_cpds(self, node=None, time_slice=None):
533
"""Returns CPDs for node/timeslice."""
534
535
def initialize_initial_state(self):
536
"""Re-adjust CPDs and edges."""
537
538
def get_constant_bn(self, t_slice=0):
539
"""Returns normal Bayesian Network object."""
540
541
def simulate(self, n_samples=10, n_time_slices=2, do=None, evidence=None):
542
"""Simulates time-series data."""
543
544
class MarkovChain:
545
def __init__(self, variables=None, card=None, start_state=None):
546
"""
547
Create a Markov Chain with multiple kernels.
548
549
Parameters:
550
- variables: list of variables of the model
551
- card: list of cardinalities of the variables
552
- start_state: list of tuples representing starting states
553
"""
554
555
def set_start_state(self, start_state):
556
"""Set the start state of the Markov Chain."""
557
558
def add_variable(self, variable, card=0):
559
"""Add a variable to the model."""
560
561
def add_variables_from(self, variables, cards):
562
"""Add several variables at once."""
563
564
def add_transition_model(self, variable, transition_model):
565
"""Adds transition model for variable."""
566
567
def sample(self, start_state=None, size=1, seed=None):
568
"""Sample from the Markov Chain."""
569
570
def generate_sample(self, start_state=None, size=1, seed=None):
571
"""Generator version of sample."""
572
573
def is_stationarity(self, tolerance=0.2, sample=None):
574
"""Check if chain is stationary."""
575
```
576
577
### Specialized Bayesian Networks
578
579
Specialized network types for specific use cases and variable types.
580
581
```python { .api }
582
class NaiveBayes:
583
def __init__(self, feature_vars=None, dependent_var=None):
584
"""
585
Create a Naive Bayes model.
586
587
Parameters:
588
- feature_vars: list of predictor variables (features)
589
- dependent_var: dependent variable to be predicted
590
"""
591
592
def add_edge(self, u, v, *kwargs):
593
"""Add edge between dependent and feature variable."""
594
595
def add_edges_from(self, ebunch):
596
"""Add edges to the model."""
597
598
def fit(self, data, parent_node=None, estimator=None):
599
"""Computes CPDs from data."""
600
601
class LinearGaussianBayesianNetwork:
602
def __init__(self, ebunch=None, latents=set(), lavaan_str=None, dagitty_str=None):
603
"""
604
Create a Linear Gaussian Bayesian Network.
605
606
Parameters:
607
- ebunch: input graph data
608
- latents: set of latent variables
609
- lavaan_str: lavaan syntax string
610
- dagitty_str: dagitty syntax string
611
"""
612
613
def add_cpds(self, *cpds):
614
"""Add linear Gaussian CPDs to the network."""
615
616
def get_cpds(self, node=None):
617
"""Returns CPD of node or all CPDs."""
618
619
def remove_cpds(self, *cpds):
620
"""Removes given CPDs."""
621
622
def to_joint_gaussian(self):
623
"""Returns mean and covariance of joint gaussian distribution."""
624
625
def simulate(self, n=1000, seed=None):
626
"""Simulates data from the model."""
627
628
def fit(self, data, method="mle"):
629
"""Estimates parameters from data."""
630
631
def predict(self, data, distribution="joint"):
632
"""Predicts missing variables."""
633
634
class FunctionalBayesianNetwork:
635
def __init__(self, ebunch=None, latents=set(), lavaan_str=None, dagitty_str=None):
636
"""
637
Create a Functional Bayesian Network using Pyro distributions.
638
639
Parameters:
640
- ebunch: list of edges to build the network
641
- latents: set of latent variables
642
- lavaan_str: lavaan syntax string
643
- dagitty_str: dagitty syntax string
644
"""
645
646
def add_cpds(self, *cpds):
647
"""Adds FunctionalCPDs to the network."""
648
649
def get_cpds(self, node=None):
650
"""Returns CPD of node or all CPDs."""
651
652
def remove_cpds(self, *cpds):
653
"""Removes given CPDs."""
654
655
def simulate(self, n_samples=1000, seed=None):
656
"""Simulate samples from the model."""
657
658
def fit(self, data, method="SVI", optimizer=None, prior_fn=None):
659
"""Fit model using Pyro's SVI or MCMC."""
660
```
661
662
## Usage Examples
663
664
### Creating a Simple Bayesian Network
665
666
```python
667
from pgmpy.models import DiscreteBayesianNetwork
668
from pgmpy.factors.discrete import TabularCPD
669
670
# Create network structure
671
model = DiscreteBayesianNetwork([('Rain', 'Sprinkler'),
672
('Rain', 'Wet_Grass'),
673
('Sprinkler', 'Wet_Grass')])
674
675
# Define CPDs
676
cpd_rain = TabularCPD(variable='Rain', variable_card=2,
677
values=[[0.8], [0.2]])
678
679
cpd_sprinkler = TabularCPD(variable='Sprinkler', variable_card=2,
680
values=[[0.9, 0.1], [0.1, 0.9]],
681
evidence=['Rain'], evidence_card=[2])
682
683
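cpd_wet_grass = TabularCPD(variable='Wet_Grass', variable_card=2,
                           values=[[0.99, 0.1, 0.2, 0.01],
                                   [0.01, 0.9, 0.8, 0.99]],
                           evidence=['Rain', 'Sprinkler'], evidence_card=[2, 2])

# Add a CPD for every node (check_model raises if any node lacks one) and validate
model.add_cpds(cpd_rain, cpd_sprinkler, cpd_wet_grass)
assert model.check_model()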
686
```
687
688
### Working with Markov Networks
689
690
```python
691
from pgmpy.models import MarkovNetwork
692
from pgmpy.factors.discrete import DiscreteFactor
693
694
# Create undirected model
695
model = MarkovNetwork([('A', 'B'), ('B', 'C'), ('C', 'A')])
696
697
# Add factors (clique potentials)
698
factor_ab = DiscreteFactor(['A', 'B'], [2, 2], [1, 2, 3, 4])
699
factor_bc = DiscreteFactor(['B', 'C'], [2, 2], [2, 1, 1, 2])
700
701
model.add_factors(factor_ab, factor_bc)
702
assert model.check_model()
703
```