# Clustering

Specialized classes for representing and manipulating clustering results from community detection algorithms. These objects provide comprehensive functionality for analyzing, comparing, and visualizing community structures.

## Capabilities

### Base Clustering Class

Core functionality for working with any type of clustering or partition of ordered sets.

```python { .api }
class Clustering:
    def __init__(self, membership, n=None):
        """
        Create clustering from membership vector.

        Parameters:
        - membership: list of int, cluster assignment for each element
        - n: int, total number of elements (None for auto-detection)
        """

    def __len__(self):
        """
        Get number of clusters.

        Returns:
        int, cluster count
        """

    def __getitem__(self, idx):
        """
        Get members of specific cluster.

        Parameters:
        - idx: int, cluster index

        Returns:
        list of int, member indices in cluster
        """

    def size(self, idx):
        """
        Get size of specific cluster.

        Parameters:
        - idx: int, cluster index

        Returns:
        int, number of elements in cluster
        """

    def sizes(self):
        """
        Get sizes of all clusters.

        Returns:
        list of int, cluster sizes
        """

    def summary(self, n=None):
        """
        Get summary statistics of clustering.

        Parameters:
        - n: int, number of largest clusters to show

        Returns:
        str, formatted summary
        """
```

**Usage Examples:**

```python
import igraph as ig

# Create clustering from membership vector
membership = [0, 0, 0, 1, 1, 2, 2, 2, 2]
clustering = ig.Clustering(membership)

# Basic operations
print(f"Number of clusters: {len(clustering)}")
print(f"Cluster 0 members: {clustering[0]}")
print(f"Cluster sizes: {clustering.sizes()}")
print(f"Largest cluster size: {max(clustering.sizes())}")

# Summary information
print(clustering.summary())

# Iterate over clusters
for i, cluster in enumerate(clustering):
    print(f"Cluster {i}: {cluster}")
```

### Vertex Clustering

Graph-specific clustering with enhanced functionality for network community analysis.

```python { .api }
class VertexClustering(Clustering):
    def __init__(self, graph, membership=None):
        """
        Create vertex clustering for graph.

        Parameters:
        - graph: Graph object
        - membership: list, community assignment (None for singleton clusters)
        """

    @classmethod
    def FromAttribute(cls, graph, attribute):
        """
        Create clustering from vertex attribute.

        Parameters:
        - graph: Graph object
        - attribute: str, vertex attribute name containing cluster IDs

        Returns:
        VertexClustering based on attribute values
        """

    @property
    def graph(self):
        """Get associated graph object."""

    @property
    def membership(self):
        """Get membership vector."""

    def modularity(self, weights=None):
        """
        Calculate modularity of clustering.

        Parameters:
        - weights: str/list, edge weights or attribute name

        Returns:
        float, modularity score (-0.5 to 1.0)
        """

    def crossing(self):
        """
        Identify edges crossing between communities.

        Returns:
        list of bool, True for edges crossing communities
        """

    def subgraph(self, idx):
        """
        Get induced subgraph for specific community.

        Parameters:
        - idx: int, community index

        Returns:
        Graph, subgraph containing only community members
        """

    def subgraphs(self):
        """
        Get all community subgraphs.

        Returns:
        list of Graph, subgraphs for each community
        """

    def giant(self):
        """
        Get largest community.

        Returns:
        Graph, subgraph induced by the largest community
        """
```

**Usage Examples:**

```python
# Create sample graph with communities
g = ig.Graph.Famous("Zachary")  # Karate club network

# Community detection
leiden_comm = g.community_leiden()
print(f"Type: {type(leiden_comm)}")  # VertexClustering

# Modularity analysis
modularity = leiden_comm.modularity()
print(f"Modularity: {modularity:.3f}")

# Weighted modularity (if edges have weights)
if "weight" in g.es.attributes():
    weighted_mod = leiden_comm.modularity(weights="weight")
    print(f"Weighted modularity: {weighted_mod:.3f}")

# Edge crossing analysis
crossing_edges = leiden_comm.crossing()
n_crossing = sum(crossing_edges)
print(f"Edges crossing communities: {n_crossing}/{g.ecount()}")

# Community subgraphs
subgraphs = leiden_comm.subgraphs()
for i, sg in enumerate(subgraphs):
    print(f"Community {i}: {sg.vcount()} vertices, {sg.ecount()} edges")

# Largest community (returned as a subgraph)
giant_community = leiden_comm.giant()
print(f"Largest community has {giant_community.vcount()} vertices")

# Create clustering from vertex attribute (one label per vertex)
g.vs["type"] = [["A", "B", "C"][v % 3] for v in range(g.vcount())]
attr_clustering = ig.VertexClustering.FromAttribute(g, "type")
print(f"Attribute-based clustering: {attr_clustering.membership}")
```

### Hierarchical Clustering (Dendrograms)

Tree-like structures representing hierarchical community organization.

```python { .api }
class Dendrogram:
    def __init__(self, merges, n=None, nmerges=None):
        """
        Create dendrogram from merge information.

        Parameters:
        - merges: list of tuples, merge steps (cluster1, cluster2)
        - n: int, number of leaves
        - nmerges: int, number of merges
        """

    def format(self, format="newick"):
        """
        Export dendrogram in standard format.

        Parameters:
        - format: str, output format ("newick" supported)

        Returns:
        str, formatted dendrogram
        """

    def summary(self):
        """
        Get ASCII art summary of dendrogram.

        Returns:
        str, tree visualization
        """

class VertexDendrogram(Dendrogram):
    def __init__(self, graph, merges, modularity=None):
        """
        Create vertex dendrogram for graph.

        Parameters:
        - graph: Graph object
        - merges: merge steps
        - modularity: list, modularity at each merge level
        """

    def as_clustering(self, n=None):
        """
        Convert dendrogram to flat clustering.

        Parameters:
        - n: int, number of clusters (None for optimal)

        Returns:
        VertexClustering at specified level
        """

    @property
    def optimal_count(self):
        """
        Get optimal number of clusters (highest modularity).

        Returns:
        int, optimal cluster count
        """
```

**Usage Examples:**

```python
# Hierarchical community detection
walktrap_dendro = g.community_walktrap()
print(f"Type: {type(walktrap_dendro)}")  # VertexDendrogram

# Optimal clustering
optimal_n = walktrap_dendro.optimal_count
print(f"Optimal number of clusters: {optimal_n}")

optimal_clustering = walktrap_dendro.as_clustering()
print(f"Optimal modularity: {optimal_clustering.modularity():.3f}")

# Explore different cut levels
for n_clusters in [2, 3, 4, 5]:
    clustering = walktrap_dendro.as_clustering(n_clusters)
    print(f"{n_clusters} clusters: modularity {clustering.modularity():.3f}")

# Export formats
newick_str = walktrap_dendro.format("newick")
print("Newick format:", newick_str[:100], "...")

# ASCII visualization
print(walktrap_dendro.summary())

# Fast greedy also returns dendrogram
fastgreedy_dendro = g.community_fastgreedy()
fg_optimal = fastgreedy_dendro.as_clustering()
```

### Community Covers

Handle overlapping communities where vertices can belong to multiple clusters.

```python { .api }
class Cover:
    def __init__(self, n=0):
        """
        Create empty cover structure.

        Parameters:
        - n: int, number of elements
        """

    def add_cluster(self, cluster):
        """
        Add cluster to cover.

        Parameters:
        - cluster: list, members of new cluster

        Returns:
        None
        """

    def size(self, idx):
        """
        Get size of specific cluster.

        Parameters:
        - idx: int, cluster index

        Returns:
        int, cluster size
        """

class VertexCover(Cover):
    def __init__(self, graph, clusters=None):
        """
        Create vertex cover for graph.

        Parameters:
        - graph: Graph object
        - clusters: list of lists, cluster memberships
        """

    @property
    def graph(self):
        """Get associated graph."""

    def subgraph(self, idx):
        """
        Get subgraph for cluster (may include overlaps).

        Parameters:
        - idx: int, cluster index

        Returns:
        Graph, cluster subgraph
        """
```

**Usage Examples:**

```python
# Create overlapping communities manually
vertices = list(range(g.vcount()))
overlapping_clusters = [
    [0, 1, 2, 5],  # Cluster 0
    [2, 3, 4, 5],  # Cluster 1 (vertices 2,5 overlap)
    [4, 6, 7, 8]   # Cluster 2 (vertex 4 overlaps)
]

cover = ig.VertexCover(g, overlapping_clusters)

# Analyze overlaps
for i in range(len(overlapping_clusters)):
    subgraph = cover.subgraph(i)
    print(f"Cluster {i}: {cover.size(i)} vertices, {subgraph.ecount()} edges")

# Find overlapping vertices
all_vertices = set()
overlapping = set()
for cluster in overlapping_clusters:
    for v in cluster:
        if v in all_vertices:
            overlapping.add(v)
        all_vertices.add(v)
print(f"Overlapping vertices: {overlapping}")
```

### Cohesive Blocks

Specialized clustering for cohesive block analysis (hierarchically nested groups identified by vertex connectivity).

```python { .api }
class CohesiveBlocks:
    def __init__(self, graph, blocks=None, cohesion=None, parent=None):
        """
        Create cohesive blocks structure.

        Parameters:
        - graph: Graph object
        - blocks: list, block membership
        - cohesion: list, cohesion values for blocks
        - parent: list, parent blocks in hierarchy
        """

    @property
    def graph(self):
        """Get associated graph."""

    def cohesion(self, idx=None):
        """
        Get cohesion values.

        Parameters:
        - idx: int, specific block index (None for all)

        Returns:
        int/list, cohesion value(s)
        """

    def hierarchy(self):
        """
        Get block hierarchy information.

        Returns:
        Tree structure of cohesive blocks
        """
```

**Usage Examples:**

```python
# Cohesive blocks analysis
cohesive = g.cohesive_blocks()
print(f"Type: {type(cohesive)}")  # CohesiveBlocks

# Block information
n_blocks = len(cohesive)
print(f"Number of cohesive blocks: {n_blocks}")

# Cohesion levels
cohesions = cohesive.cohesion()
print(f"Cohesion levels: {cohesions}")

# Hierarchy structure
hierarchy = cohesive.hierarchy()
print("Block hierarchy available")
```

### Clustering Comparison

Compare different clustering results to understand algorithmic differences.

```python { .api }
def compare_communities(comm1, comm2, method="vi", remove_none=False):
    """
    Compare two clusterings.

    Parameters:
    - comm1, comm2: Clustering objects
    - method: str, comparison method
      ("vi" - variation of information, "nmi" - normalized mutual information,
      "split-join", "rand", "adjusted_rand")
    - remove_none: bool, ignore unassigned elements

    Returns:
    float, similarity/distance measure
    """

def split_join_distance(comm1, comm2):
    """
    Calculate split-join distance.

    Parameters:
    - comm1, comm2: Clustering objects

    Returns:
    tuple of int, the two one-sided distances (comm1 from comm2 and
    vice versa); their sum is the full split-join distance
    """
```

**Usage Examples:**

```python
from igraph import compare_communities, split_join_distance

# Generate different clusterings
leiden_comm = g.community_leiden(resolution=1.0)
louvain_comm = g.community_multilevel()  # Louvain algorithm
walktrap_comm = g.community_walktrap().as_clustering()
infomap_comm = g.community_infomap()

clusterings = [
    ("Leiden", leiden_comm),
    ("Louvain", louvain_comm),
    ("Walktrap", walktrap_comm),
    ("Infomap", infomap_comm)
]

# Compare all pairs
print("Variation of Information (lower = more similar):")
for i, (name1, comm1) in enumerate(clusterings):
    for j, (name2, comm2) in enumerate(clusterings):
        if i < j:  # Avoid duplicate comparisons
            vi = compare_communities(comm1, comm2, method="vi")
            nmi = compare_communities(comm1, comm2, method="nmi")
            sj = split_join_distance(comm1, comm2)
            print(f"{name1} vs {name2}: VI={vi:.3f}, NMI={nmi:.3f}, SJ={sj}")

# Consensus clustering
def consensus_clustering(clusterings, threshold=0.5):
    """
    Build a consensus (co-association) matrix from multiple clusterings.

    Parameters:
    - clusterings: list of clustering objects exposing a `membership` list
    - threshold: float, reserved for deriving a final clustering from the
      matrix; currently unused -- TODO(review): apply or remove

    Returns:
    list of list of float, n x n matrix where entry (i, j) is the fraction
    of clusterings that place elements i and j in the same cluster
    """
    n = len(clusterings[0].membership)
    n_runs = len(clusterings)
    consensus_matrix = [[0] * n for _ in range(n)]

    # Count co-occurrences once per unordered pair; "same cluster" is a
    # symmetric relation, so mirror the count instead of recomputing it.
    for clustering in clusterings:
        membership = clustering.membership
        for i in range(n):
            for j in range(i, n):
                if membership[i] == membership[j]:
                    consensus_matrix[i][j] += 1
                    if i != j:
                        consensus_matrix[j][i] += 1

    # Normalize counts to fractions in [0, 1].
    for row in consensus_matrix:
        for j in range(n):
            row[j] /= n_runs

    return consensus_matrix

# Create consensus
all_comms = [comm for _, comm in clusterings]
consensus = consensus_clustering(all_comms)
print("Consensus matrix computed")
```

### Clustering Evaluation

Assess clustering quality using various internal and external measures.

**Usage Examples:**

```python
def evaluate_clustering(graph, clustering):
    """
    Comprehensive clustering evaluation.

    Parameters:
    - graph: Graph object the clustering belongs to
    - clustering: VertexClustering to evaluate

    Returns:
    dict with cluster count, size statistics, modularity, coverage,
    internal/external edge counts and average conductance
    """
    results = {}

    # Basic statistics (compute sizes once; it is reused below)
    sizes = clustering.sizes()
    results["n_clusters"] = len(clustering)
    results["sizes"] = sizes
    results["modularity"] = clustering.modularity()

    # Size distribution; guard against an empty clustering
    if sizes:
        mean = sum(sizes) / len(sizes)
        results["size_mean"] = mean
        results["size_std"] = (sum((s - mean) ** 2 for s in sizes) / len(sizes)) ** 0.5
        results["size_max"] = max(sizes)
        results["size_min"] = min(sizes)
    else:
        results["size_mean"] = results["size_std"] = 0.0
        results["size_max"] = results["size_min"] = 0

    # Coverage and crossing edges; an edgeless graph is fully covered
    crossing = clustering.crossing()
    n_edges = len(crossing)
    n_crossing = sum(crossing)
    results["coverage"] = 1 - n_crossing / n_edges if n_edges else 1.0
    results["internal_edges"] = n_edges - n_crossing
    results["external_edges"] = n_crossing

    # Conductance per cluster: external degree over the cluster's volume
    # (sum of member degrees = 2 * internal edges + external edges)
    conductances = []
    for i in range(len(clustering)):
        cluster_vertices = clustering[i]
        if not cluster_vertices:
            continue
        members = set(cluster_vertices)  # O(1) membership tests in the loop below
        subgraph = clustering.subgraph(i)
        internal_degree = sum(subgraph.degree())  # 2 * internal edge count

        external_degree = sum(
            1
            for v in cluster_vertices
            for neighbor in graph.neighbors(v)
            if neighbor not in members
        )

        total_degree = internal_degree + external_degree
        conductances.append(external_degree / total_degree if total_degree > 0 else 0)

    results["avg_conductance"] = sum(conductances) / len(conductances) if conductances else 0

    return results

# Evaluate different algorithms
for name, comm in clusterings:
    eval_results = evaluate_clustering(g, comm)
    print(f"\n{name}:")
    print(f"  Clusters: {eval_results['n_clusters']}")
    print(f"  Modularity: {eval_results['modularity']:.3f}")
    print(f"  Coverage: {eval_results['coverage']:.3f}")
    print(f"  Avg conductance: {eval_results['avg_conductance']:.3f}")
    print(f"  Size range: {eval_results['size_min']}-{eval_results['size_max']}")

# Stability analysis
def clustering_stability(graph, algorithm_func, n_trials=10):
    """
    Assess clustering stability across multiple runs.

    Parameters:
    - graph: Graph object (kept for interface compatibility; the algorithm
      is expected to be bound to it via `algorithm_func`)
    - algorithm_func: callable returning a fresh clustering on each call
    - n_trials: int, number of independent runs to compare

    Returns:
    tuple (float, list of float): average pairwise NMI and all pairwise NMIs
    """
    clusterings = [algorithm_func() for _ in range(n_trials)]

    # Compare every unordered pair of runs with normalized mutual information.
    similarities = []
    for i in range(len(clusterings)):
        for j in range(i + 1, len(clusterings)):
            similarities.append(
                compare_communities(clusterings[i], clusterings[j], method="nmi")
            )

    # Fewer than two runs yield no pairs; report perfect stability instead
    # of dividing by zero.
    if not similarities:
        return 1.0, similarities

    avg_similarity = sum(similarities) / len(similarities)
    return avg_similarity, similarities

# Test stability
leiden_stability = clustering_stability(g, lambda: g.community_leiden())
print(f"\nLeiden stability (NMI): {leiden_stability[0]:.3f}")
```