# Clustering

Specialized classes for representing and manipulating clustering results from community detection algorithms. These objects provide comprehensive functionality for analyzing, comparing, and visualizing community structures.

## Capabilities

### Base Clustering Class

Core functionality for working with any type of clustering or partition of ordered sets.

```python { .api }
class Clustering:
    def __init__(self, membership, n=None):
        """
        Create clustering from membership vector.

        Parameters:
        - membership: list of int, cluster assignment for each element
        - n: int, total number of elements (None for auto-detection)
        """

    def __len__(self):
        """
        Get number of clusters.

        Returns:
        int, cluster count
        """

    def __getitem__(self, idx):
        """
        Get members of specific cluster.

        Parameters:
        - idx: int, cluster index

        Returns:
        list of int, member indices in cluster
        """

    def size(self, idx):
        """
        Get size of specific cluster.

        Parameters:
        - idx: int, cluster index

        Returns:
        int, number of elements in cluster
        """

    def sizes(self):
        """
        Get sizes of all clusters.

        Returns:
        list of int, cluster sizes
        """

    def summary(self, n=None):
        """
        Get summary statistics of clustering.

        Parameters:
        - n: int, number of largest clusters to show

        Returns:
        str, formatted summary
        """
```

**Usage Examples:**

```python
import igraph as ig

# Create clustering from membership vector
membership = [0, 0, 0, 1, 1, 2, 2, 2, 2]
clustering = ig.Clustering(membership)

# Basic operations
print(f"Number of clusters: {len(clustering)}")
print(f"Cluster 0 members: {clustering[0]}")
print(f"Cluster sizes: {clustering.sizes()}")
print(f"Largest cluster size: {max(clustering.sizes())}")

# Summary information
print(clustering.summary())

# Iterate over clusters
for i, cluster in enumerate(clustering):
    print(f"Cluster {i}: {cluster}")
```

### Vertex Clustering

Graph-specific clustering with enhanced functionality for network community analysis.

```python { .api }
class VertexClustering(Clustering):
    def __init__(self, graph, membership=None):
        """
        Create vertex clustering for graph.

        Parameters:
        - graph: Graph object
        - membership: list, community assignment (None for singleton clusters)
        """

    @classmethod
    def FromAttribute(cls, graph, attribute):
        """
        Create clustering from vertex attribute.

        Parameters:
        - graph: Graph object
        - attribute: str, vertex attribute name containing cluster IDs

        Returns:
        VertexClustering based on attribute values
        """

    @property
    def graph(self):
        """Get associated graph object."""

    @property
    def membership(self):
        """Get membership vector."""

    def modularity(self, weights=None):
        """
        Calculate modularity of clustering.

        Parameters:
        - weights: str/list, edge weights or attribute name

        Returns:
        float, modularity score (-0.5 to 1.0)
        """

    def crossing(self):
        """
        Identify edges crossing between communities.

        Returns:
        list of bool, True for edges crossing communities
        """

    def subgraph(self, idx):
        """
        Get induced subgraph for specific community.

        Parameters:
        - idx: int, community index

        Returns:
        Graph, subgraph containing only community members
        """

    def subgraphs(self):
        """
        Get all community subgraphs.

        Returns:
        list of Graph, subgraphs for each community
        """

    def giant(self):
        """
        Get largest community.

        Returns:
        Graph, subgraph induced by the largest community
        """
```

**Usage Examples:**

```python
# Create sample graph with communities
g = ig.Graph.Famous("Zachary")  # Karate club network

# Community detection
leiden_comm = g.community_leiden()
print(f"Type: {type(leiden_comm)}")  # VertexClustering

# Modularity analysis
modularity = leiden_comm.modularity()
print(f"Modularity: {modularity:.3f}")

# Weighted modularity (if edges have weights)
if "weight" in g.es.attributes():
    weighted_mod = leiden_comm.modularity(weights="weight")
    print(f"Weighted modularity: {weighted_mod:.3f}")

# Edge crossing analysis
crossing_edges = leiden_comm.crossing()
n_crossing = sum(crossing_edges)
print(f"Edges crossing communities: {n_crossing}/{g.ecount()}")

# Community subgraphs
subgraphs = leiden_comm.subgraphs()
for i, sg in enumerate(subgraphs):
    print(f"Community {i}: {sg.vcount()} vertices, {sg.ecount()} edges")

# Largest community (returned as a subgraph)
giant_community = leiden_comm.giant()
print(f"Largest community has {giant_community.vcount()} vertices")

# Create clustering from vertex attribute (one label per vertex)
g.vs["type"] = [["A", "B", "C"][v % 3] for v in range(g.vcount())]
attr_clustering = ig.VertexClustering.FromAttribute(g, "type")
print(f"Attribute-based clustering: {attr_clustering.membership}")
```

### Hierarchical Clustering (Dendrograms)

Tree-like structures representing hierarchical community organization.

```python { .api }
class Dendrogram:
    def __init__(self, merges, n=None, nmerges=None):
        """
        Create dendrogram from merge information.

        Parameters:
        - merges: list of tuples, merge steps (cluster1, cluster2)
        - n: int, number of leaves
        - nmerges: int, number of merges
        """

    def format(self, format="newick"):
        """
        Export dendrogram in standard format.

        Parameters:
        - format: str, output format ("newick" supported)

        Returns:
        str, formatted dendrogram
        """

    def summary(self):
        """
        Get ASCII art summary of dendrogram.

        Returns:
        str, tree visualization
        """

class VertexDendrogram(Dendrogram):
    def __init__(self, graph, merges, modularity=None):
        """
        Create vertex dendrogram for graph.

        Parameters:
        - graph: Graph object
        - merges: merge steps
        - modularity: list, modularity at each merge level
        """

    def as_clustering(self, n=None):
        """
        Convert dendrogram to flat clustering.

        Parameters:
        - n: int, number of clusters (None for optimal)

        Returns:
        VertexClustering at specified level
        """

    @property
    def optimal_count(self):
        """
        Get optimal number of clusters (highest modularity).

        Returns:
        int, optimal cluster count
        """
```

**Usage Examples:**

```python
# Hierarchical community detection
walktrap_dendro = g.community_walktrap()
print(f"Type: {type(walktrap_dendro)}")  # VertexDendrogram

# Optimal clustering
optimal_n = walktrap_dendro.optimal_count
print(f"Optimal number of clusters: {optimal_n}")

optimal_clustering = walktrap_dendro.as_clustering()
print(f"Optimal modularity: {optimal_clustering.modularity():.3f}")

# Explore different cut levels
for n_clusters in [2, 3, 4, 5]:
    clustering = walktrap_dendro.as_clustering(n_clusters)
    print(f"{n_clusters} clusters: modularity {clustering.modularity():.3f}")

# Export formats
newick_str = walktrap_dendro.format("newick")
print("Newick format:", newick_str[:100], "...")

# ASCII visualization
print(walktrap_dendro.summary())

# Fast greedy also returns dendrogram
fastgreedy_dendro = g.community_fastgreedy()
fg_optimal = fastgreedy_dendro.as_clustering()
```

### Community Covers

Handle overlapping communities where vertices can belong to multiple clusters.

```python { .api }
class Cover:
    def __init__(self, n=0):
        """
        Create empty cover structure.

        Parameters:
        - n: int, number of elements
        """

    def add_cluster(self, cluster):
        """
        Add cluster to cover.

        Parameters:
        - cluster: list, members of new cluster

        Returns:
        None
        """

    def size(self, idx):
        """
        Get size of specific cluster.

        Parameters:
        - idx: int, cluster index

        Returns:
        int, cluster size
        """

class VertexCover(Cover):
    def __init__(self, graph, clusters=None):
        """
        Create vertex cover for graph.

        Parameters:
        - graph: Graph object
        - clusters: list of lists, cluster memberships
        """

    @property
    def graph(self):
        """Get associated graph."""

    def subgraph(self, idx):
        """
        Get subgraph for cluster (may include overlaps).

        Parameters:
        - idx: int, cluster index

        Returns:
        Graph, cluster subgraph
        """
```

**Usage Examples:**

```python
# Create overlapping communities manually
vertices = list(range(g.vcount()))
overlapping_clusters = [
    [0, 1, 2, 5],  # Cluster 0
    [2, 3, 4, 5],  # Cluster 1 (vertices 2,5 overlap)
    [4, 6, 7, 8]   # Cluster 2 (vertex 4 overlaps)
]

cover = ig.VertexCover(g, overlapping_clusters)

# Analyze overlaps
for i in range(len(overlapping_clusters)):
    subgraph = cover.subgraph(i)
    print(f"Cluster {i}: {cover.size(i)} vertices, {subgraph.ecount()} edges")

# Find overlapping vertices
all_vertices = set()
overlapping = set()
for cluster in overlapping_clusters:
    for v in cluster:
        if v in all_vertices:
            overlapping.add(v)
        all_vertices.add(v)
print(f"Overlapping vertices: {overlapping}")
```

### Cohesive Blocks

Specialized clustering for cohesive block analysis (hierarchically nested groups identified by vertex connectivity).

```python { .api }
class CohesiveBlocks:
    def __init__(self, graph, blocks=None, cohesion=None, parent=None):
        """
        Create cohesive blocks structure.

        Parameters:
        - graph: Graph object
        - blocks: list, block membership
        - cohesion: list, cohesion values for blocks
        - parent: list, parent blocks in hierarchy
        """

    @property
    def graph(self):
        """Get associated graph."""

    def cohesion(self, idx=None):
        """
        Get cohesion values.

        Parameters:
        - idx: int, specific block index (None for all)

        Returns:
        int/list, cohesion value(s)
        """

    def hierarchy(self):
        """
        Get block hierarchy information.

        Returns:
        Tree structure of cohesive blocks
        """
```

**Usage Examples:**

```python
# Cohesive blocks analysis
cohesive = g.cohesive_blocks()
print(f"Type: {type(cohesive)}")  # CohesiveBlocks

# Block information
n_blocks = len(cohesive)
print(f"Number of cohesive blocks: {n_blocks}")

# Cohesion levels
cohesions = cohesive.cohesion()
print(f"Cohesion levels: {cohesions}")

# Hierarchy structure
hierarchy = cohesive.hierarchy()
print("Block hierarchy available")
```

### Clustering Comparison

Compare different clustering results to understand algorithmic differences.

```python { .api }
def compare_communities(comm1, comm2, method="vi", remove_none=False):
    """
    Compare two clusterings.

    Parameters:
    - comm1, comm2: Clustering objects
    - method: str, comparison method
      ("vi" - variation of information, "nmi" - normalized mutual information,
      "split-join", "rand", "adjusted_rand")
    - remove_none: bool, ignore unassigned elements

    Returns:
    float, similarity/distance measure
    """

def split_join_distance(comm1, comm2):
    """
    Calculate split-join distance.

    Parameters:
    - comm1, comm2: Clustering objects

    Returns:
    tuple of int, the two one-sided distances (comm1 from comm2 and
    vice versa); their sum is the full split-join distance
    """
```

**Usage Examples:**

```python
from igraph import compare_communities, split_join_distance

# Generate different clusterings
leiden_comm = g.community_leiden(resolution=1.0)
louvain_comm = g.community_multilevel()  # Louvain algorithm
walktrap_comm = g.community_walktrap().as_clustering()
infomap_comm = g.community_infomap()

clusterings = [
    ("Leiden", leiden_comm),
    ("Louvain", louvain_comm),
    ("Walktrap", walktrap_comm),
    ("Infomap", infomap_comm)
]

# Compare all pairs
print("Variation of Information (lower = more similar):")
for i, (name1, comm1) in enumerate(clusterings):
    for j, (name2, comm2) in enumerate(clusterings):
        if i < j:  # Avoid duplicate comparisons
            vi = compare_communities(comm1, comm2, method="vi")
            nmi = compare_communities(comm1, comm2, method="nmi")
            sj = split_join_distance(comm1, comm2)
            print(f"{name1} vs {name2}: VI={vi:.3f}, NMI={nmi:.3f}, SJ={sj}")

# Consensus clustering
def consensus_clustering(clusterings, threshold=0.5):
    """
    Build a consensus (co-association) matrix from multiple clusterings.

    Parameters:
    - clusterings: list of clustering objects exposing a `membership` list
    - threshold: float, reserved for deriving a final clustering from the
      matrix; currently unused -- TODO(review): apply or remove

    Returns:
    list of list of float, n x n matrix where entry (i, j) is the fraction
    of clusterings that place elements i and j in the same cluster
    """
    n = len(clusterings[0].membership)
    n_runs = len(clusterings)
    consensus_matrix = [[0] * n for _ in range(n)]

    # Count co-occurrences once per unordered pair; "same cluster" is a
    # symmetric relation, so mirror the count instead of recomputing it.
    for clustering in clusterings:
        membership = clustering.membership
        for i in range(n):
            for j in range(i, n):
                if membership[i] == membership[j]:
                    consensus_matrix[i][j] += 1
                    if i != j:
                        consensus_matrix[j][i] += 1

    # Normalize counts to fractions in [0, 1].
    for row in consensus_matrix:
        for j in range(n):
            row[j] /= n_runs

    return consensus_matrix

# Create consensus
all_comms = [comm for _, comm in clusterings]
consensus = consensus_clustering(all_comms)
print("Consensus matrix computed")
```

### Clustering Evaluation

Assess clustering quality using various internal and external measures.

**Usage Examples:**

```python
def evaluate_clustering(graph, clustering):
    """
    Comprehensive clustering evaluation.

    Parameters:
    - graph: Graph object the clustering belongs to
    - clustering: VertexClustering to evaluate

    Returns:
    dict with cluster count, size statistics, modularity, coverage,
    internal/external edge counts and average conductance
    """
    results = {}

    # Basic statistics (compute sizes once; it is reused below)
    sizes = clustering.sizes()
    results["n_clusters"] = len(clustering)
    results["sizes"] = sizes
    results["modularity"] = clustering.modularity()

    # Size distribution; guard against an empty clustering
    if sizes:
        mean = sum(sizes) / len(sizes)
        results["size_mean"] = mean
        results["size_std"] = (sum((s - mean) ** 2 for s in sizes) / len(sizes)) ** 0.5
        results["size_max"] = max(sizes)
        results["size_min"] = min(sizes)
    else:
        results["size_mean"] = results["size_std"] = 0.0
        results["size_max"] = results["size_min"] = 0

    # Coverage and crossing edges; an edgeless graph is fully covered
    crossing = clustering.crossing()
    n_edges = len(crossing)
    n_crossing = sum(crossing)
    results["coverage"] = 1 - n_crossing / n_edges if n_edges else 1.0
    results["internal_edges"] = n_edges - n_crossing
    results["external_edges"] = n_crossing

    # Conductance per cluster: external degree over the cluster's volume
    # (sum of member degrees = 2 * internal edges + external edges)
    conductances = []
    for i in range(len(clustering)):
        cluster_vertices = clustering[i]
        if not cluster_vertices:
            continue
        members = set(cluster_vertices)  # O(1) membership tests in the loop below
        subgraph = clustering.subgraph(i)
        internal_degree = sum(subgraph.degree())  # 2 * internal edge count

        external_degree = sum(
            1
            for v in cluster_vertices
            for neighbor in graph.neighbors(v)
            if neighbor not in members
        )

        total_degree = internal_degree + external_degree
        conductances.append(external_degree / total_degree if total_degree > 0 else 0)

    results["avg_conductance"] = sum(conductances) / len(conductances) if conductances else 0

    return results

# Evaluate different algorithms
for name, comm in clusterings:
    eval_results = evaluate_clustering(g, comm)
    print(f"\n{name}:")
    print(f"  Clusters: {eval_results['n_clusters']}")
    print(f"  Modularity: {eval_results['modularity']:.3f}")
    print(f"  Coverage: {eval_results['coverage']:.3f}")
    print(f"  Avg conductance: {eval_results['avg_conductance']:.3f}")
    print(f"  Size range: {eval_results['size_min']}-{eval_results['size_max']}")

# Stability analysis
def clustering_stability(graph, algorithm_func, n_trials=10):
    """
    Assess clustering stability across multiple runs.

    Parameters:
    - graph: Graph object (kept for interface compatibility; the algorithm
      is expected to be bound to it via `algorithm_func`)
    - algorithm_func: callable returning a fresh clustering on each call
    - n_trials: int, number of independent runs to compare

    Returns:
    tuple (float, list of float): average pairwise NMI and all pairwise NMIs
    """
    clusterings = [algorithm_func() for _ in range(n_trials)]

    # Compare every unordered pair of runs with normalized mutual information.
    similarities = []
    for i in range(len(clusterings)):
        for j in range(i + 1, len(clusterings)):
            similarities.append(
                compare_communities(clusterings[i], clusterings[j], method="nmi")
            )

    # Fewer than two runs yield no pairs; report perfect stability instead
    # of dividing by zero.
    if not similarities:
        return 1.0, similarities

    avg_similarity = sum(similarities) / len(similarities)
    return avg_similarity, similarities

# Test stability
leiden_stability = clustering_stability(g, lambda: g.community_leiden())
print(f"\nLeiden stability (NMI): {leiden_stability[0]:.3f}")
```