or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

analysis.mdclustering.mdcommunity.mddata-types.mdgraph-creation.mdgraph-structure.mdindex.mdio-formats.mdlayout.mdsequences.mdvisualization.md

clustering.mddocs/

0

# Clustering

1

2

Specialized classes for representing and manipulating clustering results from community detection algorithms. These objects provide comprehensive functionality for analyzing, comparing, and visualizing community structures.

3

4

## Capabilities

5

6

### Base Clustering Class

7

8

Core functionality for working with any type of clustering or partition of ordered sets.

9

10

```python { .api }

11

class Clustering:

12

def __init__(self, membership, n=None):

13

"""

14

Create clustering from membership vector.

15

16

Parameters:

17

- membership: list of int, cluster assignment for each element

18

- n: int, total number of elements (None for auto-detection)

19

"""

20

21

def __len__(self):

22

"""

23

Get number of clusters.

24

25

Returns:

26

int, cluster count

27

"""

28

29

def __getitem__(self, idx):

30

"""

31

Get members of specific cluster.

32

33

Parameters:

34

- idx: int, cluster index

35

36

Returns:

37

list of int, member indices in cluster

38

"""

39

40

def size(self, idx):

41

"""

42

Get size of specific cluster.

43

44

Parameters:

45

- idx: int, cluster index

46

47

Returns:

48

int, number of elements in cluster

49

"""

50

51

def sizes(self):

52

"""

53

Get sizes of all clusters.

54

55

Returns:

56

list of int, cluster sizes

57

"""

58

59

def summary(self, n=None):

60

"""

61

Get summary statistics of clustering.

62

63

Parameters:

64

- n: int, number of largest clusters to show

65

66

Returns:

67

str, formatted summary

68

"""

69

```

70

71

**Usage Examples:**

72

73

```python

74

import igraph as ig

75

76

# Create clustering from membership vector

77

membership = [0, 0, 0, 1, 1, 2, 2, 2, 2]

78

clustering = ig.Clustering(membership)

79

80

# Basic operations

81

print(f"Number of clusters: {len(clustering)}")

82

print(f"Cluster 0 members: {clustering[0]}")

83

print(f"Cluster sizes: {clustering.sizes()}")

84

print(f"Largest cluster size: {clustering.size(clustering.sizes().index(max(clustering.sizes())))}")

85

86

# Summary information

87

print(clustering.summary())

88

89

# Iterate over clusters

90

for i, cluster in enumerate(clustering):

91

print(f"Cluster {i}: {cluster}")

92

```

93

94

### Vertex Clustering

95

96

Graph-specific clustering with enhanced functionality for network community analysis.

97

98

```python { .api }

99

class VertexClustering(Clustering):

100

def __init__(self, graph, membership=None):

101

"""

102

Create vertex clustering for graph.

103

104

Parameters:

105

- graph: Graph object

106

- membership: list, community assignment (None for singleton clusters)

107

"""

108

109

@classmethod

110

def FromAttribute(cls, graph, attribute):

111

"""

112

Create clustering from vertex attribute.

113

114

Parameters:

115

- graph: Graph object

116

- attribute: str, vertex attribute name containing cluster IDs

117

118

Returns:

119

VertexClustering based on attribute values

120

"""

121

122

@property

123

def graph(self):

124

"""Get associated graph object."""

125

126

@property

127

def membership(self):

128

"""Get membership vector."""

129

130

def modularity(self, weights=None):

131

"""

132

Calculate modularity of clustering.

133

134

Parameters:

135

- weights: str/list, edge weights or attribute name

136

137

Returns:

138

float, modularity score (-0.5 to 1.0)

139

"""

140

141

def crossing(self):

142

"""

143

Identify edges crossing between communities.

144

145

Returns:

146

list of bool, True for edges crossing communities

147

"""

148

149

def subgraph(self, idx):

150

"""

151

Get induced subgraph for specific community.

152

153

Parameters:

154

- idx: int, community index

155

156

Returns:

157

Graph, subgraph containing only community members

158

"""

159

160

def subgraphs(self):

161

"""

162

Get all community subgraphs.

163

164

Returns:

165

list of Graph, subgraphs for each community

166

"""

167

168

def giant(self):

169

"""

170

Get largest community.

171

172

Returns:

173

list of int, vertices in largest community

174

"""

175

```

176

177

**Usage Examples:**

178

179

```python

180

# Create sample graph with communities

181

g = ig.Graph.Famous("Zachary") # Karate club network

182

183

# Community detection

184

leiden_comm = g.community_leiden()

185

print(f"Type: {type(leiden_comm)}") # VertexClustering

186

187

# Modularity analysis

188

modularity = leiden_comm.modularity()

189

print(f"Modularity: {modularity:.3f}")

190

191

# Weighted modularity (if edges have weights)

192

if "weight" in g.es.attributes():

193

weighted_mod = leiden_comm.modularity(weights="weight")

194

print(f"Weighted modularity: {weighted_mod:.3f}")

195

196

# Edge crossing analysis

197

crossing_edges = leiden_comm.crossing()

198

n_crossing = sum(crossing_edges)

199

print(f"Edges crossing communities: {n_crossing}/{g.ecount()}")

200

201

# Community subgraphs

202

subgraphs = leiden_comm.subgraphs()

203

for i, sg in enumerate(subgraphs):

204

print(f"Community {i}: {sg.vcount()} vertices, {sg.ecount()} edges")

205

206

# Largest community

207

giant_community = leiden_comm.giant()

208

print(f"Largest community has {len(giant_community)} vertices")

209

210

# Create clustering from vertex attribute

211

g.vs["type"] = ["A", "A", "B", "B", "B", "C", "C"] # Example attributes

212

attr_clustering = ig.VertexClustering.FromAttribute(g, "type")

213

print(f"Attribute-based clustering: {attr_clustering.membership}")

214

```

215

216

### Hierarchical Clustering (Dendrograms)

217

218

Tree-like structures representing hierarchical community organization.

219

220

```python { .api }

221

class Dendrogram:

222

def __init__(self, merges, n=None, nmerges=None):

223

"""

224

Create dendrogram from merge information.

225

226

Parameters:

227

- merges: list of tuples, merge steps (cluster1, cluster2)

228

- n: int, number of leaves

229

- nmerges: int, number of merges

230

"""

231

232

def format(self, format="newick"):

233

"""

234

Export dendrogram in standard format.

235

236

Parameters:

237

- format: str, output format ("newick" supported)

238

239

Returns:

240

str, formatted dendrogram

241

"""

242

243

def summary(self):

244

"""

245

Get ASCII art summary of dendrogram.

246

247

Returns:

248

str, tree visualization

249

"""

250

251

class VertexDendrogram(Dendrogram):

252

def __init__(self, graph, merges, modularity=None):

253

"""

254

Create vertex dendrogram for graph.

255

256

Parameters:

257

- graph: Graph object

258

- merges: merge steps

259

- modularity: list, modularity at each merge level

260

"""

261

262

def as_clustering(self, n=None):

263

"""

264

Convert dendrogram to flat clustering.

265

266

Parameters:

267

- n: int, number of clusters (None for optimal)

268

269

Returns:

270

VertexClustering at specified level

271

"""

272

273

@property

274

def optimal_count(self):

275

"""

276

Get optimal number of clusters (highest modularity).

277

278

Returns:

279

int, optimal cluster count

280

"""

281

```

282

283

**Usage Examples:**

284

285

```python

286

# Hierarchical community detection

287

walktrap_dendro = g.community_walktrap()

288

print(f"Type: {type(walktrap_dendro)}") # VertexDendrogram

289

290

# Optimal clustering

291

optimal_n = walktrap_dendro.optimal_count

292

print(f"Optimal number of clusters: {optimal_n}")

293

294

optimal_clustering = walktrap_dendro.as_clustering()

295

print(f"Optimal modularity: {optimal_clustering.modularity():.3f}")

296

297

# Explore different cut levels

298

for n_clusters in [2, 3, 4, 5]:

299

clustering = walktrap_dendro.as_clustering(n_clusters)

300

print(f"{n_clusters} clusters: modularity {clustering.modularity():.3f}")

301

302

# Export formats

303

newick_str = walktrap_dendro.format("newick")

304

print("Newick format:", newick_str[:100], "...")

305

306

# ASCII visualization

307

print(walktrap_dendro.summary())

308

309

# Fast greedy also returns dendrogram

310

fastgreedy_dendro = g.community_fastgreedy()

311

fg_optimal = fastgreedy_dendro.as_clustering()

312

```

313

314

### Community Covers

315

316

Handle overlapping communities where vertices can belong to multiple clusters.

317

318

```python { .api }

319

class Cover:

320

def __init__(self, n=0):

321

"""

322

Create empty cover structure.

323

324

Parameters:

325

- n: int, number of elements

326

"""

327

328

def add_cluster(self, cluster):

329

"""

330

Add cluster to cover.

331

332

Parameters:

333

- cluster: list, members of new cluster

334

335

Returns:

336

None

337

"""

338

339

def size(self, idx):

340

"""

341

Get size of specific cluster.

342

343

Parameters:

344

- idx: int, cluster index

345

346

Returns:

347

int, cluster size

348

"""

349

350

class VertexCover(Cover):

351

def __init__(self, graph, clusters=None):

352

"""

353

Create vertex cover for graph.

354

355

Parameters:

356

- graph: Graph object

357

- clusters: list of lists, cluster memberships

358

"""

359

360

@property

361

def graph(self):

362

"""Get associated graph."""

363

364

def subgraph(self, idx):

365

"""

366

Get subgraph for cluster (may include overlaps).

367

368

Parameters:

369

- idx: int, cluster index

370

371

Returns:

372

Graph, cluster subgraph

373

"""

374

```

375

376

**Usage Examples:**

377

378

```python

379

# Create overlapping communities manually

380

vertices = list(range(g.vcount()))

381

overlapping_clusters = [

382

[0, 1, 2, 5], # Cluster 0

383

[2, 3, 4, 5], # Cluster 1 (vertices 2,5 overlap)

384

[4, 6, 7, 8] # Cluster 2 (vertex 4 overlaps)

385

]

386

387

cover = ig.VertexCover(g, overlapping_clusters)

388

389

# Analyze overlaps

390

for i in range(len(overlapping_clusters)):

391

subgraph = cover.subgraph(i)

392

print(f"Cluster {i}: {cover.size(i)} vertices, {subgraph.ecount()} edges")

393

394

# Find overlapping vertices

395

all_vertices = set()

396

overlapping = set()

397

for cluster in overlapping_clusters:

398

for v in cluster:

399

if v in all_vertices:

400

overlapping.add(v)

401

all_vertices.add(v)

402

print(f"Overlapping vertices: {overlapping}")

403

```

404

405

### Cohesive Blocks

406

407

Specialized clustering for cohesive block analysis (k-core based communities).

408

409

```python { .api }

410

class CohesiveBlocks:

411

def __init__(self, graph, blocks=None, cohesion=None, parent=None):

412

"""

413

Create cohesive blocks structure.

414

415

Parameters:

416

- graph: Graph object

417

- blocks: list, block membership

418

- cohesion: list, cohesion values for blocks

419

- parent: list, parent blocks in hierarchy

420

"""

421

422

@property

423

def graph(self):

424

"""Get associated graph."""

425

426

def cohesion(self, idx=None):

427

"""

428

Get cohesion values.

429

430

Parameters:

431

- idx: int, specific block index (None for all)

432

433

Returns:

434

int/list, cohesion value(s)

435

"""

436

437

def hierarchy(self):

438

"""

439

Get block hierarchy information.

440

441

Returns:

442

Tree structure of cohesive blocks

443

"""

444

```

445

446

**Usage Examples:**

447

448

```python

449

# Cohesive blocks analysis

450

cohesive = g.cohesive_blocks()

451

print(f"Type: {type(cohesive)}") # CohesiveBlocks

452

453

# Block information

454

n_blocks = len(cohesive)

455

print(f"Number of cohesive blocks: {n_blocks}")

456

457

# Cohesion levels

458

cohesions = cohesive.cohesion()

459

print(f"Cohesion levels: {cohesions}")

460

461

# Hierarchy structure

462

hierarchy = cohesive.hierarchy()

463

print("Block hierarchy available")

464

```

465

466

### Clustering Comparison

467

468

Compare different clustering results to understand algorithmic differences.

469

470

```python { .api }

471

def compare_communities(comm1, comm2, method="vi", remove_none=False):

472

"""

473

Compare two clusterings.

474

475

Parameters:

476

- comm1, comm2: Clustering objects

477

- method: str, comparison method

478

("vi" - variation of information, "nmi" - normalized mutual information,

479

"split-join", "rand", "adjusted_rand")

480

- remove_none: bool, ignore unassigned elements

481

482

Returns:

483

float, similarity/distance measure

484

"""

485

486

def split_join_distance(comm1, comm2):

487

"""

488

Calculate split-join distance.

489

490

Parameters:

491

- comm1, comm2: Clustering objects

492

493

Returns:

494

int, split-join distance

495

"""

496

```

497

498

**Usage Examples:**

499

500

```python

501

from igraph import compare_communities, split_join_distance

502

503

# Generate different clusterings

504

leiden_comm = g.community_leiden(resolution=1.0)

505

louvain_comm = g.community_louvain()

506

walktrap_comm = g.community_walktrap().as_clustering()

507

infomap_comm = g.community_infomap()

508

509

clusterings = [

510

("Leiden", leiden_comm),

511

("Louvain", louvain_comm),

512

("Walktrap", walktrap_comm),

513

("Infomap", infomap_comm)

514

]

515

516

# Compare all pairs

517

print("Variation of Information (lower = more similar):")

518

for i, (name1, comm1) in enumerate(clusterings):

519

for j, (name2, comm2) in enumerate(clusterings):

520

if i < j: # Avoid duplicate comparisons

521

vi = compare_communities(comm1, comm2, method="vi")

522

nmi = compare_communities(comm1, comm2, method="nmi")

523

sj = split_join_distance(comm1, comm2)

524

print(f"{name1} vs {name2}: VI={vi:.3f}, NMI={nmi:.3f}, SJ={sj}")

525

526

# Consensus clustering

527

def consensus_clustering(clusterings, threshold=0.5):

528

"""Create consensus from multiple clusterings."""

529

n = len(clusterings[0].membership)

530

consensus_matrix = [[0 for _ in range(n)] for _ in range(n)]

531

532

# Count co-occurrences

533

for clustering in clusterings:

534

for i in range(n):

535

for j in range(n):

536

if clustering.membership[i] == clustering.membership[j]:

537

consensus_matrix[i][j] += 1

538

539

# Normalize

540

for i in range(n):

541

for j in range(n):

542

consensus_matrix[i][j] /= len(clusterings)

543

544

return consensus_matrix

545

546

# Create consensus

547

all_comms = [comm for _, comm in clusterings]

548

consensus = consensus_clustering(all_comms)

549

print("Consensus matrix computed")

550

```

551

552

### Clustering Evaluation

553

554

Assess clustering quality using various internal and external measures.

555

556

**Usage Examples:**

557

558

```python

559

def evaluate_clustering(graph, clustering):

560

"""Comprehensive clustering evaluation."""

561

results = {}

562

563

# Basic statistics

564

results["n_clusters"] = len(clustering)

565

results["sizes"] = clustering.sizes()

566

results["modularity"] = clustering.modularity()

567

568

# Size distribution

569

sizes = clustering.sizes()

570

results["size_mean"] = sum(sizes) / len(sizes)

571

results["size_std"] = (sum((s - results["size_mean"])**2 for s in sizes) / len(sizes))**0.5

572

results["size_max"] = max(sizes)

573

results["size_min"] = min(sizes)

574

575

# Coverage and crossing edges

576

crossing = clustering.crossing()

577

results["coverage"] = 1 - sum(crossing) / len(crossing)

578

results["internal_edges"] = len(crossing) - sum(crossing)

579

results["external_edges"] = sum(crossing)

580

581

# Conductance (for each cluster)

582

conductances = []

583

for i in range(len(clustering)):

584

cluster_vertices = clustering[i]

585

if len(cluster_vertices) > 0:

586

subgraph = clustering.subgraph(i)

587

internal_degree = sum(subgraph.degree())

588

589

# External degree

590

external_degree = 0

591

for v in cluster_vertices:

592

for neighbor in graph.neighbors(v):

593

if neighbor not in cluster_vertices:

594

external_degree += 1

595

596

total_degree = internal_degree + external_degree

597

conductance = external_degree / total_degree if total_degree > 0 else 0

598

conductances.append(conductance)

599

600

results["avg_conductance"] = sum(conductances) / len(conductances) if conductances else 0

601

602

return results

603

604

# Evaluate different algorithms

605

for name, comm in clusterings:

606

eval_results = evaluate_clustering(g, comm)

607

print(f"\\n{name}:")

608

print(f" Clusters: {eval_results['n_clusters']}")

609

print(f" Modularity: {eval_results['modularity']:.3f}")

610

print(f" Coverage: {eval_results['coverage']:.3f}")

611

print(f" Avg conductance: {eval_results['avg_conductance']:.3f}")

612

print(f" Size range: {eval_results['size_min']}-{eval_results['size_max']}")

613

614

# Stability analysis

615

def clustering_stability(graph, algorithm_func, n_trials=10):

616

"""Assess clustering stability across multiple runs."""

617

clusterings = []

618

619

for _ in range(n_trials):

620

clustering = algorithm_func()

621

clusterings.append(clustering)

622

623

# Compare all pairs

624

similarities = []

625

for i in range(len(clusterings)):

626

for j in range(i+1, len(clusterings)):

627

nmi = compare_communities(clusterings[i], clusterings[j], method="nmi")

628

similarities.append(nmi)

629

630

avg_similarity = sum(similarities) / len(similarities)

631

return avg_similarity, similarities

632

633

# Test stability

634

leiden_stability = clustering_stability(g, lambda: g.community_leiden())

635

print(f"\\nLeiden stability (NMI): {leiden_stability[0]:.3f}")

636

```