or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

index.md node-operations.md tree-analysis.md tree-io.md tree-manipulation.md tree-traversal.md visualization.md

docs/tree-analysis.md

0

# Tree Analysis and Metrics

1

2

TreeSwift provides comprehensive tree analysis including distance calculations, balance indices, phylogenetic statistics, and coalescence analysis. These methods support both basic tree metrics and advanced phylogenetic measures used in evolutionary biology and comparative studies.

3

4

## Capabilities

5

6

### Distance Calculations

7

8

Compute pairwise distances and tree-wide distance metrics.

9

10

```python { .api }

11

def distance_between(self, u: Node, v: Node) -> float:

12

"""

13

Calculate distance between two nodes.

14

15

Parameters:

16

- u (Node): First node

17

- v (Node): Second node

18

19

Returns:

20

- float: Distance between nodes u and v

21

"""

22

23

def distance_matrix(self, leaf_labels: bool = False) -> dict:

24

"""

25

Compute pairwise distance matrix of all leaves.

26

27

Parameters:

28

- leaf_labels (bool): Use leaf labels as keys instead of Node objects

29

30

Returns:

31

- dict: 2D dictionary with distances between all leaf pairs

32

"""

33

34

def distances_from_root(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False, weighted: bool = True) -> Generator[tuple[Node, float], None, None]:

35

"""

36

Generate distances from root to selected nodes.

37

38

Parameters:

39

- leaves (bool): Include leaf nodes

40

- internal (bool): Include internal nodes

41

- unlabeled (bool): Include nodes without labels

42

- weighted (bool): Sum edge lengths (True) or count the number of edges (False)

43

44

Yields:

45

- tuple[Node, float]: (node, distance_from_root) pairs

46

"""

47

48

def distances_from_parent(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False) -> Generator[tuple[Node, float], None, None]:

49

"""

50

Generate distances from each node to its parent.

51

52

Parameters:

53

- leaves (bool): Include leaf nodes

54

- internal (bool): Include internal nodes

55

- unlabeled (bool): Include nodes without labels

56

57

Yields:

58

- tuple[Node, float]: (node, distance_to_parent) pairs

59

"""

60

```

61

62

Usage examples:

63

64

```python

65

import treeswift

66

67

tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")

68

69

# Distance between specific nodes

70

nodeA = None

71

nodeB = None

72

for node in tree.traverse_leaves():

73

if node.get_label() == "A":

74

nodeA = node

75

elif node.get_label() == "B":

76

nodeB = node

77

78

if nodeA and nodeB:

79

dist = tree.distance_between(nodeA, nodeB)

80

print(f"Distance A-B: {dist}")

81

82

# Complete distance matrix

83

dist_matrix = tree.distance_matrix(leaf_labels=True)

84

print("Distance matrix:")

85

for taxon1 in ["A", "B", "C", "D"]:

86

for taxon2 in ["A", "B", "C", "D"]:

87

if taxon1 in dist_matrix and taxon2 in dist_matrix[taxon1]:

88

print(f" {taxon1}-{taxon2}: {dist_matrix[taxon1][taxon2]:.3f}")

89

90

# Distances from root

91

print("Distances from root:")

92

for node, distance in tree.distances_from_root():

93

label = node.get_label() or "internal"

94

print(f" {label}: {distance:.3f}")

95

```

96

97

### Tree Dimensions and Extremes

98

99

Calculate tree height, diameter, and find extreme nodes.

100

101

```python { .api }

102

def height(self, weighted: bool = True) -> float:

103

"""

104

Compute tree height (maximum distance from root to leaf).

105

106

Parameters:

107

- weighted (bool): Sum edge lengths (True) or count the number of edges (False)

108

109

Returns:

110

- float: Maximum distance from root to any leaf

111

"""

112

113

def diameter(self) -> float:

114

"""

115

Compute tree diameter (maximum pairwise leaf distance).

116

117

Returns:

118

- float: Maximum distance between any two leaves

119

"""

120

121

def closest_leaf_to_root(self) -> tuple[Node, float]:

122

"""

123

Find leaf closest to root.

124

125

Returns:

126

- tuple[Node, float]: (closest_leaf, distance) pair

127

"""

128

129

def furthest_from_root(self) -> tuple[Node, float]:

130

"""

131

Find node furthest from root.

132

133

Returns:

134

- tuple[Node, float]: (furthest_node, distance) pair

135

"""

136

```

137

138

Usage examples:

139

140

```python

141

import treeswift

142

143

tree = treeswift.read_tree_newick("((A:0.1,B:0.4):0.2,(C:0.3,D:0.1):0.5);")

144

145

# Basic tree dimensions

146

print(f"Tree height: {tree.height():.3f}")

147

print(f"Tree diameter: {tree.diameter():.3f}")

148

149

# Find extreme nodes

150

closest_leaf, closest_dist = tree.closest_leaf_to_root()

151

print(f"Closest leaf to root: {closest_leaf.get_label()} (distance: {closest_dist:.3f})")

152

153

furthest_node, furthest_dist = tree.furthest_from_root()

154

furthest_label = furthest_node.get_label() or "internal"

155

print(f"Furthest node from root: {furthest_label} (distance: {furthest_dist:.3f})")

156

157

# Compare weighted vs unweighted height

158

print(f"Weighted height: {tree.height(weighted=True):.3f}")

159

print(f"Unweighted height (edge count): {tree.height(weighted=False)}")

160

```

161

162

### Tree Balance Indices

163

164

Quantify tree balance using standard phylogenetic indices.

165

166

```python { .api }

167

def colless(self, normalize: str | None = 'leaves') -> float:

168

"""

169

Compute Colless balance index.

170

171

Parameters:

172

- normalize (str): Normalization method ('leaves', 'yule', 'pda', or None)

173

174

Returns:

175

- float: Colless index (lower values = more balanced)

176

"""

177

178

def sackin(self, normalize: str | None = 'leaves') -> float:

179

"""

180

Compute Sackin balance index.

181

182

Parameters:

183

- normalize (str): Normalization method ('leaves', 'yule', 'pda', or None)

184

185

Returns:

186

- float: Sackin index (lower values = more balanced)

187

"""

188

```

189

190

Usage examples:

191

192

```python

193

import treeswift

194

195

# Compare balanced vs unbalanced trees

196

balanced_tree = treeswift.read_tree_newick("((A,B),(C,D));")

197

unbalanced_tree = treeswift.read_tree_newick("(((A,B),C),D);")

198

199

print("Balanced tree:")

200

print(f" Colless: {balanced_tree.colless():.3f}")

201

print(f" Sackin: {balanced_tree.sackin():.3f}")

202

203

print("Unbalanced tree:")

204

print(f" Colless: {unbalanced_tree.colless():.3f}")

205

print(f" Sackin: {unbalanced_tree.sackin():.3f}")

206

207

# Different normalization methods

208

tree = treeswift.read_tree_newick("(((A,B),C),(D,E));")

209

print("Normalization methods:")

210

for norm in [None, 'leaves', 'yule', 'pda']:

211

colless_val = tree.colless(normalize=norm)

212

print(f" Colless ({norm}): {colless_val:.3f}")

213

```

214

215

### Phylogenetic Statistics

216

217

Calculate statistics specific to phylogenetic trees.

218

219

```python { .api }

220

def gamma_statistic(self) -> float:

221

"""

222

Compute Gamma statistic of Pybus and Harvey (2000).

223

224

Returns:

225

- float: Gamma statistic (negative = early branching, positive = late branching)

226

"""

227

228

def treeness(self) -> float:

229

"""

230

Compute treeness (proportion of total tree length in internal branches).

231

232

Returns:

233

- float: Ratio of internal branch length sum to total branch length sum

234

"""

235

236

def num_cherries(self) -> int:

237

"""

238

Count cherries (internal nodes with only leaf children).

239

240

Returns:

241

- int: Number of cherries in the tree

242

"""

243

```

244

245

Usage examples:

246

247

```python

248

import treeswift

249

250

# Phylogenetic statistics

251

tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.5,(C:0.2,D:0.2):0.3);")

252

253

gamma = tree.gamma_statistic()

254

print(f"Gamma statistic: {gamma:.3f}")

255

if gamma < 0:

256

print(" Early diversification pattern")

257

elif gamma > 0:

258

print(" Late diversification pattern")

259

else:

260

print(" Constant rate diversification")

261

262

treeness = tree.treeness()

263

print(f"Treeness: {treeness:.3f} ({treeness*100:.1f}% internal branches)")

264

265

cherries = tree.num_cherries()

266

print(f"Number of cherries: {cherries}")

267

```

268

269

### Coalescence Analysis

270

271

Analyze coalescence patterns and lineage dynamics.

272

273

```python { .api }

274

def coalescence_times(self, backward: bool = True) -> Generator[float, None, None]:

275

"""

276

Generate coalescence event times.

277

278

Parameters:

279

- backward (bool): Times going backward from present (True) or forward from root (False)

280

281

Yields:

282

- float: Times of successive coalescence events

283

"""

284

285

def coalescence_waiting_times(self, backward: bool = True) -> Generator[float, None, None]:

286

"""

287

Generate waiting times between coalescence events.

288

289

Parameters:

290

- backward (bool): Going backward from present (True) or forward from root (False)

291

292

Yields:

293

- float: Waiting times between successive coalescence events

294

"""

295

296

def num_lineages_at(self, distance: float) -> int:

297

"""

298

Count lineages at specified distance from root.

299

300

Parameters:

301

- distance (float): Distance from root

302

303

Returns:

304

- int: Number of lineages existing at given distance

305

"""

306

```

307

308

Usage examples:

309

310

```python

311

import treeswift

312

313

tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.3,(C:0.2,D:0.2):0.2);")

314

315

# Coalescence times

316

print("Coalescence times (backward from present):")

317

for i, time in enumerate(tree.coalescence_times(backward=True)):

318

print(f" Event {i+1}: {time:.3f}")

319

320

# Waiting times between events

321

print("Waiting times between coalescence events:")

322

for i, wait_time in enumerate(tree.coalescence_waiting_times()):

323

print(f" Interval {i+1}: {wait_time:.3f}")

324

325

# Lineage count through time

326

print("Lineages at different distances from root:")

327

for dist in [0.0, 0.1, 0.2, 0.3, 0.4]:

328

count = tree.num_lineages_at(dist)

329

print(f" Distance {dist}: {count} lineages")

330

```

331

332

### Branch Length Analysis

333

334

Analyze patterns in branch lengths across the tree.

335

336

```python { .api }

337

def avg_branch_length(self, terminal: bool = True, internal: bool = True) -> float:

338

"""

339

Compute average length of selected branches.

340

341

Parameters:

342

- terminal (bool): Include terminal branches

343

- internal (bool): Include internal branches

344

345

Returns:

346

- float: Average branch length

347

"""

348

349

def branch_lengths(self, terminal: bool = True, internal: bool = True) -> Generator[float, None, None]:

350

"""

351

Generate branch lengths of selected branches.

352

353

Parameters:

354

- terminal (bool): Include terminal branches

355

- internal (bool): Include internal branches

356

357

Yields:

358

- float: Branch lengths (None edges yield 0)

359

"""

360

361

def edge_length_sum(self, terminal: bool = True, internal: bool = True) -> float:

362

"""

363

Sum all selected edge lengths.

364

365

Parameters:

366

- terminal (bool): Include terminal branches

367

- internal (bool): Include internal branches

368

369

Returns:

370

- float: Total length of selected branches

371

"""

372

```

373

374

Usage examples:

375

376

```python

377

import treeswift

378

379

tree = treeswift.read_tree_newick("((A:0.1,B:0.3):0.2,(C:0.4,D:0.1):0.5);")

380

381

# Branch length statistics

382

total_length = tree.edge_length_sum()

383

avg_length = tree.avg_branch_length()

384

print(f"Total tree length: {total_length:.3f}")

385

print(f"Average branch length: {avg_length:.3f}")

386

387

# Separate terminal vs internal branches

388

term_avg = tree.avg_branch_length(internal=False)

389

int_avg = tree.avg_branch_length(terminal=False)

390

print(f"Average terminal branch: {term_avg:.3f}")

391

print(f"Average internal branch: {int_avg:.3f}")

392

393

# Collect all branch lengths

394

all_lengths = list(tree.branch_lengths())

395

print(f"All branch lengths: {[round(x, 3) for x in all_lengths]}")

396

print(f"Range: {min(all_lengths):.3f} - {max(all_lengths):.3f}")

397

```

398

399

### Node Search and Utility Methods

400

401

Find nodes by labels and work with node collections in the tree.

402

403

```python { .api }

404

def find_node(self, label: object, leaves: bool = True, internal: bool = False) -> Node | list[Node] | None:

405

"""

406

Find node(s) with specified label.

407

408

Parameters:

409

- label (object): Label to search for

410

- leaves (bool): Include leaf nodes in search

411

- internal (bool): Include internal nodes in search

412

413

Returns:

414

- Node: Single node if only one found

415

- list[Node]: List of nodes if multiple found

416

- None: If no nodes found

417

"""

418

419

def label_to_node(self, selection: str | set = 'leaves') -> dict:

420

"""

421

Return dictionary mapping labels to Node objects.

422

423

Parameters:

424

- selection (str | set): Node selection - 'leaves', 'internal', 'all', or set of labels

425

426

Returns:

427

- dict: Dictionary mapping labels to Node objects

428

"""

429

430

def labels(self, leaves: bool = True, internal: bool = True) -> Generator[object, None, None]:

431

"""

432

Generate non-None node labels.

433

434

Parameters:

435

- leaves (bool): Include leaf node labels

436

- internal (bool): Include internal node labels

437

438

Yields:

439

- object: Node labels (non-None only)

440

"""

441

```

442

443

Usage examples:

444

445

```python

446

import treeswift

447

448

tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")

449

450

# Find specific nodes

451

node_a = tree.find_node("A")

452

print(f"Found node A: {node_a.get_label()}")

453

454

# Find nodes that might not exist

455

node_x = tree.find_node("X")

456

print(f"Node X found: {node_x is not None}")

457

458

# Search in both leaves and internal nodes

459

all_matches = tree.find_node("A", leaves=True, internal=True)

460

461

# Get mapping of labels to nodes

462

leaf_map = tree.label_to_node('leaves')

463

print(f"Leaf labels: {list(leaf_map.keys())}")

464

465

# Get mapping for all nodes

466

all_map = tree.label_to_node('all')

467

print(f"All labeled nodes: {len(all_map)}")

468

469

# Get mapping for specific labels only

470

specific_map = tree.label_to_node({'A', 'B'})

471

print(f"Specific nodes: {list(specific_map.keys())}")

472

473

# Iterate over all labels

474

all_labels = list(tree.labels())

475

print(f"All labels: {all_labels}")

476

477

# Get only leaf labels

478

leaf_labels = list(tree.labels(internal=False))

479

print(f"Leaf labels only: {leaf_labels}")

480

```

481

482

### MRCA Analysis

483

484

Find most recent common ancestors and analyze relationships.

485

486

```python { .api }

487

def mrca(self, labels: set) -> Node:

488

"""

489

Find most recent common ancestor of nodes with specified labels.

490

491

Parameters:

492

- labels (set): Set of node labels to find MRCA for

493

494

Returns:

495

- Node: Most recent common ancestor node

496

"""

497

498

def mrca_matrix(self) -> dict:

499

"""

500

Compute matrix of all pairwise MRCAs.

501

502

Returns:

503

- dict: 2D dictionary storing all pairwise MRCA relationships

504

"""

505

```

506

507

Usage examples:

508

509

```python

510

import treeswift

511

512

tree = treeswift.read_tree_newick("(((A,B),C),((D,E),F));")

513

514

# Find MRCA of specific taxa

515

mrca_ab = tree.mrca({"A", "B"})

516

mrca_abc = tree.mrca({"A", "B", "C"})

517

mrca_all = tree.mrca({"A", "B", "D", "E"})

518

519

print(f"MRCA of A,B has {mrca_ab.num_children()} children")

520

print(f"MRCA of A,B,C has {mrca_abc.num_children()} children")

521

print(f"MRCA of A,B,D,E is root: {mrca_all == tree.root}")

522

523

# Full MRCA matrix

524

mrca_matrix = tree.mrca_matrix()

525

leaves = ["A", "B", "C", "D", "E", "F"]

526

print("MRCA relationships (showing number of descendants):")

527

for i, leaf1 in enumerate(leaves):

528

for leaf2 in leaves[i+1:]:

529

# Find nodes with these labels

530

node1 = tree.find_node(leaf1)

531

node2 = tree.find_node(leaf2)

532

if node1 and node2 and node1 in mrca_matrix and node2 in mrca_matrix[node1]:

533

mrca_node = mrca_matrix[node1][node2]

534

desc_count = mrca_node.num_nodes()

535

print(f" MRCA({leaf1},{leaf2}): {desc_count} descendants")

536

```