0
# Tree Analysis and Metrics
1
2
TreeSwift provides comprehensive tree analysis including distance calculations, balance indices, phylogenetic statistics, and coalescence analysis. These methods support both basic tree metrics and advanced phylogenetic measures used in evolutionary biology and comparative studies.
3
4
## Capabilities
5
6
### Distance Calculations
7
8
Compute pairwise distances and tree-wide distance metrics.
9
10
```python { .api }
11
def distance_between(self, u: Node, v: Node) -> float:
12
"""
13
Calculate distance between two nodes.
14
15
Parameters:
16
- u (Node): First node
17
- v (Node): Second node
18
19
Returns:
20
- float: Distance between nodes u and v
21
"""
22
23
def distance_matrix(self, leaf_labels: bool = False) -> dict:
24
"""
25
Compute pairwise distance matrix of all leaves.
26
27
Parameters:
28
- leaf_labels (bool): Use leaf labels as keys instead of Node objects
29
30
Returns:
31
- dict: 2D dictionary with distances between all leaf pairs
32
"""
33
34
def distances_from_root(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False, weighted: bool = True) -> Generator[tuple[Node, float], None, None]:
35
"""
36
Generate distances from root to selected nodes.
37
38
Parameters:
39
- leaves (bool): Include leaf nodes
40
- internal (bool): Include internal nodes
41
- unlabeled (bool): Include nodes without labels
42
- weighted (bool): If True, distance is the sum of edge lengths; if False, distance is the number of edges
43
44
Yields:
45
- tuple[Node, float]: (node, distance_from_root) pairs
46
"""
47
48
def distances_from_parent(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False) -> Generator[tuple[Node, float], None, None]:
49
"""
50
Generate distances from each node to its parent.
51
52
Parameters:
53
- leaves (bool): Include leaf nodes
54
- internal (bool): Include internal nodes
55
- unlabeled (bool): Include nodes without labels
56
57
Yields:
58
- tuple[Node, float]: (node, distance_to_parent) pairs
59
"""
60
```
61
62
Usage examples:
63
64
```python
65
import treeswift
66
67
tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")
68
69
# Distance between specific nodes
70
nodeA = None
71
nodeB = None
72
for node in tree.traverse_leaves():
73
if node.get_label() == "A":
74
nodeA = node
75
elif node.get_label() == "B":
76
nodeB = node
77
78
if nodeA and nodeB:
79
dist = tree.distance_between(nodeA, nodeB)
80
print(f"Distance A-B: {dist}")
81
82
# Complete distance matrix
83
dist_matrix = tree.distance_matrix(leaf_labels=True)
84
print("Distance matrix:")
85
for taxon1 in ["A", "B", "C", "D"]:
86
for taxon2 in ["A", "B", "C", "D"]:
87
if taxon1 in dist_matrix and taxon2 in dist_matrix[taxon1]:
88
print(f" {taxon1}-{taxon2}: {dist_matrix[taxon1][taxon2]:.3f}")
89
90
# Distances from root
91
print("Distances from root:")
92
for node, distance in tree.distances_from_root():
93
label = node.get_label() or "internal"
94
print(f" {label}: {distance:.3f}")
95
```
96
97
### Tree Dimensions and Extremes
98
99
Calculate tree height, diameter, and find extreme nodes.
100
101
```python { .api }
102
def height(self, weighted: bool = True) -> float:
103
"""
104
Compute tree height (maximum distance from root to leaf).
105
106
Parameters:
107
- weighted (bool): If True, height is the sum of edge lengths; if False, height is the number of edges
108
109
Returns:
110
- float: Maximum distance from root to any leaf
111
"""
112
113
def diameter(self) -> float:
114
"""
115
Compute tree diameter (maximum pairwise leaf distance).
116
117
Returns:
118
- float: Maximum distance between any two leaves
119
"""
120
121
def closest_leaf_to_root(self) -> tuple[Node, float]:
122
"""
123
Find leaf closest to root.
124
125
Returns:
126
- tuple[Node, float]: (closest_leaf, distance) pair
127
"""
128
129
def furthest_from_root(self) -> tuple[Node, float]:
130
"""
131
Find node furthest from root.
132
133
Returns:
134
- tuple[Node, float]: (furthest_node, distance) pair
135
"""
136
```
137
138
Usage examples:
139
140
```python
141
import treeswift
142
143
tree = treeswift.read_tree_newick("((A:0.1,B:0.4):0.2,(C:0.3,D:0.1):0.5);")
144
145
# Basic tree dimensions
146
print(f"Tree height: {tree.height():.3f}")
147
print(f"Tree diameter: {tree.diameter():.3f}")
148
149
# Find extreme nodes
150
closest_leaf, closest_dist = tree.closest_leaf_to_root()
151
print(f"Closest leaf to root: {closest_leaf.get_label()} (distance: {closest_dist:.3f})")
152
153
furthest_node, furthest_dist = tree.furthest_from_root()
154
furthest_label = furthest_node.get_label() or "internal"
155
print(f"Furthest node from root: {furthest_label} (distance: {furthest_dist:.3f})")
156
157
# Compare weighted vs unweighted height
158
print(f"Weighted height: {tree.height(weighted=True):.3f}")
159
print(f"Unweighted height (node count): {tree.height(weighted=False)}")
160
```
161
162
### Tree Balance Indices
163
164
Quantify tree balance using standard phylogenetic indices.
165
166
```python { .api }
167
def colless(self, normalize: str = 'leaves') -> float:
168
"""
169
Compute Colless balance index.
170
171
Parameters:
172
- normalize (str | None): Normalization method: 'leaves', 'yule', or 'pda'; pass None to return the unnormalized index
173
174
Returns:
175
- float: Colless index (lower values = more balanced)
176
"""
177
178
def sackin(self, normalize: str = 'leaves') -> float:
179
"""
180
Compute Sackin balance index.
181
182
Parameters:
183
- normalize (str | None): Normalization method: 'leaves', 'yule', or 'pda'; pass None to return the unnormalized index
184
185
Returns:
186
- float: Sackin index (lower values = more balanced)
187
"""
188
```
189
190
Usage examples:
191
192
```python
193
import treeswift
194
195
# Compare balanced vs unbalanced trees
196
balanced_tree = treeswift.read_tree_newick("((A,B),(C,D));")
197
unbalanced_tree = treeswift.read_tree_newick("(((A,B),C),D);")
198
199
print("Balanced tree:")
200
print(f" Colless: {balanced_tree.colless():.3f}")
201
print(f" Sackin: {balanced_tree.sackin():.3f}")
202
203
print("Unbalanced tree:")
204
print(f" Colless: {unbalanced_tree.colless():.3f}")
205
print(f" Sackin: {unbalanced_tree.sackin():.3f}")
206
207
# Different normalization methods
208
tree = treeswift.read_tree_newick("(((A,B),C),(D,E));")
209
print("Normalization methods:")
210
for norm in [None, 'leaves', 'yule', 'pda']:
211
colless_val = tree.colless(normalize=norm)
212
print(f" Colless ({norm}): {colless_val:.3f}")
213
```
214
215
### Phylogenetic Statistics
216
217
Calculate statistics specific to phylogenetic trees.
218
219
```python { .api }
220
def gamma_statistic(self) -> float:
221
"""
222
Compute Gamma statistic of Pybus and Harvey (2000).
223
224
Returns:
225
- float: Gamma statistic (negative = early branching, positive = late branching)
226
"""
227
228
def treeness(self) -> float:
229
"""
230
Compute treeness (proportion of total tree length in internal branches).
231
232
Returns:
233
- float: Ratio of internal branch length sum to total branch length sum
234
"""
235
236
def num_cherries(self) -> int:
237
"""
238
Count cherries (internal nodes with only leaf children).
239
240
Returns:
241
- int: Number of cherries in the tree
242
"""
243
```
244
245
Usage examples:
246
247
```python
248
import treeswift
249
250
# Phylogenetic statistics
251
tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.5,(C:0.2,D:0.2):0.3);")
252
253
gamma = tree.gamma_statistic()
254
print(f"Gamma statistic: {gamma:.3f}")
255
if gamma < 0:
256
print(" Early diversification pattern")
257
elif gamma > 0:
258
print(" Late diversification pattern")
259
else:
260
print(" Constant rate diversification")
261
262
treeness = tree.treeness()
263
print(f"Treeness: {treeness:.3f} ({treeness*100:.1f}% internal branches)")
264
265
cherries = tree.num_cherries()
266
print(f"Number of cherries: {cherries}")
267
```
268
269
### Coalescence Analysis
270
271
Analyze coalescence patterns and lineage dynamics.
272
273
```python { .api }
274
def coalescence_times(self, backward: bool = True) -> Generator[float, None, None]:
275
"""
276
Generate coalescence event times.
277
278
Parameters:
279
- backward (bool): Times going backward from present (True) or forward from root (False)
280
281
Yields:
282
- float: Times of successive coalescence events
283
"""
284
285
def coalescence_waiting_times(self, backward: bool = True) -> Generator[float, None, None]:
286
"""
287
Generate waiting times between coalescence events.
288
289
Parameters:
290
- backward (bool): Going backward from present (True) or forward from root (False)
291
292
Yields:
293
- float: Waiting times between successive coalescence events
294
"""
295
296
def num_lineages_at(self, distance: float) -> int:
297
"""
298
Count lineages at specified distance from root.
299
300
Parameters:
301
- distance (float): Distance from root
302
303
Returns:
304
- int: Number of lineages existing at given distance
305
"""
306
```
307
308
Usage examples:
309
310
```python
311
import treeswift
312
313
tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.3,(C:0.2,D:0.2):0.2);")
314
315
# Coalescence times
316
print("Coalescence times (backward from present):")
317
for i, time in enumerate(tree.coalescence_times(backward=True)):
318
print(f" Event {i+1}: {time:.3f}")
319
320
# Waiting times between events
321
print("Waiting times between coalescence events:")
322
for i, wait_time in enumerate(tree.coalescence_waiting_times()):
323
print(f" Interval {i+1}: {wait_time:.3f}")
324
325
# Lineage count through time
326
print("Lineages at different distances from root:")
327
for dist in [0.0, 0.1, 0.2, 0.3, 0.4]:
328
count = tree.num_lineages_at(dist)
329
print(f" Distance {dist}: {count} lineages")
330
```
331
332
### Branch Length Analysis
333
334
Analyze patterns in branch lengths across the tree.
335
336
```python { .api }
337
def avg_branch_length(self, terminal: bool = True, internal: bool = True) -> float:
338
"""
339
Compute average length of selected branches.
340
341
Parameters:
342
- terminal (bool): Include terminal branches
343
- internal (bool): Include internal branches
344
345
Returns:
346
- float: Average branch length
347
"""
348
349
def branch_lengths(self, terminal: bool = True, internal: bool = True) -> Generator[float, None, None]:
350
"""
351
Generate branch lengths of selected branches.
352
353
Parameters:
354
- terminal (bool): Include terminal branches
355
- internal (bool): Include internal branches
356
357
Yields:
358
- float: Branch lengths (an edge whose length is None is yielded as 0)
359
"""
360
361
def edge_length_sum(self, terminal: bool = True, internal: bool = True) -> float:
362
"""
363
Sum all selected edge lengths.
364
365
Parameters:
366
- terminal (bool): Include terminal branches
367
- internal (bool): Include internal branches
368
369
Returns:
370
- float: Total length of selected branches
371
"""
372
```
373
374
Usage examples:
375
376
```python
377
import treeswift
378
379
tree = treeswift.read_tree_newick("((A:0.1,B:0.3):0.2,(C:0.4,D:0.1):0.5);")
380
381
# Branch length statistics
382
total_length = tree.edge_length_sum()
383
avg_length = tree.avg_branch_length()
384
print(f"Total tree length: {total_length:.3f}")
385
print(f"Average branch length: {avg_length:.3f}")
386
387
# Separate terminal vs internal branches
388
term_avg = tree.avg_branch_length(internal=False)
389
int_avg = tree.avg_branch_length(terminal=False)
390
print(f"Average terminal branch: {term_avg:.3f}")
391
print(f"Average internal branch: {int_avg:.3f}")
392
393
# Collect all branch lengths
394
all_lengths = list(tree.branch_lengths())
395
print(f"All branch lengths: {[round(x, 3) for x in all_lengths]}")
396
print(f"Range: {min(all_lengths):.3f} - {max(all_lengths):.3f}")
397
```
398
399
### Node Search and Utility Methods
400
401
Find nodes by labels and work with node collections in the tree.
402
403
```python { .api }
404
def find_node(self, label: object, leaves: bool = True, internal: bool = False) -> Node | list[Node] | None:
405
"""
406
Find node(s) with specified label.
407
408
Parameters:
409
- label (object): Label to search for
410
- leaves (bool): Include leaf nodes in search
411
- internal (bool): Include internal nodes in search
412
413
Returns:
414
- Node: Single node if only one found
415
- list[Node]: List of nodes if multiple found
416
- None: If no nodes found
417
"""
418
419
def label_to_node(self, selection: str | set = 'leaves') -> dict:
420
"""
421
Return dictionary mapping labels to Node objects.
422
423
Parameters:
424
- selection (str | set): Node selection - 'leaves', 'internal', 'all', or set of labels
425
426
Returns:
427
- dict: Dictionary mapping labels to Node objects
428
"""
429
430
def labels(self, leaves: bool = True, internal: bool = True) -> Generator[object, None, None]:
431
"""
432
Generate non-None node labels.
433
434
Parameters:
435
- leaves (bool): Include leaf node labels
436
- internal (bool): Include internal node labels
437
438
Yields:
439
- object: Node labels (non-None only)
440
"""
441
```
442
443
Usage examples:
444
445
```python
446
import treeswift
447
448
tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")
449
450
# Find specific nodes
451
node_a = tree.find_node("A")
452
print(f"Found node A: {node_a.get_label()}")
453
454
# Find nodes that might not exist
455
node_x = tree.find_node("X")
456
print(f"Node X found: {node_x is not None}")
457
458
# Search in both leaves and internal nodes
459
all_matches = tree.find_node("A", leaves=True, internal=True)
460
461
# Get mapping of labels to nodes
462
leaf_map = tree.label_to_node('leaves')
463
print(f"Leaf labels: {list(leaf_map.keys())}")
464
465
# Get mapping for all nodes
466
all_map = tree.label_to_node('all')
467
print(f"All labeled nodes: {len(all_map)}")
468
469
# Get mapping for specific labels only
470
specific_map = tree.label_to_node({'A', 'B'})
471
print(f"Specific nodes: {list(specific_map.keys())}")
472
473
# Iterate over all labels
474
all_labels = list(tree.labels())
475
print(f"All labels: {all_labels}")
476
477
# Get only leaf labels
478
leaf_labels = list(tree.labels(internal=False))
479
print(f"Leaf labels only: {leaf_labels}")
480
```
481
482
### MRCA Analysis
483
484
Find most recent common ancestors and analyze relationships.
485
486
```python { .api }
487
def mrca(self, labels: set) -> Node:
488
"""
489
Find most recent common ancestor of nodes with specified labels.
490
491
Parameters:
492
- labels (set): Set of node labels to find MRCA for
493
494
Returns:
495
- Node: Most recent common ancestor node
496
"""
497
498
def mrca_matrix(self) -> dict:
499
"""
500
Compute matrix of all pairwise MRCAs.
501
502
Returns:
503
- dict: 2D dictionary keyed by Node objects, where matrix[u][v] is the MRCA of nodes u and v
504
"""
505
```
506
507
Usage examples:
508
509
```python
510
import treeswift
511
512
tree = treeswift.read_tree_newick("(((A,B),C),((D,E),F));")
513
514
# Find MRCA of specific taxa
515
mrca_ab = tree.mrca({"A", "B"})
516
mrca_abc = tree.mrca({"A", "B", "C"})
517
mrca_all = tree.mrca({"A", "B", "D", "E"})
518
519
print(f"MRCA of A,B has {mrca_ab.num_children()} children")
520
print(f"MRCA of A,B,C has {mrca_abc.num_children()} children")
521
print(f"MRCA of A,B,D,E is root: {mrca_all == tree.root}")
522
523
# Full MRCA matrix
524
mrca_matrix = tree.mrca_matrix()
525
leaves = ["A", "B", "C", "D", "E", "F"]
526
print("MRCA relationships (showing number of descendants):")
527
for i, leaf1 in enumerate(leaves):
528
for leaf2 in leaves[i+1:]:
529
# Find nodes with these labels
530
node1 = tree.find_node(leaf1)
531
node2 = tree.find_node(leaf2)
532
if node1 and node2 and node1 in mrca_matrix and node2 in mrca_matrix[node1]:
533
mrca_node = mrca_matrix[node1][node2]
534
desc_count = mrca_node.num_nodes()
535
print(f" MRCA({leaf1},{leaf2}): {desc_count} descendants")
536
```