TreeSwift: Fast tree module for Python 2 and 3 - A Python library for parsing, manipulating, and iterating over rooted tree structures with emphasis on performance and speed.
TreeSwift provides comprehensive tree analysis including distance calculations, balance indices, phylogenetic statistics, and coalescence analysis. These methods support both basic tree metrics and advanced phylogenetic measures used in evolutionary biology and comparative studies.
Compute pairwise distances and tree-wide distance metrics.
def distance_between(self, u: Node, v: Node) -> float:
"""
Calculate distance between two nodes.
Parameters:
- u (Node): First node
- v (Node): Second node
Returns:
- float: Distance between nodes u and v
"""
def distance_matrix(self, leaf_labels: bool = False) -> dict:
"""
Compute pairwise distance matrix of all leaves.
Parameters:
- leaf_labels (bool): Use leaf labels as keys instead of Node objects
Returns:
- dict: 2D dictionary with distances between all leaf pairs
"""
def distances_from_root(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False, weighted: bool = True) -> Generator[tuple[Node, float], None, None]:
"""
Generate distances from root to selected nodes.
Parameters:
- leaves (bool): Include leaf nodes
- internal (bool): Include internal nodes
- unlabeled (bool): Include nodes without labels
- weighted (bool): Use edge lengths (False for node counts)
Yields:
- tuple[Node, float]: (node, distance_from_root) pairs
"""
def distances_from_parent(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False) -> Generator[tuple[Node, float], None, None]:
"""
Generate distances from each node to its parent.
Parameters:
- leaves (bool): Include leaf nodes
- internal (bool): Include internal nodes
- unlabeled (bool): Include nodes without labels
Yields:
- tuple[Node, float]: (node, distance_to_parent) pairs
"""Usage examples:
import treeswift
tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")
# Distance between specific nodes
nodeA = None
nodeB = None
for node in tree.traverse_leaves():
if node.get_label() == "A":
nodeA = node
elif node.get_label() == "B":
nodeB = node
if nodeA and nodeB:
dist = tree.distance_between(nodeA, nodeB)
print(f"Distance A-B: {dist}")
# Complete distance matrix
dist_matrix = tree.distance_matrix(leaf_labels=True)
print("Distance matrix:")
for taxon1 in ["A", "B", "C", "D"]:
for taxon2 in ["A", "B", "C", "D"]:
if taxon1 in dist_matrix and taxon2 in dist_matrix[taxon1]:
print(f" {taxon1}-{taxon2}: {dist_matrix[taxon1][taxon2]:.3f}")
# Distances from root
print("Distances from root:")
for node, distance in tree.distances_from_root():
label = node.get_label() or "internal"
print(f" {label}: {distance:.3f}")Calculate tree height, diameter, and find extreme nodes.
def height(self, weighted: bool = True) -> float:
"""
Compute tree height (maximum distance from root to leaf).
Parameters:
- weighted (bool): Use edge lengths (False for node counts)
Returns:
- float: Maximum distance from root to any leaf
"""
def diameter(self) -> float:
"""
Compute tree diameter (maximum pairwise leaf distance).
Returns:
- float: Maximum distance between any two leaves
"""
def closest_leaf_to_root(self) -> tuple[Node, float]:
"""
Find leaf closest to root.
Returns:
- tuple[Node, float]: (closest_leaf, distance) pair
"""
def furthest_from_root(self) -> tuple[Node, float]:
"""
Find node furthest from root.
Returns:
- tuple[Node, float]: (furthest_node, distance) pair
"""Usage examples:
import treeswift
tree = treeswift.read_tree_newick("((A:0.1,B:0.4):0.2,(C:0.3,D:0.1):0.5);")
# Basic tree dimensions
print(f"Tree height: {tree.height():.3f}")
print(f"Tree diameter: {tree.diameter():.3f}")
# Find extreme nodes
closest_leaf, closest_dist = tree.closest_leaf_to_root()
print(f"Closest leaf to root: {closest_leaf.get_label()} (distance: {closest_dist:.3f})")
furthest_node, furthest_dist = tree.furthest_from_root()
furthest_label = furthest_node.get_label() or "internal"
print(f"Furthest node from root: {furthest_label} (distance: {furthest_dist:.3f})")
# Compare weighted vs unweighted height
print(f"Weighted height: {tree.height(weighted=True):.3f}")
print(f"Unweighted height (node count): {tree.height(weighted=False)}")Quantify tree balance using standard phylogenetic indices.
def colless(self, normalize: str = 'leaves') -> float:
"""
Compute Colless balance index.
Parameters:
- normalize (str): Normalization method ('leaves', 'yule', 'pda', or None)
Returns:
- float: Colless index (lower values = more balanced)
"""
def sackin(self, normalize: str = 'leaves') -> float:
"""
Compute Sackin balance index.
Parameters:
- normalize (str): Normalization method ('leaves', 'yule', 'pda', or None)
Returns:
- float: Sackin index (lower values = more balanced)
"""Usage examples:
import treeswift
# Compare balanced vs unbalanced trees
balanced_tree = treeswift.read_tree_newick("((A,B),(C,D));")
unbalanced_tree = treeswift.read_tree_newick("(((A,B),C),D);")
print("Balanced tree:")
print(f" Colless: {balanced_tree.colless():.3f}")
print(f" Sackin: {balanced_tree.sackin():.3f}")
print("Unbalanced tree:")
print(f" Colless: {unbalanced_tree.colless():.3f}")
print(f" Sackin: {unbalanced_tree.sackin():.3f}")
# Different normalization methods
tree = treeswift.read_tree_newick("(((A,B),C),(D,E));")
print("Normalization methods:")
for norm in [None, 'leaves', 'yule', 'pda']:
colless_val = tree.colless(normalize=norm)
print(f" Colless ({norm}): {colless_val:.3f}")Calculate statistics specific to phylogenetic trees.
def gamma_statistic(self) -> float:
"""
Compute Gamma statistic of Pybus and Harvey (2000).
Returns:
- float: Gamma statistic (negative = early branching, positive = late branching)
"""
def treeness(self) -> float:
"""
Compute treeness (proportion of total tree length in internal branches).
Returns:
- float: Ratio of internal branch length sum to total branch length sum
"""
def num_cherries(self) -> int:
"""
Count cherries (internal nodes with only leaf children).
Returns:
- int: Number of cherries in the tree
"""Usage examples:
import treeswift
# Phylogenetic statistics
tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.5,(C:0.2,D:0.2):0.3);")
gamma = tree.gamma_statistic()
print(f"Gamma statistic: {gamma:.3f}")
if gamma < 0:
print(" Early diversification pattern")
elif gamma > 0:
print(" Late diversification pattern")
else:
print(" Constant rate diversification")
treeness = tree.treeness()
print(f"Treeness: {treeness:.3f} ({treeness*100:.1f}% internal branches)")
cherries = tree.num_cherries()
print(f"Number of cherries: {cherries}")Analyze coalescence patterns and lineage dynamics.
def coalescence_times(self, backward: bool = True) -> Generator[float, None, None]:
"""
Generate coalescence event times.
Parameters:
- backward (bool): Times going backward from present (True) or forward from root (False)
Yields:
- float: Times of successive coalescence events
"""
def coalescence_waiting_times(self, backward: bool = True) -> Generator[float, None, None]:
"""
Generate waiting times between coalescence events.
Parameters:
- backward (bool): Going backward from present (True) or forward from root (False)
Yields:
- float: Waiting times between successive coalescence events
"""
def num_lineages_at(self, distance: float) -> int:
"""
Count lineages at specified distance from root.
Parameters:
- distance (float): Distance from root
Returns:
- int: Number of lineages existing at given distance
"""Usage examples:
import treeswift
tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.3,(C:0.2,D:0.2):0.2);")
# Coalescence times
print("Coalescence times (backward from present):")
for i, time in enumerate(tree.coalescence_times(backward=True)):
print(f" Event {i+1}: {time:.3f}")
# Waiting times between events
print("Waiting times between coalescence events:")
for i, wait_time in enumerate(tree.coalescence_waiting_times()):
print(f" Interval {i+1}: {wait_time:.3f}")
# Lineage count through time
print("Lineages at different distances from root:")
for dist in [0.0, 0.1, 0.2, 0.3, 0.4]:
count = tree.num_lineages_at(dist)
print(f" Distance {dist}: {count} lineages")Analyze patterns in branch lengths across the tree.
def avg_branch_length(self, terminal: bool = True, internal: bool = True) -> float:
"""
Compute average length of selected branches.
Parameters:
- terminal (bool): Include terminal branches
- internal (bool): Include internal branches
Returns:
- float: Average branch length
"""
def branch_lengths(self, terminal: bool = True, internal: bool = True) -> Generator[float, None, None]:
"""
Generate branch lengths of selected branches.
Parameters:
- terminal (bool): Include terminal branches
- internal (bool): Include internal branches
Yields:
- float: Branch lengths (None edges yield 0)
"""
def edge_length_sum(self, terminal: bool = True, internal: bool = True) -> float:
"""
Sum all selected edge lengths.
Parameters:
- terminal (bool): Include terminal branches
- internal (bool): Include internal branches
Returns:
- float: Total length of selected branches
"""Usage examples:
import treeswift
tree = treeswift.read_tree_newick("((A:0.1,B:0.3):0.2,(C:0.4,D:0.1):0.5);")
# Branch length statistics
total_length = tree.edge_length_sum()
avg_length = tree.avg_branch_length()
print(f"Total tree length: {total_length:.3f}")
print(f"Average branch length: {avg_length:.3f}")
# Separate terminal vs internal branches
term_avg = tree.avg_branch_length(internal=False)
int_avg = tree.avg_branch_length(terminal=False)
print(f"Average terminal branch: {term_avg:.3f}")
print(f"Average internal branch: {int_avg:.3f}")
# Collect all branch lengths
all_lengths = list(tree.branch_lengths())
print(f"All branch lengths: {[round(x, 3) for x in all_lengths]}")
print(f"Range: {min(all_lengths):.3f} - {max(all_lengths):.3f}")Find nodes by labels and work with node collections in the tree.
def find_node(self, label: object, leaves: bool = True, internal: bool = False) -> Node | list[Node] | None:
"""
Find node(s) with specified label.
Parameters:
- label (object): Label to search for
- leaves (bool): Include leaf nodes in search
- internal (bool): Include internal nodes in search
Returns:
- Node: Single node if only one found
- list[Node]: List of nodes if multiple found
- None: If no nodes found
"""
def label_to_node(self, selection: str | set = 'leaves') -> dict:
"""
Return dictionary mapping labels to Node objects.
Parameters:
- selection (str | set): Node selection - 'leaves', 'internal', 'all', or set of labels
Returns:
- dict: Dictionary mapping labels to Node objects
"""
def labels(self, leaves: bool = True, internal: bool = True) -> Generator[object, None, None]:
"""
Generate non-None node labels.
Parameters:
- leaves (bool): Include leaf node labels
- internal (bool): Include internal node labels
Yields:
- object: Node labels (non-None only)
"""Usage examples:
import treeswift
tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")
# Find specific nodes
node_a = tree.find_node("A")
print(f"Found node A: {node_a.get_label()}")
# Find nodes that might not exist
node_x = tree.find_node("X")
print(f"Node X found: {node_x is not None}")
# Search in both leaves and internal nodes
all_matches = tree.find_node("A", leaves=True, internal=True)
# Get mapping of labels to nodes
leaf_map = tree.label_to_node('leaves')
print(f"Leaf labels: {list(leaf_map.keys())}")
# Get mapping for all nodes
all_map = tree.label_to_node('all')
print(f"All labeled nodes: {len(all_map)}")
# Get mapping for specific labels only
specific_map = tree.label_to_node({'A', 'B'})
print(f"Specific nodes: {list(specific_map.keys())}")
# Iterate over all labels
all_labels = list(tree.labels())
print(f"All labels: {all_labels}")
# Get only leaf labels
leaf_labels = list(tree.labels(internal=False))
print(f"Leaf labels only: {leaf_labels}")Find most recent common ancestors and analyze relationships.
def mrca(self, labels: set) -> Node:
"""
Find most recent common ancestor of nodes with specified labels.
Parameters:
- labels (set): Set of node labels to find MRCA for
Returns:
- Node: Most recent common ancestor node
"""
def mrca_matrix(self) -> dict:
"""
Compute matrix of all pairwise MRCAs.
Returns:
- dict: 2D dictionary storing all pairwise MRCA relationships
"""Usage examples:
import treeswift
tree = treeswift.read_tree_newick("(((A,B),C),((D,E),F));")
# Find MRCA of specific taxa
mrca_ab = tree.mrca({"A", "B"})
mrca_abc = tree.mrca({"A", "B", "C"})
mrca_all = tree.mrca({"A", "B", "D", "E"})
print(f"MRCA of A,B has {mrca_ab.num_children()} children")
print(f"MRCA of A,B,C has {mrca_abc.num_children()} children")
print(f"MRCA of A,B,D,E is root: {mrca_all == tree.root}")
# Full MRCA matrix
mrca_matrix = tree.mrca_matrix()
leaves = ["A", "B", "C", "D", "E", "F"]
print("MRCA relationships (showing number of descendants):")
for i, leaf1 in enumerate(leaves):
for leaf2 in leaves[i+1:]:
# Find nodes with these labels
node1 = tree.find_node(leaf1)
node2 = tree.find_node(leaf2)
if node1 and node2 and node1 in mrca_matrix and node2 in mrca_matrix[node1]:
mrca_node = mrca_matrix[node1][node2]
desc_count = mrca_node.num_nodes()
print(f" MRCA({leaf1},{leaf2}): {desc_count} descendants")Install with Tessl CLI
npx tessl i tessl/pypi-treeswift