CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-treeswift

TreeSwift: Fast tree module for Python 2 and 3 - A Python library for parsing, manipulating, and iterating over rooted tree structures with emphasis on performance and speed.

Overview
Eval results
Files

tree-analysis.mddocs/

Tree Analysis and Metrics

TreeSwift provides comprehensive tree analysis including distance calculations, balance indices, phylogenetic statistics, and coalescence analysis. These methods support both basic tree metrics and advanced phylogenetic measures used in evolutionary biology and comparative studies.

Capabilities

Distance Calculations

Compute pairwise distances and tree-wide distance metrics.

def distance_between(self, u: Node, v: Node) -> float:
    """
    Calculate distance between two nodes.

    Parameters:
    - u (Node): First node
    - v (Node): Second node

    Returns:
    - float: Distance between nodes u and v
    """

def distance_matrix(self, leaf_labels: bool = False) -> dict:
    """
    Compute pairwise distance matrix of all leaves.

    Parameters:
    - leaf_labels (bool): Use leaf labels as keys instead of Node objects

    Returns:
    - dict: 2D dictionary with distances between all leaf pairs
    """

def distances_from_root(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False, weighted: bool = True) -> Generator[tuple[Node, float], None, None]:
    """
    Generate distances from root to selected nodes.

    Parameters:
    - leaves (bool): Include leaf nodes
    - internal (bool): Include internal nodes
    - unlabeled (bool): Include nodes without labels
    - weighted (bool): Use edge lengths (False for node counts)

    Yields:
    - tuple[Node, float]: (node, distance_from_root) pairs
    """

def distances_from_parent(self, leaves: bool = True, internal: bool = True, unlabeled: bool = False) -> Generator[tuple[Node, float], None, None]:
    """
    Generate distances from each node to its parent.

    Parameters:
    - leaves (bool): Include leaf nodes
    - internal (bool): Include internal nodes
    - unlabeled (bool): Include nodes without labels

    Yields:
    - tuple[Node, float]: (node, distance_to_parent) pairs
    """

Usage examples:

import treeswift

tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")

# Distance between specific nodes
nodeA = None
nodeB = None
for node in tree.traverse_leaves():
    if node.get_label() == "A":
        nodeA = node
    elif node.get_label() == "B":
        nodeB = node

if nodeA and nodeB:
    dist = tree.distance_between(nodeA, nodeB)
    print(f"Distance A-B: {dist}")

# Complete distance matrix
dist_matrix = tree.distance_matrix(leaf_labels=True)
print("Distance matrix:")
for taxon1 in ["A", "B", "C", "D"]:
    for taxon2 in ["A", "B", "C", "D"]:
        if taxon1 in dist_matrix and taxon2 in dist_matrix[taxon1]:
            print(f"  {taxon1}-{taxon2}: {dist_matrix[taxon1][taxon2]:.3f}")

# Distances from root
print("Distances from root:")
for node, distance in tree.distances_from_root():
    label = node.get_label() or "internal"
    print(f"  {label}: {distance:.3f}")

Tree Dimensions and Extremes

Calculate tree height, diameter, and find extreme nodes.

def height(self, weighted: bool = True) -> float:
    """
    Compute tree height (maximum distance from root to leaf).

    Parameters:
    - weighted (bool): Use edge lengths (False for node counts)

    Returns:
    - float: Maximum distance from root to any leaf
    """

def diameter(self) -> float:
    """
    Compute tree diameter (maximum pairwise leaf distance).

    Returns:
    - float: Maximum distance between any two leaves
    """

def closest_leaf_to_root(self) -> tuple[Node, float]:
    """
    Find leaf closest to root.

    Returns:
    - tuple[Node, float]: (closest_leaf, distance) pair
    """

def furthest_from_root(self) -> tuple[Node, float]:
    """
    Find node furthest from root.

    Returns:
    - tuple[Node, float]: (furthest_node, distance) pair
    """

Usage examples:

import treeswift

tree = treeswift.read_tree_newick("((A:0.1,B:0.4):0.2,(C:0.3,D:0.1):0.5);")

# Basic tree dimensions
print(f"Tree height: {tree.height():.3f}")
print(f"Tree diameter: {tree.diameter():.3f}")

# Find extreme nodes
closest_leaf, closest_dist = tree.closest_leaf_to_root()
print(f"Closest leaf to root: {closest_leaf.get_label()} (distance: {closest_dist:.3f})")

furthest_node, furthest_dist = tree.furthest_from_root()
furthest_label = furthest_node.get_label() or "internal"
print(f"Furthest node from root: {furthest_label} (distance: {furthest_dist:.3f})")

# Compare weighted vs unweighted height
print(f"Weighted height: {tree.height(weighted=True):.3f}")
print(f"Unweighted height (node count): {tree.height(weighted=False)}")

Tree Balance Indices

Quantify tree balance using standard phylogenetic indices.

def colless(self, normalize: str = 'leaves') -> float:
    """
    Compute Colless balance index.

    Parameters:
    - normalize (str): Normalization method ('leaves', 'yule', 'pda', or None)

    Returns:
    - float: Colless index (lower values = more balanced)
    """

def sackin(self, normalize: str = 'leaves') -> float:
    """
    Compute Sackin balance index.

    Parameters:
    - normalize (str): Normalization method ('leaves', 'yule', 'pda', or None)

    Returns:
    - float: Sackin index (lower values = more balanced)
    """

Usage examples:

import treeswift

# Compare balanced vs unbalanced trees
balanced_tree = treeswift.read_tree_newick("((A,B),(C,D));")
unbalanced_tree = treeswift.read_tree_newick("(((A,B),C),D);")

print("Balanced tree:")
print(f"  Colless: {balanced_tree.colless():.3f}")
print(f"  Sackin: {balanced_tree.sackin():.3f}")

print("Unbalanced tree:")
print(f"  Colless: {unbalanced_tree.colless():.3f}")
print(f"  Sackin: {unbalanced_tree.sackin():.3f}")

# Different normalization methods
tree = treeswift.read_tree_newick("(((A,B),C),(D,E));")
print("Normalization methods:")
for norm in [None, 'leaves', 'yule', 'pda']:
    colless_val = tree.colless(normalize=norm)
    print(f"  Colless ({norm}): {colless_val:.3f}")

Phylogenetic Statistics

Calculate statistics specific to phylogenetic trees.

def gamma_statistic(self) -> float:
    """
    Compute Gamma statistic of Pybus and Harvey (2000).

    Returns:
    - float: Gamma statistic (negative = early branching, positive = late branching)
    """

def treeness(self) -> float:
    """
    Compute treeness (proportion of total tree length in internal branches).

    Returns:
    - float: Ratio of internal branch length sum to total branch length sum
    """

def num_cherries(self) -> int:
    """
    Count cherries (internal nodes with only leaf children).

    Returns:
    - int: Number of cherries in the tree
    """

Usage examples:

import treeswift

# Phylogenetic statistics
tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.5,(C:0.2,D:0.2):0.3);")

gamma = tree.gamma_statistic()
print(f"Gamma statistic: {gamma:.3f}")
if gamma < 0:
    print("  Early diversification pattern")
elif gamma > 0:
    print("  Late diversification pattern")
else:
    print("  Constant rate diversification")

treeness = tree.treeness()
print(f"Treeness: {treeness:.3f} ({treeness*100:.1f}% internal branches)")

cherries = tree.num_cherries()
print(f"Number of cherries: {cherries}")

Coalescence Analysis

Analyze coalescence patterns and lineage dynamics.

def coalescence_times(self, backward: bool = True) -> Generator[float, None, None]:
    """
    Generate coalescence event times.

    Parameters:
    - backward (bool): Times going backward from present (True) or forward from root (False)

    Yields:
    - float: Times of successive coalescence events
    """

def coalescence_waiting_times(self, backward: bool = True) -> Generator[float, None, None]:
    """
    Generate waiting times between coalescence events.

    Parameters:
    - backward (bool): Going backward from present (True) or forward from root (False)

    Yields:
    - float: Waiting times between successive coalescence events
    """

def num_lineages_at(self, distance: float) -> int:
    """
    Count lineages at specified distance from root.

    Parameters:
    - distance (float): Distance from root

    Returns:
    - int: Number of lineages existing at given distance
    """

Usage examples:

import treeswift

tree = treeswift.read_tree_newick("((A:0.1,B:0.1):0.3,(C:0.2,D:0.2):0.2);")

# Coalescence times
print("Coalescence times (backward from present):")
for i, time in enumerate(tree.coalescence_times(backward=True)):
    print(f"  Event {i+1}: {time:.3f}")

# Waiting times between events
print("Waiting times between coalescence events:")
for i, wait_time in enumerate(tree.coalescence_waiting_times()):
    print(f"  Interval {i+1}: {wait_time:.3f}")

# Lineage count through time
print("Lineages at different distances from root:")
for dist in [0.0, 0.1, 0.2, 0.3, 0.4]:
    count = tree.num_lineages_at(dist)
    print(f"  Distance {dist}: {count} lineages")

Branch Length Analysis

Analyze patterns in branch lengths across the tree.

def avg_branch_length(self, terminal: bool = True, internal: bool = True) -> float:
    """
    Compute average length of selected branches.

    Parameters:
    - terminal (bool): Include terminal branches
    - internal (bool): Include internal branches

    Returns:
    - float: Average branch length
    """

def branch_lengths(self, terminal: bool = True, internal: bool = True) -> Generator[float, None, None]:
    """
    Generate branch lengths of selected branches.

    Parameters:
    - terminal (bool): Include terminal branches
    - internal (bool): Include internal branches

    Yields:
    - float: Branch lengths (None edges yield 0)
    """

def edge_length_sum(self, terminal: bool = True, internal: bool = True) -> float:
    """
    Sum all selected edge lengths.

    Parameters:
    - terminal (bool): Include terminal branches
    - internal (bool): Include internal branches

    Returns:
    - float: Total length of selected branches
    """

Usage examples:

import treeswift

tree = treeswift.read_tree_newick("((A:0.1,B:0.3):0.2,(C:0.4,D:0.1):0.5);")

# Branch length statistics
total_length = tree.edge_length_sum()
avg_length = tree.avg_branch_length()
print(f"Total tree length: {total_length:.3f}")
print(f"Average branch length: {avg_length:.3f}")

# Separate terminal vs internal branches
term_avg = tree.avg_branch_length(internal=False)
int_avg = tree.avg_branch_length(terminal=False)
print(f"Average terminal branch: {term_avg:.3f}")
print(f"Average internal branch: {int_avg:.3f}")

# Collect all branch lengths
all_lengths = list(tree.branch_lengths())
print(f"All branch lengths: {[round(x, 3) for x in all_lengths]}")
print(f"Range: {min(all_lengths):.3f} - {max(all_lengths):.3f}")

Node Search and Utility Methods

Find nodes by labels and work with node collections in the tree.

def find_node(self, label: object, leaves: bool = True, internal: bool = False) -> Node | list[Node] | None:
    """
    Find node(s) with specified label.

    Parameters:
    - label (object): Label to search for
    - leaves (bool): Include leaf nodes in search
    - internal (bool): Include internal nodes in search

    Returns:
    - Node: Single node if only one found
    - list[Node]: List of nodes if multiple found
    - None: If no nodes found
    """

def label_to_node(self, selection: str | set = 'leaves') -> dict:
    """
    Return dictionary mapping labels to Node objects.

    Parameters:
    - selection (str | set): Node selection - 'leaves', 'internal', 'all', or set of labels

    Returns:
    - dict: Dictionary mapping labels to Node objects
    """

def labels(self, leaves: bool = True, internal: bool = True) -> Generator[object, None, None]:
    """
    Generate non-None node labels.

    Parameters:
    - leaves (bool): Include leaf node labels
    - internal (bool): Include internal node labels

    Yields:
    - object: Node labels (non-None only)
    """

Usage examples:

import treeswift

tree = treeswift.read_tree_newick("((A:0.1,B:0.2):0.3,(C:0.4,D:0.5):0.6);")

# Find specific nodes
node_a = tree.find_node("A")
print(f"Found node A: {node_a.get_label()}")

# Find nodes that might not exist
node_x = tree.find_node("X")
print(f"Node X found: {node_x is not None}")

# Search in both leaves and internal nodes
all_matches = tree.find_node("A", leaves=True, internal=True)

# Get mapping of labels to nodes
leaf_map = tree.label_to_node('leaves')
print(f"Leaf labels: {list(leaf_map.keys())}")

# Get mapping for all nodes
all_map = tree.label_to_node('all')
print(f"All labeled nodes: {len(all_map)}")

# Get mapping for specific labels only
specific_map = tree.label_to_node({'A', 'B'})
print(f"Specific nodes: {list(specific_map.keys())}")

# Iterate over all labels
all_labels = list(tree.labels())
print(f"All labels: {all_labels}")

# Get only leaf labels
leaf_labels = list(tree.labels(internal=False))
print(f"Leaf labels only: {leaf_labels}")

MRCA Analysis

Find most recent common ancestors and analyze relationships.

def mrca(self, labels: set) -> Node:
    """
    Find most recent common ancestor of nodes with specified labels.

    Parameters:
    - labels (set): Set of node labels to find MRCA for

    Returns:
    - Node: Most recent common ancestor node
    """

def mrca_matrix(self) -> dict:
    """
    Compute matrix of all pairwise MRCAs.

    Returns:
    - dict: 2D dictionary storing all pairwise MRCA relationships
    """

Usage examples:

import treeswift

tree = treeswift.read_tree_newick("(((A,B),C),((D,E),F));")

# Find MRCA of specific taxa
mrca_ab = tree.mrca({"A", "B"})
mrca_abc = tree.mrca({"A", "B", "C"})
mrca_all = tree.mrca({"A", "B", "D", "E"})

print(f"MRCA of A,B has {mrca_ab.num_children()} children")
print(f"MRCA of A,B,C has {mrca_abc.num_children()} children")
print(f"MRCA of A,B,D,E is root: {mrca_all == tree.root}")

# Full MRCA matrix
mrca_matrix = tree.mrca_matrix()
leaves = ["A", "B", "C", "D", "E", "F"]
print("MRCA relationships (showing number of descendants):")
for i, leaf1 in enumerate(leaves):
    for leaf2 in leaves[i+1:]:
        # Find nodes with these labels
        node1 = tree.find_node(leaf1)
        node2 = tree.find_node(leaf2)
        if node1 and node2 and node1 in mrca_matrix and node2 in mrca_matrix[node1]:
            mrca_node = mrca_matrix[node1][node2]
            desc_count = mrca_node.num_nodes()
            print(f"  MRCA({leaf1},{leaf2}): {desc_count} descendants")

Install with Tessl CLI

npx tessl i tessl/pypi-treeswift

docs

index.md

node-operations.md

tree-analysis.md

tree-io.md

tree-manipulation.md

tree-traversal.md

visualization.md

tile.json