A Python Environment for (phylogenetic) Tree Exploration
—
Advanced phylogenetic tree analysis capabilities including species tree operations, monophyly testing, evolutionary analysis, and specialized phylogenetic methods. These features extend core tree functionality with domain-specific phylogenetic tools.
Enhanced tree classes with phylogenetic-specific features and methods.
class PhyloTree(Tree):
"""
Phylogenetic tree with species-aware operations.
Inherits all Tree functionality plus phylogenetic methods.
"""
def __init__(self, newick=None, alignment=None, alg_format="fasta",
sp_naming_function=None, format=0):
"""
Initialize phylogenetic tree.
Parameters:
- newick (str): Newick format string or file
- alignment (str): Sequence alignment file or string
- alg_format (str): Alignment format ("fasta", "phylip", "iphylip")
- sp_naming_function (function): Function to extract species from node names
- format (int): Newick format specification
"""
class PhyloNode(PhyloTree):
"""Alias for PhyloTree - same functionality."""
pass

Configure how species names are extracted from node names and manage species-level operations.
def set_species_naming_function(self, fn):
"""
Set function to extract species name from node name.
Parameters:
- fn (function): Function that takes node name, returns species name
Example: lambda x: x.split('_')[0]
"""
species: str # Species name property (read-only)
def get_species(self):
"""
Get set of all species in tree.
Returns:
set: Species names present in tree
"""
def annotate_gtdb_taxa(self, taxid_attr="name"):
"""
Annotate tree with GTDB (Genome Taxonomy Database) taxonomic information.
Parameters:
- taxid_attr (str): Node attribute containing taxonomic IDs
"""

Test and analyze monophyletic groups in phylogenetic trees.
def check_monophyly(self, values, target_attr, ignore_missing=False):
"""
Check if specified values form monophyletic group.
Parameters:
- values (list): List of values to test for monophyly
- target_attr (str): Node attribute to check ("species", "name", etc.)
- ignore_missing (bool): Ignore nodes without target attribute
Returns:
tuple: (is_monophyletic: bool, clade_type: str, broken_branches: list)
clade_type can be "monophyletic", "paraphyletic", or "polyphyletic"
"""
def get_monophyletic(self, values, target_attr):
"""
Get node that represents monophyletic group of specified values.
Parameters:
- values (list): Values that should form monophyletic group
- target_attr (str): Node attribute to match against
Returns:
TreeNode: Node representing the monophyletic group, or None if not monophyletic
"""

Calculate evolutionary distances and analyze tree metrics.
def get_age(self, species2age):
"""
Get age of node based on species age information.
Parameters:
- species2age (dict): Mapping from species names to ages
Returns:
float: Estimated age of node
"""
def get_closest_leaf(self, topology_only=False):
"""
Find closest leaf node with phylogenetic distance.
Parameters:
- topology_only (bool): Use only topology, ignore branch lengths
Returns:
tuple: (closest_leaf_node, distance)
"""
def get_farthest_leaf(self, topology_only=False):
"""
Find most distant leaf node.
Parameters:
- topology_only (bool): Use only topology, ignore branch lengths
Returns:
tuple: (farthest_leaf_node, distance)
"""
def get_farthest_node(self, topology_only=False):
"""
Find most distant node (leaf or internal).
Parameters:
- topology_only (bool): Use only topology, ignore branch lengths
Returns:
tuple: (farthest_node, distance)
"""
def get_midpoint_outgroup(self):
"""
Find optimal outgroup for midpoint rooting.
Returns:
TreeNode: Node that serves as midpoint outgroup
"""

Link phylogenetic trees with molecular sequence data.
def link_to_alignment(self, alignment, alg_format="fasta", **kwargs):
"""
Associate sequence alignment with tree nodes.
Parameters:
- alignment (str): Alignment file path or sequence string
- alg_format (str): Format ("fasta", "phylip", "iphylip", "paml")
- kwargs: Additional format-specific parameters
"""
sequence: str # Associated sequence data (when linked to alignment)

Compare phylogenetic trees with NCBI taxonomic relationships.
def ncbi_compare(self, autodetect_duplications=True):
"""
Compare tree topology with NCBI taxonomy.
Parameters:
- autodetect_duplications (bool): Automatically detect gene duplications
Returns:
dict: Comparison results including conflicts and agreements
"""

Reconcile gene trees with species trees to infer evolutionary events.
def reconcile(self, species_tree, inplace=True):
"""
Reconcile gene tree with species tree.
Parameters:
- species_tree (PhyloTree): Reference species tree
- inplace (bool): Modify current tree or return new one
Returns:
PhyloTree: Reconciled tree with duplication/speciation events annotated
"""
# Properties set by reconciliation
evoltype: str # Event type: "S" (speciation), "D" (duplication)

Calculate various phylogenetic tree statistics and metrics.
def get_cached_content(self, store_attr=None):
"""
Cache tree content for efficient repeated access.
Parameters:
- store_attr (str): Specific attribute to cache
Returns:
dict: Cached tree statistics and content
"""
def robinson_foulds(self, ref_tree, attr_t1="name", attr_t2="name",
expand_polytomies=False, polytomy_size_limit=5,
skip_large_polytomies=True):
"""
Calculate Robinson-Foulds distance between trees.
Parameters:
- ref_tree (Tree): Reference tree for comparison
- attr_t1 (str): Attribute for leaf matching in self
- attr_t2 (str): Attribute for leaf matching in ref_tree
- expand_polytomies (bool): Resolve polytomies before comparison
- polytomy_size_limit (int): Max size for polytomy expansion
- skip_large_polytomies (bool): Skip large polytomies
Returns:
tuple: (RF_distance, max_RF, common_leaves, parts_t1, parts_t2,
discard_t1, discard_t2)
"""

Specialized tree class for evolutionary model analysis and molecular evolution studies.
class EvolTree(PhyloTree):
"""
Tree specialized for evolutionary analysis and molecular evolution models.
"""
def get_evol_model(self, model_name):
"""
Get evolutionary model associated with tree.
Parameters:
- model_name (str): Name of evolutionary model
Returns:
EvolModel: Evolutionary model object
"""
def link_to_evol_model(self, model_file, workdir=None):
"""
Link tree to evolutionary analysis results.
Parameters:
- model_file (str): Path to model results file
- workdir (str): Working directory for analysis files
"""
def run_model(self, model_name_or_fname):
"""
Run evolutionary model analysis.
Parameters:
- model_name_or_fname (str): Model name or file path
Returns:
dict: Model analysis results
"""
class EvolNode(EvolTree):
"""Alias for EvolTree - same functionality."""
pass

def get_subtrees(tree, full_copy=False, features=None, newick_only=False):
"""
Calculate all possible species trees within a gene tree.
Parameters:
- tree (PhyloTree): Input gene tree
- full_copy (bool): Create full copies of subtrees
- features (list): Features to preserve in subtrees
- newick_only (bool): Return only Newick strings
Returns:
tuple: (num_trees, num_duplications, tree_iterator)
"""
def is_dup(node):
"""
Check if node represents a duplication event.
Parameters:
- node (TreeNode): Node to test
Returns:
bool: True if node is duplication
"""

from ete3 import PhyloTree
# Create phylogenetic tree with species naming
tree = PhyloTree("(human_gene1:0.1,(chimp_gene1:0.05,bonobo_gene1:0.05):0.02);")
tree.set_species_naming_function(lambda x: x.split('_')[0])
# Check species representation
species = tree.get_species()
print(f"Species in tree: {species}")
# Test monophyly
is_mono, clade_type, broken = tree.check_monophyly(['human', 'chimp'], 'species')
print(f"Human-Chimp monophyly: {is_mono} ({clade_type})")

from ete3 import PhyloTree
# Create tree and link to alignment
tree = PhyloTree("(seq1:0.1,seq2:0.2,seq3:0.15);")
tree.link_to_alignment("alignment.fasta")
# Access sequence data
for leaf in tree.get_leaves():
print(f"{leaf.name}: {leaf.sequence}")

from ete3 import PhyloTree
# Gene tree and species tree
gene_tree = PhyloTree("(human_gene1:0.1,(chimp_gene1:0.05,chimp_gene2:0.05):0.02);")
species_tree = PhyloTree("(human:0.1,chimp:0.1);")
# Set species naming
gene_tree.set_species_naming_function(lambda x: x.split('_')[0])
# Reconcile trees
reconciled = gene_tree.reconcile(species_tree)
# Check event types
for node in reconciled.traverse():
if hasattr(node, 'evoltype'):
print(f"Node {node.name}: {node.evoltype}")

from ete3 import PhyloTree, NCBITaxa
ncbi = NCBITaxa()
# Create tree from NCBI taxonomy
tree = ncbi.get_topology([9606, 9598, 9597]) # Human, chimp, bonobo
# Compare with gene tree
gene_tree = PhyloTree("(human:0.1,(chimp:0.05,bonobo:0.05):0.02);")
comparison = gene_tree.ncbi_compare()
print(f"Topology conflicts: {comparison}")

Install with Tessl CLI
npx tessl i tessl/pypi-ete3