CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-ete3

A Python Environment for (phylogenetic) Tree Exploration

Pending
Overview
Eval results
Files

docs/phylogenetic.md

Phylogenetic Analysis

Advanced phylogenetic tree analysis capabilities including species tree operations, monophyly testing, evolutionary analysis, and specialized phylogenetic methods. These features extend core tree functionality with domain-specific phylogenetic tools.

Capabilities

Phylogenetic Tree Classes

Enhanced tree classes with phylogenetic-specific features and methods.

class PhyloTree(Tree):
    """
    Species-aware tree for phylogenetic work.

    Extends Tree, so every generic tree operation stays available
    next to the phylogenetic methods.
    """

    def __init__(self, newick=None, alignment=None, alg_format="fasta",
                 sp_naming_function=None, format=0):
        """
        Build a phylogenetic tree, optionally together with an alignment.

        Parameters:
        - newick (str): Newick text, or a file holding it
        - alignment (str): Alignment given as a file or as a string
        - alg_format (str): Alignment format: "fasta", "phylip" or "iphylip"
        - sp_naming_function (function): Callable deriving the species from a node name
        - format (int): Newick format specification
        """

class PhyloNode(PhyloTree):
    """PhyloTree under the PhyloNode name; nothing is added or overridden."""

Species Naming and Annotation

Configure how species names are extracted from node names and manage species-level operations.

def set_species_naming_function(self, fn):
    """
    Register the callable used to derive a species name from a node name.

    Parameters:
    - fn (function): Receives the node name and returns the species name,
                     e.g. lambda x: x.split('_')[0]
    """

species: str  # Species name of the node, derived from its name (read-only property)

def get_species(self):
    """
    Collect every species represented in the tree.

    Returns:
    set: Names of the species found in the tree
    """

def annotate_gtdb_taxa(self, taxid_attr="name"):
    """
    Add GTDB (Genome Taxonomy Database) taxonomic information to the tree.

    Parameters:
    - taxid_attr (str): Name of the node attribute that holds the taxonomic IDs
    """

Monophyly Testing

Test and analyze monophyletic groups in phylogenetic trees.

def check_monophyly(self, values, target_attr, ignore_missing=False,
                    unrooted=False):
    """
    Check if specified values form monophyletic group.

    Parameters:
    - values (list): List of values to test for monophyly
    - target_attr (str): Node attribute to check ("species", "name", etc.)
    - ignore_missing (bool): Do not fail when some of the values are not
                             present in the tree; test only the ones found
    - unrooted (bool): Treat the tree as unrooted, so a group split only by
                       the current outgroup still counts as monophyletic

    Returns:
    tuple: (is_monophyletic: bool, clade_type: str, breaking_leaves: set)
           clade_type can be "monophyletic", "paraphyletic", or "polyphyletic"
           breaking_leaves contains the leaves that break the monophyly

    Raises:
    ValueError: Some values are absent from the tree and ignore_missing is False
    """

def get_monophyletic(self, values, target_attr):
    """
    Find the nodes whose clade groups the specified values and only them.

    Parameters:
    - values (list): Values that should form monophyletic group
    - target_attr (str): Node attribute to match against

    Returns:
    iterator of TreeNode: One node per matching monophyletic clade; the
                          iterator is empty when no clade matches (it is
                          not a single node and never None)
    """

Distance and Divergence Analysis

Calculate evolutionary distances and analyze tree metrics.

def get_age(self, species2age):
    """
    Estimate the age of this node from per-species age information.

    Parameters:
    - species2age (dict): Age of each species, keyed by species name

    Returns:
    float: Estimated age of node
    """

def get_closest_leaf(self, topology_only=False):
    """
    Locate the nearest leaf and report how far away it is.

    Parameters:
    - topology_only (bool): Measure by topology alone, disregarding branch lengths

    Returns:
    tuple: (closest_leaf_node, distance)
    """

def get_farthest_leaf(self, topology_only=False):
    """
    Locate the leaf that lies farthest away and report its distance.

    Parameters:
    - topology_only (bool): Measure by topology alone, disregarding branch lengths

    Returns:
    tuple: (farthest_leaf_node, distance)
    """

def get_farthest_node(self, topology_only=False):
    """
    Locate the farthest node of any kind (leaf or internal) and its distance.

    Parameters:
    - topology_only (bool): Measure by topology alone, disregarding branch lengths

    Returns:
    tuple: (farthest_node, distance)
    """

def get_midpoint_outgroup(self):
    """
    Pick the outgroup to use when rooting the tree at its midpoint.

    Returns:
    TreeNode: Node that serves as midpoint outgroup
    """

Sequence Integration

Link phylogenetic trees with molecular sequence data.

def link_to_alignment(self, alignment, alg_format="fasta", **kwargs):
    """
    Attach the sequences of an alignment to the nodes of the tree.

    Parameters:
    - alignment (str): Path of an alignment file, or the sequences as a string
    - alg_format (str): One of "fasta", "phylip", "iphylip", "paml"
    - kwargs: Extra parameters specific to the chosen format
    """

sequence: str  # Sequence associated with the node (available once the tree is linked to an alignment)

NCBI Taxonomy Comparison

Compare phylogenetic trees with NCBI taxonomic relationships.

def ncbi_compare(self, autodetect_duplications=True):
    """
    Check the topology of this tree against the NCBI taxonomy.

    Parameters:
    - autodetect_duplications (bool): Detect gene duplications automatically

    Returns:
    dict: Comparison results including conflicts and agreements
          NOTE(review): confirm against the installed ete3 version that a
          value is actually returned here.
    """

Tree Reconciliation

Reconcile gene trees with species trees to infer evolutionary events.

def reconcile(self, species_tree, inplace=True):
    """
    Reconcile gene tree with species tree.

    Parameters:
    - species_tree (PhyloTree): Reference species tree
    - inplace (bool): Modify current tree or return new one
                      NOTE(review): the upstream ete3 signature appears to
                      accept only species_tree; confirm this flag exists.

    Returns:
    tuple: (reconciled_tree, events)
           reconciled_tree is a PhyloTree with duplication/speciation
           events annotated; events is the list of evolutionary events
           inferred by the reconciliation. Unpack both values:
           recon_tree, events = gene_tree.reconcile(species_tree)
    """

# Properties set by reconciliation
evoltype: str  # Evolutionary event type of the node: "S" (speciation), "D" (duplication)

Phylogenetic Tree Statistics

Calculate various phylogenetic tree statistics and metrics.

def get_cached_content(self, store_attr=None):
    """
    Precompute tree content so repeated lookups are cheap.

    Parameters:
    - store_attr (str): Attribute to cache; None by default

    Returns:
    dict: Cached tree statistics and content
    """

def robinson_foulds(self, ref_tree, attr_t1="name", attr_t2="name",
                    expand_polytomies=False, polytomy_size_limit=5,
                    skip_large_polytomies=True):
    """
    Compute the Robinson-Foulds distance from this tree to a reference tree.

    Parameters:
    - ref_tree (Tree): Tree to compare against
    - attr_t1 (str): Attribute used to match leaves in this tree
    - attr_t2 (str): Attribute used to match leaves in ref_tree
    - expand_polytomies (bool): Resolve polytomies before comparing
    - polytomy_size_limit (int): Largest polytomy that will be expanded
    - skip_large_polytomies (bool): Skip large polytomies

    Returns:
    tuple: (RF_distance, max_RF, common_leaves, parts_t1, parts_t2,
            discard_t1, discard_t2)
    """

Evolution-Specific Tree Classes

EvolTree for Evolutionary Analysis

Specialized tree class for evolutionary model analysis and molecular evolution studies.

class EvolTree(PhyloTree):
    """
    PhyloTree variant aimed at evolutionary-model analysis and
    molecular evolution studies.
    """

    def get_evol_model(self, model_name):
        """
        Fetch an evolutionary model associated with this tree.

        Parameters:
        - model_name (str): Name identifying the evolutionary model

        Returns:
        EvolModel: Evolutionary model object
        """

    def link_to_evol_model(self, model_file, workdir=None):
        """
        Attach the results of an evolutionary analysis to this tree.

        Parameters:
        - model_file (str): Location of the file with the model results
        - workdir (str): Directory holding the analysis files
        """

    def run_model(self, model_name_or_fname):
        """
        Execute an evolutionary model analysis on this tree.

        Parameters:
        - model_name_or_fname (str): Either a model name or a file path

        Returns:
        dict: Model analysis results
        """

class EvolNode(EvolTree):
    """EvolTree under the EvolNode name; nothing is added or overridden."""

Utility Functions

Species Tree Analysis

def get_subtrees(tree, full_copy=False, features=None, newick_only=False):
    """
    Enumerate every species tree that can be extracted from a gene tree.

    Parameters:
    - tree (PhyloTree): Gene tree to analyse
    - full_copy (bool): Produce full copies of the subtrees
    - features (list): Features to keep in the subtrees
    - newick_only (bool): Yield Newick strings only

    Returns:
    tuple: (num_trees, num_duplications, tree_iterator)
    """

def is_dup(node):
    """
    Tell whether a node stands for a duplication event.

    Parameters:
    - node (TreeNode): Node being examined

    Returns:
    bool: True if node is duplication
    """

Usage Examples

Basic Phylogenetic Operations

from ete3 import PhyloTree

# Leaf names follow the "<species>_<gene>" pattern, so the species is the
# part before the first underscore
def species_from_name(node_name):
    return node_name.split('_')[0]

# Build the gene tree and tell it how to read species from leaf names
tree = PhyloTree("(human_gene1:0.1,(chimp_gene1:0.05,bonobo_gene1:0.05):0.02);")
tree.set_species_naming_function(species_from_name)

# List the species represented in the tree
species = tree.get_species()
print(f"Species in tree: {species}")

# Ask whether human and chimp group together, using the species attribute
monophyly_result = tree.check_monophyly(['human', 'chimp'], 'species')
is_mono, clade_type, broken = monophyly_result
print(f"Human-Chimp monophyly: {is_mono} ({clade_type})")

Sequence Integration

from ete3 import PhyloTree

# Build the tree, then attach the sequences stored in the alignment file
tree = PhyloTree("(seq1:0.1,seq2:0.2,seq3:0.15);")
tree.link_to_alignment("alignment.fasta")

# Print each leaf next to the sequence linked to it
leaves = tree.get_leaves()
for leaf in leaves:
    print(f"{leaf.name}: {leaf.sequence}")

Tree Reconciliation

from ete3 import PhyloTree

# Gene tree and species tree
gene_tree = PhyloTree("(human_gene1:0.1,(chimp_gene1:0.05,chimp_gene2:0.05):0.02);")
species_tree = PhyloTree("(human:0.1,chimp:0.1);")

# Set species naming
gene_tree.set_species_naming_function(lambda x: x.split('_')[0])

# Reconcile trees. reconcile() returns two values, the reconciled tree and
# the list of inferred evolutionary events, so both must be unpacked;
# calling .traverse() on the raw return value fails because it is a tuple.
reconciled, events = gene_tree.reconcile(species_tree)

# Check event types
for node in reconciled.traverse():
    if hasattr(node, 'evoltype'):
        print(f"Node {node.name}: {node.evoltype}")

NCBI Taxonomy Integration

from ete3 import PhyloTree, NCBITaxa

# NOTE(review): NCBITaxa() presumably needs a local copy of the NCBI
# taxonomy database - confirm how it is obtained on first use
taxa = NCBITaxa()

# Reference topology built from NCBI taxids: human, chimp, bonobo
taxonomy_tree = taxa.get_topology([9606, 9598, 9597])

# Gene tree to check against the NCBI taxonomy
gene_tree = PhyloTree("(human:0.1,(chimp:0.05,bonobo:0.05):0.02);")
comparison = gene_tree.ncbi_compare()
print(f"Topology conflicts: {comparison}")

Install with Tessl CLI

npx tessl i tessl/pypi-ete3

docs

clustering.md

core-tree.md

data-tables.md

external-formats.md

index.md

ncbi-taxonomy.md

phylogenetic.md

sequences.md

visualization.md

tile.json