CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-ete3

A Python Environment for (phylogenetic) Tree Exploration

Pending
Overview
Eval results
Files

docs/external-formats.md

External Format Support

Support for reading and writing multiple phylogenetic data formats including PhyloXML, NeXML, and various sequence formats. ETE3 provides comprehensive interoperability with standard bioinformatics file formats.

Capabilities

PhyloXML Format Support

Complete support for the PhyloXML standard for phylogenetic data exchange.

class Phyloxml:
    """
    PhyloXML format parser and writer for phylogenetic data exchange.

    Use build_from_file() to read a PhyloXML document and export() to
    serialize back to PhyloXML.
    """

    def __init__(self):
        """Initialize PhyloXML handler."""

    def build_from_file(self, fname: str) -> "PhyloxmlTree":
        """
        Parse a PhyloXML file and build the tree structure.

        Parameters:
        - fname (str): Path to the PhyloXML file

        Returns:
        PhyloxmlTree: Parsed phylogenetic tree with PhyloXML annotations
        """

    def export(self, outfile=None):
        """
        Export tree to PhyloXML format.

        Parameters:
        - outfile (str, optional): Output file path; when None (the default)
          the PhyloXML text is returned as a string

        Returns:
        str: PhyloXML formatted string (if outfile is None)

        NOTE(review): the return value when outfile is given is not
        specified here - presumably None; confirm before relying on it.
        """

class PhyloxmlTree(PhyloTree):
    """
    Phylogenetic tree with PhyloXML-specific features and annotations.

    Subclass of PhyloTree. The PhyloXML-specific attributes below are
    declared as annotations only; no class-level defaults are assigned here.
    """

    def __init__(self, phyloxml_file=None):
        """
        Initialize PhyloXML tree.

        Parameters:
        - phyloxml_file (str, optional): Path to a PhyloXML file to load;
          defaults to None
        """

    # PhyloXML-specific properties
    phyloxml: dict          # PhyloXML annotations and metadata
    confidence: float       # Confidence value for branches
    taxonomy: dict          # Taxonomic information
    sequence: dict          # Sequence data and annotations
    events: dict           # Evolutionary events (duplication, speciation)
    properties: dict       # Custom properties from PhyloXML

NeXML Format Support

Support for the NeXML format, an XML-based standard for phylogenetic data modeled on NEXUS.

class Nexml:
    """
    NeXML format parser and writer for phylogenetic data exchange.

    Use build_from_file() to read a NeXML document and export() to
    serialize back to NeXML.
    """

    def __init__(self):
        """Initialize NeXML handler."""

    def build_from_file(self, fname: str) -> "NexmlTree":
        """
        Parse a NeXML file and build the tree structure.

        Parameters:
        - fname (str): Path to the NeXML file

        Returns:
        NexmlTree: Parsed phylogenetic tree with NeXML annotations
        """

    def export(self, outfile=None):
        """
        Export tree to NeXML format.

        Parameters:
        - outfile (str, optional): Output file path; when None (the default)
          the NeXML text is returned as a string

        Returns:
        str: NeXML formatted string (if outfile is None)

        NOTE(review): the return value when outfile is given is not
        specified here - presumably None; confirm before relying on it.
        """

class NexmlTree(PhyloTree):
    """
    Phylogenetic tree with NeXML-specific features and annotations.

    Subclass of PhyloTree. The NeXML-specific attributes below are
    declared as annotations only; no class-level defaults are assigned here.
    """

    def __init__(self, nexml_file=None):
        """
        Initialize NeXML tree.

        Parameters:
        - nexml_file (str, optional): Path to a NeXML file to load;
          defaults to None
        """

    # NeXML-specific properties
    nexml: dict            # NeXML annotations and metadata
    otus: dict            # Operational Taxonomic Units information
    characters: dict       # Character data and matrices
    meta: dict            # Metadata annotations

Newick Format Variations

Enhanced support for different Newick format variations and extensions.

def read_newick(newick_string: str, root_node=None, format: int = 0, quoted_node_names: bool = False) -> "TreeNode":
    """
    Parse a Newick format string with extensive format support.

    Parameters:
    - newick_string (str): Newick formatted tree string
    - root_node (TreeNode, optional): Existing node to use as root;
      defaults to None
    - format (int): Newick subformat (0-9), default 0
        0: flexible with support values
        1: flexible with internal node names
        2: all branches + leaf names + internal supports
        3: all branches + all names
        4: leaf branches + leaf names
        5: internal and leaf branches + leaf names
        6: internal branches + leaf names
        7: leaf branches + all names
        8: all names
        9: leaf names
    - quoted_node_names (bool): Handle quoted node names with special
      characters; default False

    Returns:
    TreeNode: Parsed tree structure

    NOTE(review): upstream ETE3 documentation appears to list an additional
    subformat 100 (topology only); confirm whether the 0-9 range above is
    complete.
    """

def write_newick(tree, features=None, format: int = 0, format_root_node: bool = True,
                is_leaf_fn=None, quoted_node_names: bool = False) -> str:
    """
    Export a tree to Newick format with customizable options.

    Parameters:
    - tree (TreeNode): Tree to export
    - features (list, optional): Node features to include in output;
      defaults to None
    - format (int): Newick output format (0-9), default 0
    - format_root_node (bool): Include root node in output; default True
    - is_leaf_fn (function, optional): Custom function to determine leaf
      nodes; defaults to None
    - quoted_node_names (bool): Quote node names with special characters;
      default False

    Returns:
    str: Newick formatted string
    """

Sequence Format Integration

Support for various sequence formats when working with phylogenetic data.

# FASTA format with phylogenetic extensions
def read_fasta_with_tree(fasta_file: str, tree_file=None):
    """
    Read FASTA sequences and optionally associate them with a tree.

    Parameters:
    - fasta_file (str): Path to FASTA file
    - tree_file (str, optional): Tree file for sequence-tree association;
      defaults to None

    Returns:
    tuple: (SeqGroup, PhyloTree) if tree_file provided, else SeqGroup

    Note that the return shape depends on tree_file, so callers must
    unpack the result only when a tree file was passed.
    """

# PHYLIP format variations
def read_phylip(source: str, interleaved: bool = False, relaxed: bool = False, tree_names: bool = True) -> "SeqGroup":
    """
    Read PHYLIP format with tree compatibility options.

    Parameters:
    - source (str): PHYLIP file path or string
    - interleaved (bool): Interleaved PHYLIP format; default False
    - relaxed (bool): Relaxed naming (>10 characters); default False
    - tree_names (bool): Ensure names compatible with tree formats;
      default True

    Returns:
    SeqGroup: Parsed sequences
    """

# Nexus format support  
def read_nexus(nexus_file: str) -> dict:
    """
    Read a Nexus format file containing trees and/or data.

    Parameters:
    - nexus_file (str): Path to Nexus file

    Returns:
    dict: Dictionary containing trees, data, and metadata

    NOTE(review): the exact keys of the returned dictionary are not
    specified here; confirm before indexing into it.
    """

Format Detection and Auto-parsing

Automatic format detection and parsing for mixed-format workflows.

def detect_format(filename: str) -> str:
    """
    Automatically detect file format based on content and extension.

    Parameters:
    - filename (str): Path to file

    Returns:
    str: Detected format, one of "newick", "phyloxml", "nexml", "nexus",
    "fasta", "phylip"

    NOTE(review): behavior for a file matching none of these formats is
    not specified here; confirm whether it raises or returns a sentinel.
    """

def auto_parse(filename: str, **kwargs):
    """
    Automatically parse a file using its detected format.

    Parameters:
    - filename (str): File to parse
    - kwargs: Format-specific parsing options

    Returns:
    Tree or SeqGroup: Parsed data structure

    The return type varies with the input, so callers should check it
    with isinstance() before using type-specific methods.
    """

Web Integration

WebTreeApplication

Web-based tree visualization and sharing capabilities.

class WebTreeApplication:
    """
    Web-based tree visualization application for interactive tree exploration.

    Construct it with a tree, optionally add more trees or a default style,
    then call launch() to obtain the URL.
    """

    def __init__(self, tree, name=None, host: str = "localhost", port: int = 8080):
        """
        Initialize web tree application.

        Parameters:
        - tree (Tree): Tree to visualize
        - name (str, optional): Application name; defaults to None
        - host (str): Server host address; default "localhost"
        - port (int): Server port number; default 8080
        """

    def launch(self, open_browser: bool = True) -> str:
        """
        Launch web server for tree visualization.

        Parameters:
        - open_browser (bool): Automatically open browser; default True

        Returns:
        str: URL of launched application
        """

    def add_tree(self, tree, name=None):
        """
        Add an additional tree to the web application.

        Parameters:
        - tree (Tree): Tree to add
        - name (str, optional): Tree identifier; defaults to None
        """

    def set_tree_style(self, tree_style):
        """
        Set default tree style for web display.

        Parameters:
        - tree_style (TreeStyle): Style configuration
        """

Database Integration

PhylomeDB Integration

Integration with PhylomeDB phylogenomic database.

class PhylomeDB3Connector:
    """
    Interface to PhylomeDB3 phylogenomic database.

    Provides tree lookup (get_best_tree, search_trees) and per-phylome age
    estimates (get_tree_ages).
    """

    def __init__(self, host: str = "phylomedb.org"):
        """
        Initialize PhylomeDB connector.

        Parameters:
        - host (str): PhylomeDB server hostname; default "phylomedb.org"
        """

    def get_best_tree(self, seed_taxid: int, target_taxid=None) -> "PhyloTree":
        """
        Retrieve best phylogenetic tree for given taxonomic IDs.

        Parameters:
        - seed_taxid (int): Seed species taxonomic ID
        - target_taxid (int, optional): Target species taxonomic ID;
          defaults to None

        Returns:
        PhyloTree: Best available phylogenetic tree
        """

    def search_trees(self, seed_taxid: int, target_species=None) -> list:
        """
        Search for trees containing specified taxa.

        Parameters:
        - seed_taxid (int): Seed taxonomic ID
        - target_species (list, optional): Target species list;
          defaults to None

        Returns:
        list: Available trees matching criteria
        """

    def get_tree_ages(self, phylome_id: int) -> dict:
        """
        Get age estimates for trees in phylome.

        Parameters:
        - phylome_id (int): PhylomeDB phylome identifier

        Returns:
        dict: Age estimates for phylome trees
        """

Usage Examples

PhyloXML Operations

from ete3 import Phyloxml, PhyloxmlTree

# Parse PhyloXML file
phyloxml_parser = Phyloxml()
tree = phyloxml_parser.build_from_file("example.phyloxml")

# Access PhyloXML-specific data; annotations are optional per node,
# so each attribute is probed before it is printed
for node in tree.traverse():
    if hasattr(node, 'confidence'):
        print(f"Node confidence: {node.confidence}")
    if hasattr(node, 'taxonomy'):
        print(f"Taxonomy: {node.taxonomy}")

# Export with annotations. export() is documented to return the XML string
# only when no output path is given, so the result of writing to a file is
# not captured here; call export() with no arguments to get the string.
phyloxml_parser.export("output.phyloxml")

NeXML Operations

from ete3 import Nexml, NexmlTree

# Load a NeXML document through a handler instance
handler = Nexml()
tree = handler.build_from_file("data.nexml")

# Metadata is reported only when the parsed tree carries it
if hasattr(tree, 'meta'):
    print(f"Metadata: {tree.meta}")

# Likewise for the character data
if hasattr(tree, 'characters'):
    print(f"Character matrix: {tree.characters}")

# Write the document back out as NeXML
handler.export("output.nexml")

Format Conversion

from ete3 import Tree, Phyloxml, Nexml

# Load tree in Newick format
tree = Tree("(A:1,(B:1,C:1):0.5);")

# Convert to PhyloXML
# NOTE(review): `tree` only receives a `phyloxml` attribute; it is never
# passed to the Phyloxml handler, so export() runs on a freshly created
# handler. Confirm how a tree is meant to be attached before exporting.
phyloxml = Phyloxml()
tree.phyloxml = {"description": "Example tree"}
phyloxml_output = phyloxml.export()

# Convert to NeXML
# NOTE(review): same concern as above - the Nexml handler is never given
# `tree`; confirm the intended way to attach it.
nexml = Nexml()
tree.nexml = {"title": "Example phylogeny"}
nexml_output = nexml.export()

# Save in different formats (export() with no outfile returns the string)
with open("tree.phyloxml", "w") as f:
    f.write(phyloxml_output)
    
with open("tree.nexml", "w") as f:
    f.write(nexml_output)

Auto-parsing and Format Detection

# Tree and SeqGroup are needed for the isinstance() checks below
from ete3 import Tree, SeqGroup, detect_format, auto_parse

# Detect format automatically
file_format = detect_format("unknown_format_file.txt")
print(f"Detected format: {file_format}")

# Parse automatically based on format
data = auto_parse("phylogenetic_data.xml")

# auto_parse() returns either a tree or a sequence group, so branch on type
if isinstance(data, Tree):
    # len() of an ETE tree counts its leaves, not every node
    print(f"Parsed tree with {len(data)} leaves")
elif isinstance(data, SeqGroup):
    print(f"Parsed {len(data)} sequences")

Web Application

from ete3 import Tree, WebTreeApplication, TreeStyle

# Build the example tree
primates = Tree("(human:1,(chimp:0.5,bonobo:0.5):0.5);")

# Configure a style that shows leaf names and branch lengths
style = TreeStyle()
style.show_leaf_name = True
style.show_branch_length = True

# Create the web application, apply the style, and start serving
app = WebTreeApplication(primates, name="Primate Tree")
app.set_tree_style(style)
url = app.launch()

print(f"Tree visualization available at: {url}")

PhylomeDB Integration

from ete3 import PhylomeDB3Connector

# Taxonomic IDs used throughout the example
HUMAN_TAXID = 9606
CHIMP_TAXID = 9598

# Open a connector with the default host
connector = PhylomeDB3Connector()

# Look up every tree seeded on human
human_trees = connector.search_trees(seed_taxid=HUMAN_TAXID)
print(f"Found {len(human_trees)} trees with human sequences")

# Fetch the best human-chimp tree
best_tree = connector.get_best_tree(seed_taxid=HUMAN_TAXID, target_taxid=CHIMP_TAXID)
print(f"Best tree: {best_tree.get_ascii()}")

# Retrieve age estimates for phylome 1
ages = connector.get_tree_ages(phylome_id=1)
print(f"Age estimates: {ages}")

Comprehensive Format Workflow

from ete3 import Tree, SeqGroup, PhyloTree, Phyloxml, NCBITaxa
from ete3 import auto_parse

# Multi-format phylogenetic workflow
def process_phylogenetic_data(tree_file, sequence_file=None):
    """
    Parse a tree (and optionally sequences), annotate it, and export it.

    Parameters:
    - tree_file (str): Path to the tree file; format is auto-detected
    - sequence_file (str, optional): Path to a sequence file to link to
      the tree; defaults to None

    Returns:
    tuple: (tree, results) where results maps 'newick', 'phyloxml' and
    'ascii' to the exported text ('phyloxml' stays None when the tree has
    no `taxonomy` attribute)
    """
    # auto_parse() detects the format itself, so no separate detection step
    tree = auto_parse(tree_file)

    # Load sequences if provided
    if sequence_file:
        sequences = auto_parse(sequence_file)

        # Link sequences to tree (only PhyloTree supports alignments)
        if isinstance(tree, PhyloTree):
            tree.link_to_alignment(sequences)

    # Add taxonomic information. annotate_tree() decorates the nodes in
    # place and its return value is not the tree, so the result must not be
    # assigned back to `tree`.
    ncbi = NCBITaxa()
    ncbi.annotate_tree(tree)

    # Export in multiple formats
    results = {
        'newick': tree.write(format=1),
        'phyloxml': None,
        'ascii': tree.get_ascii()
    }

    # Export to PhyloXML with annotations
    # NOTE(review): the Phyloxml handler is created empty and never given
    # `tree`; confirm how the tree should be attached before export().
    if hasattr(tree, 'taxonomy'):
        phyloxml = Phyloxml()
        results['phyloxml'] = phyloxml.export()

    return tree, results

# Process data
tree, outputs = process_phylogenetic_data("input.nw", "sequences.fasta")

# Save all formats (formats that produced no content are skipped)
for format_name, content in outputs.items():
    if content:
        with open(f"output.{format_name}", "w") as f:
            f.write(content)

Install with Tessl CLI

npx tessl i tessl/pypi-ete3

docs

clustering.md

core-tree.md

data-tables.md

external-formats.md

index.md

ncbi-taxonomy.md

phylogenetic.md

sequences.md

visualization.md

tile.json