A Python Environment for (phylogenetic) Tree Exploration
—
Support for reading and writing multiple phylogenetic data formats including PhyloXML, NeXML, and various sequence formats. ETE3 provides comprehensive interoperability with standard bioinformatics file formats.
Complete support for PhyloXML standard for phylogenetic data exchange.
class Phyloxml:
    """PhyloXML format parser and writer for phylogenetic data exchange.

    Interface stub: this listing declares signatures and documentation
    only; no implementation is shown.
    """

    def __init__(self):
        """Initialize PhyloXML handler."""

    def build_from_file(self, fname):
        """Parse PhyloXML file and build tree structure.

        Parameters:
        - fname (str): Path to PhyloXML file

        Returns:
        PhyloxmlTree: Parsed phylogenetic tree with PhyloXML annotations
        """

    def export(self, outfile=None):
        """Export tree to PhyloXML format.

        Parameters:
        - outfile (str): Output file path, if None returns string

        Returns:
        str: PhyloXML formatted string (if outfile is None)
        """
class PhyloxmlTree(PhyloTree):
    """Phylogenetic tree with PhyloXML-specific features and annotations.

    Interface stub: only the constructor signature and the annotated
    attributes are declared in this listing.
    """

    def __init__(self, phyloxml_file=None):
        """Initialize PhyloXML tree.

        Parameters:
        - phyloxml_file (str): Path to PhyloXML file to load
        """

    # PhyloXML-specific properties
    phyloxml: dict  # PhyloXML annotations and metadata
    confidence: float  # Confidence value for branches
    taxonomy: dict  # Taxonomic information
    sequence: dict  # Sequence data and annotations
    events: dict  # Evolutionary events (duplication, speciation)
    properties: dict  # Custom properties from PhyloXML


# Support for NeXML format, the NeXus XML standard for phylogenetic data.
class Nexml:
    """NeXML format parser and writer for phylogenetic data exchange.

    Interface stub: this listing declares signatures and documentation
    only; no implementation is shown.
    """

    def __init__(self):
        """Initialize NeXML handler."""

    def build_from_file(self, fname):
        """Parse NeXML file and build tree structure.

        Parameters:
        - fname (str): Path to NeXML file

        Returns:
        NexmlTree: Parsed phylogenetic tree with NeXML annotations
        """

    def export(self, outfile=None):
        """Export tree to NeXML format.

        Parameters:
        - outfile (str): Output file path, if None returns string

        Returns:
        str: NeXML formatted string (if outfile is None)
        """
class NexmlTree(PhyloTree):
    """Phylogenetic tree with NeXML-specific features and annotations.

    Interface stub: only the constructor signature and the annotated
    attributes are declared in this listing.
    """

    def __init__(self, nexml_file=None):
        """Initialize NeXML tree.

        Parameters:
        - nexml_file (str): Path to NeXML file to load
        """

    # NeXML-specific properties
    nexml: dict  # NeXML annotations and metadata
    otus: dict  # Operational Taxonomic Units information
    characters: dict  # Character data and matrices
    meta: dict  # Metadata annotations


# Enhanced support for different Newick format variations and extensions.
def read_newick(newick_string, root_node=None, format=0, quoted_node_names=False):
    """Parse Newick format string with extensive format support.

    Interface stub: only the signature and contract are declared here.

    Parameters:
    - newick_string (str): Newick formatted tree string
    - root_node (TreeNode): Existing node to use as root
    - format (int): Newick subformat (0-9)
        0: flexible with support values
        1: flexible with internal node names
        2: all branches + leaf names + internal supports
        3: all branches + all names
        4: leaf branches + leaf names
        5: internal and leaf branches + leaf names
        6: internal branches + leaf names
        7: leaf branches + all names
        8: all names
        9: leaf names
    - quoted_node_names (bool): Handle quoted node names with special characters

    Returns:
    TreeNode: Parsed tree structure
    """
    # NOTE(review): upstream ete3 also documents format 100 (topology only);
    # confirm whether it should be listed alongside 0-9.
def write_newick(tree, features=None, format=0, format_root_node=True,
                 is_leaf_fn=None, quoted_node_names=False):
    """Export tree to Newick format with customizable options.

    Interface stub: only the signature and contract are declared here.

    Parameters:
    - tree (TreeNode): Tree to export
    - features (list): Node features to include in output
    - format (int): Newick output format (0-9)
    - format_root_node (bool): Include root node in output
    - is_leaf_fn (function): Custom function to determine leaf nodes
    - quoted_node_names (bool): Quote node names with special characters

    Returns:
    str: Newick formatted string
    """


# Support for various sequence formats when working with phylogenetic data.
# FASTA format with phylogenetic extensions
def read_fasta_with_tree(fasta_file, tree_file=None):
    """Read FASTA sequences and optionally associate with tree.

    Interface stub: only the signature and contract are declared here.

    Parameters:
    - fasta_file (str): Path to FASTA file
    - tree_file (str): Optional tree file for sequence-tree association

    Returns:
    tuple: (SeqGroup, PhyloTree) if tree_file provided, else SeqGroup
    """
# PHYLIP format variations
def read_phylip(source, interleaved=False, relaxed=False, tree_names=True):
    """Read PHYLIP format with tree compatibility options.

    Interface stub: only the signature and contract are declared here.

    Parameters:
    - source (str): PHYLIP file path or string
    - interleaved (bool): Interleaved PHYLIP format
    - relaxed (bool): Relaxed naming (>10 characters)
    - tree_names (bool): Ensure names compatible with tree formats

    Returns:
    SeqGroup: Parsed sequences
    """
# Nexus format support
def read_nexus(nexus_file):
    """Read Nexus format files containing trees and/or data.

    Interface stub: only the signature and contract are declared here.

    Parameters:
    - nexus_file (str): Path to Nexus file

    Returns:
    dict: Dictionary containing trees, data, and metadata
    """


# Automatic format detection and parsing for mixed-format workflows.
def detect_format(filename):
    """Automatically detect file format based on content and extension.

    Interface stub: only the signature and contract are declared here.

    Parameters:
    - filename (str): Path to file

    Returns:
    str: Detected format ("newick", "phyloxml", "nexml", "nexus", "fasta", "phylip")
    """
def auto_parse(filename, **kwargs):
    """Automatically parse file using detected format.

    Interface stub: only the signature and contract are declared here.

    Parameters:
    - filename (str): File to parse
    - kwargs: Format-specific parsing options

    Returns:
    Tree or SeqGroup: Parsed data structure
    """


# Web-based tree visualization and sharing capabilities.
class WebTreeApplication:
    """Web-based tree visualization application for interactive tree exploration.

    Interface stub: this listing declares signatures and documentation
    only; no implementation is shown.
    """

    def __init__(self, tree, name=None, host="localhost", port=8080):
        """Initialize web tree application.

        Parameters:
        - tree (Tree): Tree to visualize
        - name (str): Application name
        - host (str): Server host address
        - port (int): Server port number
        """

    def launch(self, open_browser=True):
        """Launch web server for tree visualization.

        Parameters:
        - open_browser (bool): Automatically open browser

        Returns:
        str: URL of launched application
        """

    def add_tree(self, tree, name=None):
        """Add additional tree to web application.

        Parameters:
        - tree (Tree): Tree to add
        - name (str): Tree identifier
        """

    def set_tree_style(self, tree_style):
        """Set default tree style for web display.

        Parameters:
        - tree_style (TreeStyle): Style configuration
        """


# Integration with PhylomeDB phylogenomic database.
class PhylomeDB3Connector:
    """Interface to PhylomeDB3 phylogenomic database.

    Interface stub: this listing declares signatures and documentation
    only; no implementation is shown.
    """

    def __init__(self, host="phylomedb.org"):
        """Initialize PhylomeDB connector.

        Parameters:
        - host (str): PhylomeDB server hostname
        """

    def get_best_tree(self, seed_taxid, target_taxid=None):
        """Retrieve best phylogenetic tree for given taxonomic IDs.

        Parameters:
        - seed_taxid (int): Seed species taxonomic ID
        - target_taxid (int): Target species taxonomic ID (optional)

        Returns:
        PhyloTree: Best available phylogenetic tree
        """

    def search_trees(self, seed_taxid, target_species=None):
        """Search for trees containing specified taxa.

        Parameters:
        - seed_taxid (int): Seed taxonomic ID
        - target_species (list): Target species list

        Returns:
        list: Available trees matching criteria
        """

    def get_tree_ages(self, phylome_id):
        """Get age estimates for trees in phylome.

        Parameters:
        - phylome_id (int): PhylomeDB phylome identifier

        Returns:
        dict: Age estimates for phylome trees
        """


from ete3 import Phyloxml, PhyloxmlTree
# Parse PhyloXML file
# NOTE(review): upstream ete3 appears to populate the handler and expose the
# trees through get_phylogeny(); confirm that build_from_file() returns a
# tree in the release this example targets.
phyloxml_parser = Phyloxml()
tree = phyloxml_parser.build_from_file("example.phyloxml")

# Access PhyloXML-specific data
for node in tree.traverse():
    if hasattr(node, 'confidence'):
        print(f"Node confidence: {node.confidence}")
    if hasattr(node, 'taxonomy'):
        print(f"Taxonomy: {node.taxonomy}")

# Export with annotations. export() is documented to return the XML string
# only when no outfile is given, so the result is not bound to a name here.
phyloxml_parser.export("output.phyloxml")


from ete3 import Nexml, NexmlTree
# Parse NeXML file
nexml_parser = Nexml()
tree = nexml_parser.build_from_file("data.nexml")

# Access NeXML metadata
if hasattr(tree, 'meta'):
    print(f"Metadata: {tree.meta}")

# Work with character data
if hasattr(tree, 'characters'):
    print(f"Character matrix: {tree.characters}")

# Export to NeXML
nexml_parser.export("output.nexml")


from ete3 import Tree, Phyloxml, Nexml
# Load tree in Newick format
tree = Tree("(A:1,(B:1,C:1):0.5);")

# Convert to PhyloXML
# NOTE(review): `tree` is never handed to the Phyloxml handler, so export()
# has nothing from this tree to serialize. The API shown in this document
# exposes no attach method; confirm the intended call before relying on this.
phyloxml = Phyloxml()
tree.phyloxml = {"description": "Example tree"}
phyloxml_output = phyloxml.export()

# Convert to NeXML
# NOTE(review): same concern as above -- the Nexml handler never receives `tree`.
nexml = Nexml()
tree.nexml = {"title": "Example phylogeny"}
nexml_output = nexml.export()

# Save in different formats
with open("tree.phyloxml", "w") as f:
    f.write(phyloxml_output)

with open("tree.nexml", "w") as f:
    f.write(nexml_output)


from ete3 import detect_format, auto_parse
# The isinstance() checks below need these two classes in scope as well.
from ete3 import SeqGroup, Tree

# Detect format automatically
file_format = detect_format("unknown_format_file.txt")
print(f"Detected format: {file_format}")

# Parse automatically based on format
data = auto_parse("phylogenetic_data.xml")
if isinstance(data, Tree):
    # len() on an ete3 tree counts its leaves, not every node.
    print(f"Parsed tree with {len(data)} leaves")
elif isinstance(data, SeqGroup):
    print(f"Parsed {len(data)} sequences")


from ete3 import Tree, WebTreeApplication, TreeStyle
# Create tree and style
tree = Tree("(human:1,(chimp:0.5,bonobo:0.5):0.5);")
ts = TreeStyle()
ts.show_leaf_name = True
ts.show_branch_length = True

# Launch web application
webapp = WebTreeApplication(tree, name="Primate Tree")
webapp.set_tree_style(ts)
url = webapp.launch()
print(f"Tree visualization available at: {url}")


from ete3 import PhylomeDB3Connector
# Connect to PhylomeDB
phylomedb = PhylomeDB3Connector()

# Search for trees
human_trees = phylomedb.search_trees(seed_taxid=9606)  # Human
print(f"Found {len(human_trees)} trees with human sequences")

# Get best tree
best_tree = phylomedb.get_best_tree(seed_taxid=9606, target_taxid=9598)  # Human-chimp
print(f"Best tree: {best_tree.get_ascii()}")

# Get age estimates
ages = phylomedb.get_tree_ages(phylome_id=1)
print(f"Age estimates: {ages}")


from ete3 import Tree, SeqGroup, Phyloxml, NCBITaxa
# Names used below that the import preceding this example does not provide.
from ete3 import PhyloTree, auto_parse, detect_format


# Multi-format phylogenetic workflow
def process_phylogenetic_data(tree_file, sequence_file=None):
    """Parse a tree (and optionally sequences), annotate it, and export it.

    Parameters:
    - tree_file (str): Path to the tree file; its format is auto-detected
    - sequence_file (str): Optional path to a sequence file to link to the tree

    Returns:
    tuple: (tree, results) where results maps 'newick', 'phyloxml' and
    'ascii' to the exported text ('phyloxml' stays None when the tree has
    no `taxonomy` attribute)
    """
    # Auto-detect and parse tree (auto_parse performs the detection itself)
    tree = auto_parse(tree_file)

    # Load sequences if provided
    if sequence_file:
        sequences = auto_parse(sequence_file)
        # Link sequences to tree
        if isinstance(tree, PhyloTree):
            tree.link_to_alignment(sequences)

    # Add taxonomic information.
    # The return value is deliberately not assigned back to `tree`: upstream
    # ete3 documents annotate_tree() as annotating in place and returning
    # lookup dictionaries rather than the tree. NOTE(review): confirm against
    # the ete3 release in use.
    ncbi = NCBITaxa()
    ncbi.annotate_tree(tree)

    # Export in multiple formats
    results = {
        'newick': tree.write(format=1),
        'phyloxml': None,
        'ascii': tree.get_ascii(),
    }

    # Export to PhyloXML with annotations
    # NOTE(review): the Phyloxml handler is never given `tree`; the API shown
    # in this document has no attach method, so confirm the intended call.
    if hasattr(tree, 'taxonomy'):
        phyloxml = Phyloxml()
        results['phyloxml'] = phyloxml.export()

    return tree, results


# Process data
tree, outputs = process_phylogenetic_data("input.nw", "sequences.fasta")

# Save all formats
for format_name, content in outputs.items():
    if content:
        with open(f"output.{format_name}", "w") as f:
            f.write(content)


# Install with Tessl CLI
#   npx tessl i tessl/pypi-ete3