tessl/pypi-dendropy

A Python library for phylogenetics and phylogenetic computing: reading, writing, simulation, processing and manipulation of phylogenetic trees and characters.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Data Input/Output

Name: tessl/pypi-dendropy
Author: tessl

Comprehensive I/O framework supporting all major phylogenetic file formats, with configurable reading and writing options. DendroPy handles the NEXUS, Newick, NeXML, FASTA, and PHYLIP formats, with automatic format detection and extensive customization options.

Capabilities

Universal I/O Methods

All DendroPy data classes (Tree, TreeList, CharacterMatrix, DataSet) support unified I/O methods for reading and writing data.

# Factory method for reading from external sources
@classmethod
def get(cls, **kwargs):
    """
    Factory method to create a new object by reading from an external source.

    All options are passed as keyword arguments.

    Parameters (data source):
    - file: File object or file-like object
    - path: File path string
    - url: URL string
    - data: Raw data string
    NOTE(review): presumably exactly one of the four sources above is
    given per call -- confirm against the DendroPy reference.

    Parameters (format and options):
    - schema: Format specification ('newick', 'nexus', 'nexml', 'fasta', 'phylip')
    - preserve_underscores: Keep underscores in taxon names (default: False)
    - suppress_internal_node_taxa: Ignore internal node labels as taxa (default: False)
    - rooting: How to handle rooting ('force-rooted', 'force-unrooted', 'default-rooted', 'default-unrooted')
    - taxon_namespace: TaxonNamespace to use for taxa
    - collection_offset: Skip first N items when reading multiple items
    - tree_offset: Skip first N trees (for tree sources)
    - ignore_unrecognized_keyword_arguments: Suppress warnings for unknown kwargs

    Returns:
    New object of appropriate type with data loaded
    """

def read(self, **kwargs):
    """
    Read data from an external source into this existing object.

    Parameters:
    Same keyword arguments as the get() method; the difference is that
    the data is loaded into the existing object rather than into a
    newly created one.
    """

def write(self, **kwargs):
    """
    Write object data to an external destination.

    All options are passed as keyword arguments.

    Parameters (destination):
    - file: File object or file-like object for output
    - path: File path string for output

    Parameters (format and options):
    - schema: Output format ('newick', 'nexus', 'nexml', 'fasta', 'phylip')
    - suppress_leaf_taxon_labels: Don't write leaf taxon names (default: False)
    - suppress_internal_taxon_labels: Don't write internal taxon names (default: False)
    - suppress_rooting: Don't write rooting information (default: False)
    - suppress_edge_lengths: Don't write branch lengths (default: False)
    - unquoted_underscores: Don't quote underscores in names (default: False)
    - preserve_spaces: Keep spaces in taxon names (default: False)
    - store_tree_weights: Include tree weights in output (default: False)
    - suppress_annotations: Don't write annotations (default: True)
    - annotations_as_nhx: Write annotations in NHX format (default: False)
    - suppress_item_comments: Don't write item comments
      (NOTE(review): default is not stated here -- confirm)
    - ignore_unrecognized_keyword_arguments: Suppress warnings for unknown kwargs
    """

def write_to_stream(self, dest, schema, **kwargs):
    """
    Write data to stream in specified format.

    Parameters:
    - dest: Destination stream to write to
    - schema: Output format specification
    - **kwargs: Additional options (presumably the same format options
      accepted by write() -- confirm)
    """

Format-Specific Reading

DendroPy supports reading from multiple phylogenetic file formats with format-specific options.

# Newick (trees): pick the rooting interpretation and keep underscores
Tree.get(
    path="tree.nwk",
    schema="newick",
    rooting="default-unrooted",
    preserve_underscores=True,
)

# NEXUS (trees and character data): do not treat internal node labels as taxa
TreeList.get(
    path="trees.nex",
    schema="nexus",
    preserve_underscores=False,
    suppress_internal_node_taxa=True,
)

# NeXML
DataSet.get(path="data.xml", schema="nexml")

# FASTA (character matrices)
DnaCharacterMatrix.get(
    path="seqs.fasta",
    schema="fasta",
    data_type="dna",
)

# PHYLIP
ProteinCharacterMatrix.get(
    path="alignment.phy",
    schema="phylip",
    multispace_delimiter=True,
    interleaved=False,
)

Format-Specific Writing

Write data in various phylogenetic formats with extensive customization options.

# Newick: keep edge lengths and leaf labels, leave underscores unquoted
tree.write(
    path="output.nwk",
    schema="newick",
    suppress_edge_lengths=False,
    suppress_leaf_taxon_labels=False,
    unquoted_underscores=True,
)

# NEXUS: include rooting, tree weights and annotations (metadata)
trees.write(
    path="output.nex",
    schema="nexus",
    suppress_rooting=False,
    store_tree_weights=True,
    suppress_annotations=False,
)

# NeXML structured output
dataset.write(path="output.xml", schema="nexml")

# FASTA sequence output
char_matrix.write(
    path="output.fasta",
    schema="fasta",
    wrap_width=70,
)

# PHYLIP alignment output
char_matrix.write(
    path="output.phy",
    schema="phylip",
    force_unique_taxon_labels=True,
    spaces_to_underscores=True,
)

I/O Factory Functions

Factory functions for creating format-specific readers, writers, and tree yielders.

def get_reader(schema, **kwargs):
    """
    Get a reader instance for the specified format.

    Parameters:
    - schema: Format name ('newick', 'nexus', 'nexml', 'fasta', 'phylip')
    - **kwargs: Format-specific options

    Returns:
    Reader object for the specified format
    """

def get_writer(schema, **kwargs):
    """
    Get a writer instance for the specified format.

    Parameters:
    - schema: Format name ('newick', 'nexus', 'nexml', 'fasta', 'phylip')
    - **kwargs: Format-specific options

    Returns:
    Writer object for the specified format
    """

def get_tree_yielder(files, schema, **kwargs):
    """
    Get an iterator for reading trees from multiple files.

    Parameters:
    - files: List of file paths or file objects
    - schema: Format specification (one schema is given for all files)
    - **kwargs: Format-specific options

    Returns:
    Iterator yielding Tree objects
    """

Streaming Tree I/O

For large tree collections, DendroPy provides memory-efficient streaming iterators.

# Stream trees from single file
# NOTE(review): `path` is not defined in this snippet -- assumed to be a
# tree file path supplied by the surrounding code.
for tree in Tree.yield_from_files([path], schema="nexus"):
    # Process one tree at a time without loading all into memory
    print(f"Tree has {len(tree.leaf_nodes())} leaves")

# Stream trees from multiple files
tree_files = ["trees1.nex", "trees2.nex", "trees3.nex"]
for tree in Tree.yield_from_files(tree_files, schema="nexus"):
    # Process trees from all files sequentially
    # (analyze_tree is a placeholder for user code; it is not defined here)
    analyze_tree(tree)

# Tree yielder with filtering
def large_tree_filter(tree):
    """Return True when *tree* has more than 100 leaf nodes."""
    leaf_count = len(tree.leaf_nodes())
    return leaf_count > 100

# Only process trees with >100 leaves
# NOTE(review): confirm that yield_from_files accepts a tree_filter keyword.
for tree in Tree.yield_from_files(
    [path],
    schema="newick",
    tree_filter=large_tree_filter,
):
    process_large_tree(tree)

Character Matrix I/O

Specialized I/O methods for different types of character data with format-specific options.

# DNA sequence matrices
# NOTE(review): data_type looks redundant when reading through a typed
# matrix class such as DnaCharacterMatrix -- confirm whether it is needed.
dna_matrix = DnaCharacterMatrix.get(
    path="alignment.fasta", 
    schema="fasta",
    data_type="dna"
)

# Protein sequence matrices
protein_matrix = ProteinCharacterMatrix.get(
    path="proteins.fasta",
    schema="fasta", 
    data_type="protein"
)

# Standard morphological matrices
# NOTE(review): BINARY_STATE_ALPHABET is not defined in this snippet --
# presumably imported from dendropy; confirm.
morpho_matrix = StandardCharacterMatrix.get(
    path="morphology.nex",
    schema="nexus",
    default_state_alphabet=BINARY_STATE_ALPHABET
)

# Continuous character matrices
continuous_matrix = ContinuousCharacterMatrix.get(
    path="measurements.nex", 
    schema="nexus"
)

# Writing character matrices with format options
# (here the DNA matrix read above is written back out as PHYLIP)
dna_matrix.write(
    path="output.phy",
    schema="phylip",
    strict=True,  # Strict PHYLIP format
    spaces_to_underscores=True,
    force_unique_taxon_labels=True
)

Multi-Format Dataset I/O

DataSet objects can read and write files containing multiple data types.

# Read mixed data (trees + character matrices) from a single NEXUS file
dataset = DataSet.get(path="combined.nex", schema="nexus")

# Access different data types
# Each entry of dataset.tree_lists is a collection of trees
for tree_list in dataset.tree_lists:
    print(f"Tree list has {len(tree_list)} trees")

# Each entry of dataset.char_matrices is a character matrix; its concrete
# class name is printed along with its dimensions
for char_matrix in dataset.char_matrices:
    print(f"Character matrix: {type(char_matrix).__name__}")
    print(f"  {len(char_matrix)} taxa, {char_matrix.max_sequence_size} characters")

# Write entire dataset (here converted from NEXUS input to NeXML output)
dataset.write(path="complete_dataset.xml", schema="nexml")

Format Support Details

# Supported input/output schemas:
#   newick          Newick tree format
#   nexus           NEXUS format (trees + character data)
#   nexml           NeXML format (XML-based)
#   fasta           FASTA sequence format
#   phylip          PHYLIP format variants
#   phylip-relaxed  Relaxed PHYLIP format
#   fasta-relaxed   FASTA with relaxed parsing
SUPPORTED_SCHEMAS = [
    "newick", "nexus", "nexml",
    "fasta", "phylip",
    "phylip-relaxed", "fasta-relaxed",
]

# Character data types:
#   dna             DNA sequences
#   rna             RNA sequences
#   protein         Protein sequences
#   nucleotide      General nucleotide
#   standard        Standard morphological
#   continuous      Continuous characters
#   restriction     Restriction sites
#   infinite-sites  Infinite sites
CHARACTER_DATA_TYPES = [
    "dna", "rna", "protein", "nucleotide",
    "standard", "continuous",
    "restriction", "infinite-sites",
]

Reader and Writer Classes

# Format-specific reader classes
class NewickReader:
    """
    Reader for Newick format trees.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # reader options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the input stream to read from; the return value is not shown here.
    def read(self, stream): ...

class NexusReader:
    """
    Reader for NEXUS format files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # reader options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the input stream to read from; the return value is not shown here.
    def read(self, stream): ...

class NexmlReader:
    """
    Reader for NeXML format files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # reader options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the input stream to read from; the return value is not shown here.
    def read(self, stream): ...

class FastaReader:
    """
    Reader for FASTA sequence files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # reader options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the input stream to read from; the return value is not shown here.
    def read(self, stream): ...

class PhylipReader:
    """
    Reader for PHYLIP format files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # reader options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the input stream to read from; the return value is not shown here.
    def read(self, stream): ...

# Format-specific writer classes
class NewickWriter:
    """
    Writer for Newick format trees.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # writer options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the object to write and the output stream to write it to.
    def write(self, obj, stream): ...

class NexusWriter:
    """
    Writer for NEXUS format files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # writer options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the object to write and the output stream to write it to.
    def write(self, obj, stream): ...

class NexmlWriter:
    """
    Writer for NeXML format files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # writer options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the object to write and the output stream to write it to.
    def write(self, obj, stream): ...

class FastaWriter:
    """
    Writer for FASTA sequence files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # writer options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the object to write and the output stream to write it to.
    def write(self, obj, stream): ...

class PhylipWriter:
    """
    Writer for PHYLIP format files.

    Signature-only stub: method bodies are elided here.
    """
    # Accepts arbitrary keyword options (presumably format-specific
    # writer options -- confirm).
    def __init__(self, **kwargs): ...
    # Takes the object to write and the output stream to write it to.
    def write(self, obj, stream): ...

Error Handling

# I/O related exceptions
class DataParseError(Exception):
    """Raised when data cannot be parsed in the expected format."""

class UnsupportedSchemaError(Exception):
    """Raised when an unsupported file format (schema) is specified."""

class UnspecifiedSchemaError(Exception):
    """Raised when the file format (schema) is not specified and cannot be auto-detected."""

class UnspecifiedSourceError(Exception):
    """Raised when no data source is provided."""

Install with Tessl CLI