CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-dendropy

A Python library for phylogenetics and phylogenetic computing: reading, writing, simulation, processing and manipulation of phylogenetic trees and characters.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/character-data.md

Character Data & Evolution

Character matrices for molecular and morphological data, state alphabets, and evolutionary models. DendroPy supports DNA, RNA, generic nucleotide, protein, binary, restriction-site, infinite-sites, standard morphological, and continuous character data, and manages the state alphabets that define the permitted states of the discrete types.

Capabilities

State Alphabets

Classes defining the possible character states for different data types.

class StateAlphabet:
    """
    Base class for character state alphabets.

    An alphabet holds the states a character can take, split into
    fundamental states and ambiguous states.
    
    Parameters:
    - fundamental_states: Core states (excluding ambiguous states);
      optional, defaults to None
    - ambiguous_states: States representing multiple fundamental states;
      optional, defaults to None
    """
    
    def __init__(self, fundamental_states=None, ambiguous_states=None): ...
    
    def __len__(self):
        """
        Number of fundamental states.

        NOTE(review): __iter__ is described as covering all states, so this
        count and the number of iterated items may disagree -- confirm
        which states len() is meant to include.
        """
    
    def __iter__(self):
        """Iterate over all states in the alphabet."""
    
    def __contains__(self, state):
        """
        Check if state is in alphabet (supports ``state in alphabet``).

        Parameters:
        - state: State to test for membership
        """
    
    def state_for_symbol(self, symbol):
        """
        Get state object for symbol.

        Parameters:
        - symbol: Symbol to look up

        Returns:
        The state object associated with the symbol
        """
    
    def symbol_for_state(self, state):
        """
        Get symbol for state object (the inverse of state_for_symbol).

        Parameters:
        - state: State object to look up

        Returns:
        The symbol associated with the state object
        """
    
    def fundamental_states(self):
        """
        Iterator over fundamental states only.

        Listed here as a method; it shares its name with the
        ``fundamental_states`` constructor parameter.
        """
    
    def ambiguous_states(self):
        """
        Iterator over ambiguous states only.

        Listed here as a method; it shares its name with the
        ``ambiguous_states`` constructor parameter.
        """

# Specialized alphabet classes: one StateAlphabet subclass per data type.
# Each has a predefined module-level instance declared further down
# (e.g. DNA_STATE_ALPHABET for DnaStateAlphabet).
class DnaStateAlphabet(StateAlphabet):
    """DNA state alphabet with A, C, G, T and ambiguity codes."""

class RnaStateAlphabet(StateAlphabet):
    """RNA state alphabet with A, C, G, U and ambiguity codes."""

class NucleotideStateAlphabet(StateAlphabet):
    """General nucleotide alphabet, usable for either DNA or RNA data."""

class ProteinStateAlphabet(StateAlphabet):
    """Protein state alphabet with the 20 amino acids and ambiguity codes."""

class BinaryStateAlphabet(StateAlphabet):
    """Binary state alphabet with the two states 0 and 1."""

class RestrictionSitesStateAlphabet(StateAlphabet):
    """Restriction sites alphabet (0 = site absent, 1 = site present)."""

class InfiniteSitesStateAlphabet(StateAlphabet):
    """State alphabet for infinite-sites character data."""

# Predefined alphabet instances.
# These lines are annotations only: each declares the name and the alphabet
# class of a predefined instance; no value is assigned in this listing.
DNA_STATE_ALPHABET: DnaStateAlphabet
RNA_STATE_ALPHABET: RnaStateAlphabet  
NUCLEOTIDE_STATE_ALPHABET: NucleotideStateAlphabet
PROTEIN_STATE_ALPHABET: ProteinStateAlphabet
BINARY_STATE_ALPHABET: BinaryStateAlphabet
RESTRICTION_SITES_STATE_ALPHABET: RestrictionSitesStateAlphabet
INFINITE_SITES_STATE_ALPHABET: InfiniteSitesStateAlphabet

def new_standard_state_alphabet(symbols):
    """
    Create a custom state alphabet for standard morphological characters.

    Use this when none of the predefined alphabets fits the data.
    
    Parameters:
    - symbols: State symbols, given either as a string or as a list
      (presumably one symbol per character when a string is passed --
      confirm)
    
    Returns:
    StateAlphabet: Custom alphabet with specified symbols
    """

Character Sequences

Classes representing individual character sequences (rows in alignment matrices).

class CharacterDataSequence:
    """
    Base class for character data sequences.

    A sequence is one row of a character matrix: the character states
    recorded for a single taxon. It supports len(), iteration, and
    indexed get/set.
    
    Parameters:
    - taxon: Associated Taxon object; optional, defaults to None
    - values: Sequence of character states; optional, defaults to None
    """
    
    def __init__(self, taxon=None, values=None): ...
    
    def __len__(self):
        """Length of sequence (number of character states it holds)."""
    
    def __iter__(self):
        """Iterate over the character states of the sequence."""
    
    def __getitem__(self, index):
        """
        Get character state at position.

        Parameters:
        - index: Position within the sequence
        """
    
    def __setitem__(self, index, value):
        """
        Set character state at position.

        Parameters:
        - index: Position within the sequence
        - value: Character state to store at that position
        """
    
    def append(self, value):
        """
        Append character state to sequence.

        Parameters:
        - value: Character state to add at the end
        """
    
    def extend(self, values):
        """
        Extend sequence with multiple states.

        Parameters:
        - values: Character states to add at the end
        """
    
    def symbols_as_string(self):
        """
        Return sequence as string of symbols.

        Returns:
        str: The symbols of the sequence's states
        """

# Specific sequence types: one CharacterDataSequence subclass per data type.
# No additional methods are listed for any of them.
class DnaCharacterDataSequence(CharacterDataSequence):
    """DNA character sequence with nucleotide states."""

class RnaCharacterDataSequence(CharacterDataSequence):
    """RNA character sequence with nucleotide states."""

class NucleotideCharacterDataSequence(CharacterDataSequence):
    """General nucleotide character sequence."""

class ProteinCharacterDataSequence(CharacterDataSequence):
    """Protein character sequence with amino acid states."""

class StandardCharacterDataSequence(CharacterDataSequence):
    """Standard morphological character sequence."""

class RestrictionSitesCharacterDataSequence(CharacterDataSequence):
    """Restriction sites character sequence."""

class InfiniteSitesCharacterDataSequence(CharacterDataSequence):
    """Infinite sites character sequence."""

class ContinuousCharacterDataSequence(CharacterDataSequence):
    """Continuous (quantitative) character sequence."""

Character Matrices

Classes representing character data matrices (alignments) with multiple sequences.

class CharacterMatrix:
    """
    Base class for character data matrices.

    A matrix (alignment) holds multiple sequences and is indexed by taxon:
    ``matrix[taxon]`` gets or sets that taxon's sequence.
    
    Parameters:
    - taxon_namespace: TaxonNamespace for matrix taxa; optional,
      defaults to None
    - default_state_alphabet: StateAlphabet for character states; optional,
      defaults to None
    """
    
    def __init__(self, taxon_namespace=None, default_state_alphabet=None): ...
    
    @classmethod
    def get(cls, **kwargs):
        """
        Read character matrix from external source.

        Class-method constructor: called on the class rather than on an
        existing matrix. Options are given as keyword arguments; this
        listing does not enumerate them.
        """
    
    def read(self, **kwargs):
        """
        Read data from external source into existing matrix.

        Options are given as keyword arguments; this listing does not
        enumerate them.
        """
    
    def write(self, **kwargs):
        """
        Write matrix to external destination.

        Options are given as keyword arguments; this listing does not
        enumerate them.
        """
    
    # Matrix access and manipulation
    def __len__(self):
        """Number of sequences (taxa) in matrix."""
    
    def __iter__(self):
        """
        Iterate over taxon-sequence pairs.

        NOTE(review): mapping-style containers usually iterate over keys
        only -- confirm that iteration yields (taxon, sequence) pairs
        rather than taxa.
        """
    
    def __getitem__(self, taxon):
        """
        Get sequence for specific taxon.

        Parameters:
        - taxon: Taxon whose sequence is wanted
        """
    
    def __setitem__(self, taxon, sequence):
        """
        Set sequence for specific taxon.

        Parameters:
        - taxon: Taxon to assign the sequence to
        - sequence: Sequence to store for that taxon
        """
    
    def __contains__(self, taxon):
        """
        Check if taxon has sequence in matrix.

        Parameters:
        - taxon: Taxon to test
        """
    
    def __delitem__(self, taxon):
        """
        Remove taxon and its sequence from matrix.

        Parameters:
        - taxon: Taxon to remove
        """
    
    def new_sequence(self, taxon, values=None):
        """
        Create new sequence for taxon.

        Parameters:
        - taxon: Taxon the new sequence belongs to
        - values: Initial character states; optional, defaults to None
        """
    
    def add_sequence(self, sequence):
        """
        Add existing sequence to matrix.

        Parameters:
        - sequence: Sequence object to add
        """
    
    def remove_sequences(self, taxa):
        """
        Remove sequences for specified taxa.

        Parameters:
        - taxa: Taxa whose sequences are to be removed
        """
    
    def keep_chars(self, indices):
        """
        Keep only characters at specified indices.

        Counterpart of remove_chars.

        Parameters:
        - indices: Character positions to retain
        """
    
    def remove_chars(self, indices):
        """
        Remove characters at specified indices.

        Counterpart of keep_chars.

        Parameters:
        - indices: Character positions to drop
        """
    
    # Matrix properties and whole-matrix operations
    def max_sequence_size(self):
        """Length of longest sequence in matrix."""
    
    def sequence_size_is_uniform(self):
        """Check if all sequences have same length."""
    
    def pack(self, pad_to_size=None):
        """
        Pad sequences to uniform length.

        Parameters:
        - pad_to_size: Target length; optional, defaults to None
          (presumably the longest sequence's length is used when omitted --
          confirm)
        """
    
    def concatenate(self, other_matrices):
        """
        Concatenate with other character matrices.

        Parameters:
        - other_matrices: Character matrices to concatenate with this one
        """
    
    def export_character_indices(self, indices):
        """
        Export subset of characters as new matrix.

        Parameters:
        - indices: Character positions to include in the new matrix
        """
    
    def export_character_subset(self, character_set):
        """
        Export character subset as new matrix.

        Parameters:
        - character_set: Character subset to include in the new matrix
        """

# Molecular sequence matrices: CharacterMatrix subclasses for nucleotide
# and protein alignments.
class DnaCharacterMatrix(CharacterMatrix):
    """DNA sequence alignment matrix."""
    
    # Constructor takes keyword arguments only; this listing does not
    # enumerate them.
    def __init__(self, **kwargs): ...
    
    def nucleotide_frequencies(self):
        """Calculate nucleotide frequencies across the whole matrix."""
    
    def gc_content(self):
        """Calculate GC content of the matrix."""

class RnaCharacterMatrix(CharacterMatrix):
    """RNA sequence alignment matrix."""

class NucleotideCharacterMatrix(CharacterMatrix):
    """General nucleotide sequence matrix."""

class ProteinCharacterMatrix(CharacterMatrix):
    """Protein sequence alignment matrix."""
    
    def amino_acid_frequencies(self):
        """Calculate amino acid frequencies."""

# Morphological matrices: CharacterMatrix subclasses listed under this
# heading. No additional methods are listed for any of them.
class StandardCharacterMatrix(CharacterMatrix):
    """Standard morphological character matrix."""

class BinaryCharacterMatrix(CharacterMatrix):
    """Binary character matrix (0/1 states)."""

class RestrictionSitesCharacterMatrix(CharacterMatrix):
    """Restriction sites presence/absence matrix."""

class InfiniteSitesCharacterMatrix(CharacterMatrix):
    """Infinite sites character matrix."""

# Quantitative data
class ContinuousCharacterMatrix(CharacterMatrix):
    """
    Continuous (quantitative) character matrix.

    Adds per-character summary statistics to the CharacterMatrix interface.
    """
    
    def mean_vector(self):
        """Calculate mean values for each character."""
    
    def variance_vector(self):
        """Calculate variance for each character."""
    
    def covariance_matrix(self):
        """Calculate the covariance matrix of the characters."""

Character Evolution Models

Classes for modeling discrete character evolution along phylogenetic trees.

class DiscreteCharacterEvolutionModel:
    """
    General discrete character evolution model.
    
    Parameters:
    - state_alphabet: StateAlphabet defining possible states; optional,
      defaults to None

    Keyword parameters (not named in the signature; passed via **kwargs):
    - stationary_freqs: Equilibrium state frequencies
    - rate_matrix: Instantaneous rate matrix
    """
    
    def __init__(self, state_alphabet=None, **kwargs): ...
    
    def p_matrix(self, edge_length):
        """
        Calculate transition probability matrix for given time.

        Parameters:
        - edge_length: The time over which transition probabilities are
          calculated, i.e. the length of the edge
        """
    
    def stationary_sample(self, rng=None):
        """
        Sample character state from equilibrium distribution.

        Parameters:
        - rng: Random number generator; optional, defaults to None
        """

class Hky85(DiscreteCharacterEvolutionModel):
    """
    HKY85 nucleotide substitution model.
    
    Parameters:
    - kappa: Transition/transversion rate ratio; defaults to 1.0
    - base_freqs: Equilibrium base frequencies in the order [A, C, G, T];
      defaults to None (presumably equal frequencies are used when
      omitted -- confirm)
    """
    
    def __init__(self, kappa=1.0, base_freqs=None): ...

class Jc69(DiscreteCharacterEvolutionModel):
    """
    Jukes-Cantor 69 nucleotide substitution model.
    
    All substitution rates equal, equal base frequencies. As a model this
    is the special case of HKY85 with kappa = 1 and every base frequency
    equal to 0.25, so the constructor takes no parameters.
    """
    
    def __init__(self): ...

class DiscreteCharacterEvolver:
    """
    Engine for evolving discrete characters on trees.
    
    Parameters:
    - seq_model: DiscreteCharacterEvolutionModel; optional, defaults to None
    - seq_len: Length of sequences to simulate; optional, defaults to None
    """
    
    def __init__(self, seq_model=None, seq_len=None): ...
    
    def evolve_states(self, tree, seq_len=None, rng=None):
        """
        Simulate character evolution on tree.
        
        Parameters:
        - tree: Tree for simulation
        - seq_len: Number of characters to simulate; optional, defaults to
          None (presumably the constructor's seq_len is used when omitted
          -- confirm)
        - rng: Random number generator; optional, defaults to None
        
        Returns:
        CharacterMatrix: Simulated character data
        """

Character Simulation Functions

Functions for simulating character evolution under various models.

def simulate_discrete_char_dataset(tree, seq_len, **kwargs):
    """
    Simulate discrete character dataset on tree.
    
    Parameters:
    - tree: Tree for character simulation
    - seq_len: Number of characters to simulate

    Keyword parameters (not named in the signature; passed via **kwargs):
    - char_model: Character evolution model
    - mutation_rate: Overall mutation rate
    - rng: Random number generator
    
    Returns:
    CharacterMatrix: Simulated character data
    """

def simulate_discrete_chars(tree, char_model, seq_len, **kwargs):
    """
    Simulate discrete characters with specified model.

    Here char_model is a required positional argument placed before
    seq_len; simulate_discrete_char_dataset instead takes seq_len second
    and char_model as a keyword.
    
    Parameters:
    - tree: Phylogenetic tree
    - char_model: DiscreteCharacterEvolutionModel
    - seq_len: Sequence length

    Keyword parameters (not named in the signature; passed via **kwargs):
    - rng: Random number generator
    
    Returns:
    CharacterMatrix: Simulated alignment
    """

def hky85_chars(tree, seq_len, kappa=1.0, base_freqs=None, **kwargs):
    """
    Simulate DNA sequences under HKY85 model.
    
    Parameters:
    - tree: Phylogenetic tree with branch lengths
    - seq_len: Length of sequences to simulate
    - kappa: Transition/transversion rate ratio; defaults to 1.0
    - base_freqs: Base frequencies in the order [A, C, G, T]; defaults to
      None (presumably equal frequencies are used when omitted -- confirm)

    Keyword parameters (not named in the signature; passed via **kwargs):
    - mutation_rate: Mutation rate multiplier
    - rng: Random number generator
    
    Returns:
    DnaCharacterMatrix: Simulated DNA alignment
    """

def evolve_continuous_char(tree, char_matrix, **kwargs):
    """
    Evolve continuous characters using Brownian motion.
    
    Parameters:
    - tree: Phylogenetic tree
    - char_matrix: Initial continuous character values

    Keyword parameters (not named in the signature; passed via **kwargs):
    - rate: Rate of character evolution
    - rng: Random number generator
    
    Returns:
    ContinuousCharacterMatrix: Evolved character data
    """

Character Data Conversion

Functions for converting between character data types and formats.

def concatenate_matrices(matrices, taxon_namespace=None):
    """
    Concatenate multiple character matrices into one.
    
    Parameters:
    - matrices: List of CharacterMatrix objects
    - taxon_namespace: Target TaxonNamespace; optional, defaults to None
    
    Returns:
    CharacterMatrix: Concatenated matrix
    """

def standardize_taxon_namespace(matrices, taxon_namespace=None):
    """
    Standardize taxon namespace across multiple matrices.

    The matrices passed in are changed in place; no new matrices are
    created.
    
    Parameters:
    - matrices: List of CharacterMatrix objects
    - taxon_namespace: Target TaxonNamespace; optional, defaults to None
    
    Returns:
    None (modifies matrices in place)
    """

def convert_dna_to_protein(dna_matrix, genetic_code=None):
    """
    Translate DNA matrix to protein matrix.
    
    Parameters:
    - dna_matrix: DnaCharacterMatrix to translate
    - genetic_code: Genetic code for translation; optional, defaults to
      None (presumably the standard genetic code is used when omitted --
      confirm)
    
    Returns:
    ProteinCharacterMatrix: Translated sequences
    """

Character Statistics

Functions for calculating statistics on character data.

def char_state_frequencies(char_matrix, gap_as_missing=True):
    """
    Calculate character state frequencies.
    
    Parameters:
    - char_matrix: CharacterMatrix to analyze
    - gap_as_missing: Treat gaps as missing data; defaults to True
    
    Returns:
    dict: State frequencies across matrix (presumably keyed by state --
    confirm)
    """

def pairwise_sequence_distances(char_matrix, distance_fn=None):
    """
    Calculate pairwise distances between sequences.
    
    Parameters:
    - char_matrix: CharacterMatrix for distance calculation
    - distance_fn: Distance function; optional, p-distance is used when
      omitted (None)
    
    Returns:
    dict: Pairwise distance matrix (presumably keyed by pairs of taxa --
    confirm)
    """

def invariant_sites_proportion(char_matrix):
    """
    Calculate proportion of invariant sites.

    An invariant site is one with no variation among the sequences.
    
    Parameters:
    - char_matrix: CharacterMatrix to analyze
    
    Returns:
    float: Proportion of sites with no variation
    """

def segregating_sites_count(char_matrix):
    """
    Count number of segregating (variable) sites.

    A segregating site is one at which the sequences vary.
    
    Parameters:
    - char_matrix: CharacterMatrix to analyze
    
    Returns:
    int: Number of variable sites
    """

Install with Tessl CLI

npx tessl i tessl/pypi-dendropy

docs

character-data.md

core-data-models.md

data-io.md

index.md

simulation.md

tree-analysis.md

visualization-interop.md

tile.json