tessl/pypi-alphabase

An infrastructure Python package of the AlphaX ecosystem for MS proteomics

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Fragment Ion Generation

Name: tessl/pypi-alphabase
Author: tessl

Generates complete fragment ion series with support for multiple fragment types, neutral losses, and charge states. This enables the creation of theoretical spectra for spectral library construction, peptide identification, and mass spectrometry data analysis workflows.

Capabilities

Fragment Type Definitions

Core classes and constants that define the available fragment ion types and their properties.

class Direction:
    """String labels for the two fragment directions.

    Attributes:
    - FORWARD: N-terminal fragments (a, b, c series)
    - REVERSE: C-terminal fragments (x, y, z series)
    """

    FORWARD: str = "forward"
    REVERSE: str = "reverse"

class Loss:
    """Integer codes for the loss variant carried by a fragment.

    Attributes:
    - MODLOSS: modification loss
    - H2O: water loss (-18.01056 Da)
    - NH3: ammonia loss (-17.02655 Da)
    - MODLOSS_H2O: modification loss combined with water loss
    - MODLOSS_NH3: modification loss combined with ammonia loss
    """

    MODLOSS: int = 0
    H2O: int = 1
    NH3: int = 2
    MODLOSS_H2O: int = 3
    MODLOSS_NH3: int = 4

class Series:
    """Integer codes for the six fragment ion series.

    Attributes:
    - A: a-ions (N-terminal, -CO)
    - B: b-ions (N-terminal)
    - C: c-ions (N-terminal, +NH3)
    - X: x-ions (C-terminal, +CO)
    - Y: y-ions (C-terminal)
    - Z: z-ions (C-terminal, -NH3)
    """

    # N-terminal series
    A: int = 0
    B: int = 1
    C: int = 2
    # C-terminal series
    X: int = 3
    Y: int = 4
    Z: int = 5

from dataclasses import dataclass


@dataclass
class FragmentType:
    """Dataclass defining a complete fragment type.

    Instances are built from the four fields below, e.g.
    FragmentType(series=Series.B, loss=Loss.H2O,
                 direction=Direction.FORWARD, charge=2).

    Attributes:
    - series: Fragment series code (Series.A, B, C, X, Y, Z)
    - loss: Loss type code (Loss.MODLOSS, H2O, NH3, etc.)
    - direction: Direction label (Direction.FORWARD or Direction.REVERSE)
    - charge: Fragment charge state
    """

    series: int
    loss: int
    # Direction.FORWARD / Direction.REVERSE are the strings "forward" /
    # "reverse", so this field is a str rather than an int.
    direction: str
    charge: int

# Available fragment types.
# FRAGMENT_TYPES is only declared here (no value shown): the dictionary of all
# fragment type definitions.
FRAGMENT_TYPES: dict

# Direction label -> direction name (maps each label to itself).
DIRECTION_MAPPING: dict = {
    "forward": 'forward',
    "reverse": 'reverse',
}

# Loss code -> loss name.
# NOTE(review): code 0 is named 'noloss' here, whereas Loss.MODLOSS = 0 is
# described as a modification loss — confirm which meaning is intended.
LOSS_MAPPING: dict = {
    0: 'noloss',
    1: 'H2O',
    2: 'NH3',
    3: 'modloss_H2O',
    4: 'modloss_NH3',
}

# Series code -> single-letter ion series name.
SERIES_MAPPING: dict = {
    0: 'a',
    1: 'b',
    2: 'c',
    3: 'x',
    4: 'y',
    5: 'z',
}

Fragment Type Generation and Validation

Functions for creating, parsing, and validating fragment type combinations with charge states.

def get_charged_frag_types(frag_types: List[str], charges: List[int]) -> List[str]:
    """
    Generate charged fragment type combinations.

    Each fragment type is combined with each charge state; in the examples
    the charge is written as a suffix of '+' characters, one per charge.

    Parameters:
    - frag_types: List of uncharged fragment types like ['b', 'y', 'b-H2O']
    - charges: List of charge states like [1, 2]

    Returns:
    List of charged fragment types; for the example inputs above:
    ['b+', 'b++', 'y+', 'y++', 'b-H2O+', 'b-H2O++']
    """

def sort_charged_frag_types(frag_types: List[str]) -> List[str]:
    """
    Sort charged fragment types so that no-loss types precede loss types.

    Parameters:
    - frag_types: List of charged fragment types

    Returns:
    Sorted list with no-loss fragments first, then fragments with losses
    """

def filter_valid_charged_frag_types(frag_types: List[str]) -> List[str]:
    """
    Validate a fragment type list and keep only the valid entries.

    NOTE(review): the validity criteria are not stated in this document.

    Parameters:
    - frag_types: List of fragment type strings

    Returns:
    Filtered list containing only the valid fragment types
    """

def parse_charged_frag_type(frag_type: str) -> tuple[str, int]:
    """
    Split a charged fragment type string into its base type and charge.

    In the examples below the charge equals the number of trailing '+'
    characters, and any loss suffix (e.g. '-H2O') stays in the base type.

    Parameters:
    - frag_type: Charged fragment type like 'b++' or 'y-H2O+'

    Returns:
    Tuple of (base_type, charge) like ('b', 2) or ('y-H2O', 1)
    """

# NOTE(review): this stub repeats the sort_charged_frag_types entry that
# already appears earlier in this section; one of the two copies can be dropped.
def sort_charged_frag_types(frag_types: List[str]) -> List[str]:
    """
    Sort charged fragment types so that no-loss types precede loss types.

    Parameters:
    - frag_types: List of charged fragment types

    Returns:
    Sorted list with no-loss fragments first, then fragments with losses
    """

# NOTE(review): this stub repeats the filter_valid_charged_frag_types entry
# that already appears earlier in this section; one of the two copies can be dropped.
def filter_valid_charged_frag_types(frag_types: List[str]) -> List[str]:
    """
    Validate a fragment type list and keep only the valid entries.

    Parameters:
    - frag_types: List of fragment type strings

    Returns:
    Filtered list containing only the valid fragment types
    """

Fragment DataFrame Creation

Functions for creating and initializing fragment DataFrames with proper structure and indexing.

def init_zero_fragment_dataframe(precursor_df: pd.DataFrame,
                                frag_types: List[str]) -> pd.DataFrame:
    """
    Initialize a fragment DataFrame whose intensities are all zero.

    Parameters:
    - precursor_df: Precursor DataFrame with sequence and charge info
    - frag_types: List of fragment types to include

    Returns:
    DataFrame with the fragment structure and zero intensities
    """

def init_fragment_dataframe_from_other(template_df: pd.DataFrame,
                                      frag_types: List[str]) -> pd.DataFrame:
    """
    Initialize a new fragment DataFrame using another one as a template.

    Parameters:
    - template_df: Reference DataFrame whose structure is reused
    - frag_types: List of fragment types for the new DataFrame

    Returns:
    New DataFrame with the same structure as template_df but with the
    specified fragment types
    """

def init_fragment_by_precursor_dataframe(precursor_df: pd.DataFrame,
                                        frag_types: List[str],
                                        max_frag_charge: int = 2) -> pd.DataFrame:
    """
    Initialize a fragment DataFrame for a list of precursors.

    Parameters:
    - precursor_df: Precursor DataFrame
    - frag_types: Fragment types to generate
    - max_frag_charge: Maximum fragment charge to consider (default 2)

    Returns:
    Complete fragment DataFrame with m/z calculations
    """

def create_fragment_mz_dataframe(precursor_df: pd.DataFrame,
                                frag_types: List[str],
                                max_frag_charge: int = 2) -> pd.DataFrame:
    """
    Generate fragment m/z values for a spectral library.

    Parameters:
    - precursor_df: Precursor DataFrame with sequences and modifications
    - frag_types: List of charged fragment types like ['b+', 'y+', 'b++', 'y++']
    - max_frag_charge: Maximum fragment charge state (default 2)
      NOTE(review): frag_types already encodes charges; how it interacts
      with max_frag_charge is not stated here — confirm.

    Returns:
    DataFrame with fragment m/z values and metadata
    """

Fragment Processing and Optimization

Functions for processing, filtering, and optimizing fragment DataFrames for spectral libraries.

def flatten_fragments(fragment_df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert tabular fragment data to a flat (linear) format.

    Parameters:
    - fragment_df: Fragment DataFrame in tabular format

    Returns:
    Flattened DataFrame with one row per fragment
    """

def remove_unused_fragments(fragment_mz_df: pd.DataFrame,
                           fragment_intensity_df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Compress a fragment library by removing unused entries.

    Parameters:
    - fragment_mz_df: Fragment m/z DataFrame
    - fragment_intensity_df: Fragment intensity DataFrame

    Returns:
    Tuple of (compressed_mz_df, compressed_intensity_df), in the same order
    as the inputs
    """

def calc_fragment_count(fragment_df: pd.DataFrame) -> pd.Series:
    """
    Count the fragments belonging to each precursor.

    Parameters:
    - fragment_df: Fragment DataFrame

    Returns:
    Series of fragment counts, indexed by precursor
    """

def filter_fragment_number(fragment_mz_df: pd.DataFrame,
                          fragment_intensity_df: pd.DataFrame,
                          top_k: int = 100) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Keep only the top-k fragments per precursor, ranked by intensity.

    Parameters:
    - fragment_mz_df: Fragment m/z DataFrame
    - fragment_intensity_df: Fragment intensity DataFrame
    - top_k: Number of top fragments to keep per precursor (default 100)

    Returns:
    Tuple of (filtered_mz_df, filtered_intensity_df), in the same order
    as the inputs
    """

def calc_fragment_cardinality(fragment_df: pd.DataFrame,
                             group_by: str = 'proteins') -> pd.DataFrame:
    """
    Calculate how fragments are shared across groups.

    Parameters:
    - fragment_df: Fragment DataFrame
    - group_by: Name of the column to group by for the cardinality
      calculation (default 'proteins')

    Returns:
    DataFrame with fragment sharing statistics
    """

Mass Calculations

Direct mass calculation functions for peptide fragments and b/y ion series.

def calc_b_y_and_peptide_mass(sequences: List[str],
                             mod_masses: np.ndarray = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Calculate b/y fragment ion masses and peptide masses in one call.

    Parameters:
    - sequences: List of peptide sequences
    - mod_masses: Optional modification masses array; defaults to None

    Returns:
    Tuple of (b_masses, y_masses, peptide_masses)
    """

def calc_b_y_and_peptide_masses_for_same_len_seqs(sequences: List[str],
                                                 mod_masses: np.ndarray = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Batch b/y ion and peptide mass calculation for equal-length sequences.

    Parameters:
    - sequences: List of peptide sequences that all have the same length
    - mod_masses: Optional modification masses array; defaults to None

    Returns:
    Tuple of (b_masses, y_masses, peptide_masses) with optimized layout
    """

def calc_peptide_masses_for_same_len_seqs(sequences: List[str],
                                         mod_masses: np.ndarray = None) -> np.ndarray:
    """
    Calculate peptide masses for equal-length sequences.

    Parameters:
    - sequences: List of peptide sequences that all have the same length
    - mod_masses: Optional modification masses array; defaults to None

    Returns:
    1D numpy array with the peptide masses
    """

def calc_diff_modification_mass(mod_sequences: List[str]) -> np.ndarray:
    """
    Calculate modification mass differences for open search workflows.

    Parameters:
    - mod_sequences: List of modified sequences

    Returns:
    Array with the mass differences relative to the unmodified peptides
    """

Usage Examples

Basic Fragment Generation

from alphabase.peptide.fragment import get_charged_frag_types, create_fragment_mz_dataframe
import pandas as pd

# Define fragment types and charges
base_types = ['b', 'y', 'b-H2O', 'y-H2O']
charges = [1, 2]
# Combine every base type with every charge state
frag_types = get_charged_frag_types(base_types, charges)
print(f"Fragment types: {frag_types}")
# Output: Fragment types: ['b+', 'b++', 'y+', 'y++', 'b-H2O+', 'b-H2O++', 'y-H2O+', 'y-H2O++']

# Create precursor DataFrame (one row per precursor)
precursor_df = pd.DataFrame({
    'sequence': ['PEPTIDE', 'SEQUENCE', 'EXAMPLE'],
    # NOTE(review): neither the 2nd ('E') nor the 3rd ('Q') residue of
    # 'SEQUENCE' is S, T or Y — confirm the phospho site and the expected
    # format of the mods string.
    'mods': ['', 'Phospho (STY)@2', ''],
    'charge': [2, 3, 2],
    'proteins': ['P12345', 'P67890', 'P11111']
})

# Generate fragment m/z values
fragment_mz_df = create_fragment_mz_dataframe(
    precursor_df=precursor_df,
    frag_types=frag_types,
    max_frag_charge=2
)

print(f"Generated {len(fragment_mz_df)} fragment entries")

Working with Fragment Types

# All imports for this example are grouped at the top of the snippet.
from alphabase.peptide.fragment import (
    Direction,
    FragmentType,
    Loss,
    Series,
    parse_charged_frag_type,
)

# Create specific fragment type
frag_type = FragmentType(
    series=Series.B,      # b-ion
    loss=Loss.H2O,        # with water loss
    direction=Direction.FORWARD,  # N-terminal
    charge=2              # doubly charged
)

print(f"Fragment: {frag_type}")

# Parse fragment type string into its base type and charge
base_type, charge = parse_charged_frag_type('y-NH3++')
print(f"Base type: {base_type}, Charge: {charge}")
# Output: Base type: y-NH3, Charge: 2

Mass Calculations

from alphabase.peptide.fragment import calc_b_y_and_peptide_mass
# NOTE(review): numpy is imported but not referenced directly in this example.
import numpy as np

# Calculate b/y ions and peptide masses (no modification masses passed)
sequences = ['PEPTIDE', 'SEQUENCE']
b_masses, y_masses, peptide_masses = calc_b_y_and_peptide_mass(sequences)

# The returned values are numpy arrays per the function signature
print(f"B-ion masses shape: {b_masses.shape}")
print(f"Y-ion masses shape: {y_masses.shape}")
print(f"Peptide masses: {peptide_masses}")

Fragment Library Processing

from alphabase.peptide.fragment import filter_fragment_number, remove_unused_fragments

# Assume we have fragment DataFrames
# fragment_mz_df and fragment_intensity_df from previous steps
# NOTE(review): the earlier examples in this document only create
# fragment_mz_df; fragment_intensity_df must be supplied separately.

# Keep only top 50 fragments per precursor
top_mz_df, top_intensity_df = filter_fragment_number(
    fragment_mz_df, fragment_intensity_df, top_k=50
)

# Remove unused fragment entries from the filtered pair
compressed_mz_df, compressed_intensity_df = remove_unused_fragments(
    top_mz_df, top_intensity_df
)

# Compare the row counts at each stage
print(f"Original fragments: {len(fragment_mz_df)}")
print(f"After filtering: {len(top_mz_df)}")
print(f"After compression: {len(compressed_mz_df)}")

Advanced Fragment Analysis

from alphabase.peptide.fragment import calc_fragment_cardinality, calc_fragment_count

# Assumes fragment_mz_df already exists (e.g. from create_fragment_mz_dataframe)

# Calculate fragment counts per precursor
frag_counts = calc_fragment_count(fragment_mz_df)
print(f"Average fragments per precursor: {frag_counts.mean():.1f}")

# Analyze fragment sharing across proteins
# ('proteins' is also the default value of group_by)
cardinality_df = calc_fragment_cardinality(
    fragment_mz_df, group_by='proteins'
)
print(f"Fragment cardinality analysis:\n{cardinality_df.head()}")

Install with Tessl CLI