An infrastructure Python package of the AlphaX ecosystem for MS proteomics
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Complete fragment ion series generation with support for multiple fragment types, neutral losses, and charge states. Enables creation of theoretical spectra for spectral library construction, peptide identification, and mass spectrometry data analysis workflows.
Core classes and constants that define the available fragment ion types and their properties.
class Direction:
"""Fragment direction constants."""
FORWARD: str = "forward" # N-terminal fragments (a, b, c)
REVERSE: str = "reverse" # C-terminal fragments (x, y, z)
class Loss:
"""Fragment loss type constants."""
MODLOSS: int = 0 # Modification loss
H2O: int = 1 # Water loss (-18.01056 Da)
NH3: int = 2 # Ammonia loss (-17.02655 Da)
MODLOSS_H2O: int = 3 # Modification + water loss
MODLOSS_NH3: int = 4 # Modification + ammonia loss
class Series:
"""Fragment series constants."""
A: int = 0 # a-ions (N-terminal, -CO)
B: int = 1 # b-ions (N-terminal)
C: int = 2 # c-ions (N-terminal, +NH3)
X: int = 3 # x-ions (C-terminal, +CO)
Y: int = 4 # y-ions (C-terminal)
Z: int = 5 # z-ions (C-terminal, -NH3)
class FragmentType:
"""Dataclass defining complete fragment type."""
series: int # Fragment series (A, B, C, X, Y, Z)
loss: int # Loss type (MODLOSS, H2O, NH3, etc.)
direction: int # Direction (FORWARD, REVERSE)
charge: int # Fragment charge state
# Available fragment types
FRAGMENT_TYPES: dict # Dictionary of all fragment type definitions
DIRECTION_MAPPING: dict = {"forward": 'forward', "reverse": 'reverse'}
LOSS_MAPPING: dict = {0: 'noloss', 1: 'H2O', 2: 'NH3', 3: 'modloss_H2O', 4: 'modloss_NH3'}
SERIES_MAPPING: dict = {0: 'a', 1: 'b', 2: 'c', 3: 'x', 4: 'y', 5: 'z'}

Functions for creating, parsing, and validating fragment type combinations with charge states.
def get_charged_frag_types(frag_types: List[str], charges: List[int]) -> List[str]:
"""
Generate charged fragment type combinations.
Parameters:
- frag_types: List of fragment types like ['b', 'y', 'b-H2O']
- charges: List of charge states like [1, 2, 3]
Returns:
List of charged fragment types like ['b+', 'b++', 'y+', 'y++', 'b-H2O+']
"""
def sort_charged_frag_types(frag_types: List[str]) -> List[str]:
"""
Sort fragment types by loss/no-loss categories.
Parameters:
- frag_types: List of charged fragment types
Returns:
Sorted list with no-loss fragments first, then losses
"""
def filter_valid_charged_frag_types(frag_types: List[str]) -> List[str]:
"""
Validate and filter fragment type list.
Parameters:
- frag_types: List of fragment type strings
Returns:
Filtered list with only valid fragment types
"""
def parse_charged_frag_type(frag_type: str) -> tuple[str, int]:
"""
Parse fragment type string and extract charge.
Parameters:
- frag_type: Fragment type like 'b++' or 'y-H2O+'
Returns:
Tuple of (base_type, charge) like ('b', 2) or ('y-H2O', 1)
"""
def sort_charged_frag_types(frag_types: List[str]) -> List[str]:
"""
Sort fragment types by loss/no-loss categories.
Parameters:
- frag_types: List of charged fragment types
Returns:
Sorted list with no-loss fragments first, then losses
"""
def filter_valid_charged_frag_types(frag_types: List[str]) -> List[str]:
"""
Validate and filter fragment type list.
Parameters:
- frag_types: List of fragment type strings
Returns:
Filtered list with only valid fragment types
"""

Functions for creating and initializing fragment DataFrames with proper structure and indexing.
def init_zero_fragment_dataframe(precursor_df: pd.DataFrame,
frag_types: List[str]) -> pd.DataFrame:
"""
Initialize empty fragment DataFrame with zero intensities.
Parameters:
- precursor_df: Precursor DataFrame with sequence and charge info
- frag_types: List of fragment types to include
Returns:
DataFrame with fragment structure and zero intensities
"""
def init_fragment_dataframe_from_other(template_df: pd.DataFrame,
frag_types: List[str]) -> pd.DataFrame:
"""
Initialize fragment DataFrame from reference template.
Parameters:
- template_df: Template DataFrame with proper structure
- frag_types: List of fragment types
Returns:
New DataFrame with same structure but specified fragment types
"""
def init_fragment_by_precursor_dataframe(precursor_df: pd.DataFrame,
frag_types: List[str],
max_frag_charge: int = 2) -> pd.DataFrame:
"""
Initialize fragment DataFrame for precursor list.
Parameters:
- precursor_df: Precursor DataFrame
- frag_types: Fragment types to generate
- max_frag_charge: Maximum fragment charge to consider
Returns:
Complete fragment DataFrame with m/z calculations
"""
def create_fragment_mz_dataframe(precursor_df: pd.DataFrame,
frag_types: List[str],
max_frag_charge: int = 2) -> pd.DataFrame:
"""
Generate fragment m/z values for spectral library.
Parameters:
- precursor_df: Precursor DataFrame with sequences and modifications
- frag_types: List of fragment types like ['b+', 'y+', 'b++', 'y++']
- max_frag_charge: Maximum fragment charge state
Returns:
DataFrame with fragment m/z values and metadata
"""

Functions for processing, filtering, and optimizing fragment DataFrames for spectral libraries.
def flatten_fragments(fragment_df: pd.DataFrame) -> pd.DataFrame:
"""
Convert tabular fragment data to linear format.
Parameters:
- fragment_df: Fragment DataFrame in tabular format
Returns:
Flattened DataFrame with one row per fragment
"""
def remove_unused_fragments(fragment_mz_df: pd.DataFrame,
fragment_intensity_df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
"""
Compress fragment libraries by removing unused entries.
Parameters:
- fragment_mz_df: Fragment m/z DataFrame
- fragment_intensity_df: Fragment intensity DataFrame
Returns:
Tuple of (compressed_mz_df, compressed_intensity_df)
"""
def calc_fragment_count(fragment_df: pd.DataFrame) -> pd.Series:
"""
Count fragments per precursor.
Parameters:
- fragment_df: Fragment DataFrame
Returns:
Series with fragment counts indexed by precursor
"""
def filter_fragment_number(fragment_mz_df: pd.DataFrame,
fragment_intensity_df: pd.DataFrame,
top_k: int = 100) -> tuple[pd.DataFrame, pd.DataFrame]:
"""
Filter top-k fragments per precursor by intensity.
Parameters:
- fragment_mz_df: Fragment m/z DataFrame
- fragment_intensity_df: Fragment intensity DataFrame
- top_k: Number of top fragments to keep per precursor
Returns:
Tuple of (filtered_mz_df, filtered_intensity_df)
"""
def calc_fragment_cardinality(fragment_df: pd.DataFrame,
group_by: str = 'proteins') -> pd.DataFrame:
"""
Calculate fragment sharing statistics across groups.
Parameters:
- fragment_df: Fragment DataFrame
- group_by: Column to group by for cardinality calculation
Returns:
DataFrame with fragment sharing statistics
"""

Direct mass calculation functions for peptide fragments and b/y ion series.
def calc_b_y_and_peptide_mass(sequences: List[str],
mod_masses: np.ndarray = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Calculate b/y fragment ions and peptide mass simultaneously.
Parameters:
- sequences: List of peptide sequences
- mod_masses: Optional modification masses array
Returns:
Tuple of (b_masses, y_masses, peptide_masses)
"""
def calc_b_y_and_peptide_masses_for_same_len_seqs(sequences: List[str],
mod_masses: np.ndarray = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
"""
Batch b/y ion and peptide mass calculation for equal-length sequences.
Parameters:
- sequences: List of equal-length peptide sequences
- mod_masses: Optional modification masses array
Returns:
Tuple of (b_masses, y_masses, peptide_masses) with optimized layout
"""
def calc_peptide_masses_for_same_len_seqs(sequences: List[str],
mod_masses: np.ndarray = None) -> np.ndarray:
"""
Calculate peptide masses for equal-length sequences.
Parameters:
- sequences: List of equal-length peptide sequences
- mod_masses: Optional modification masses array
Returns:
1D numpy array with peptide masses
"""
def calc_diff_modification_mass(mod_sequences: List[str]) -> np.ndarray:
"""
Calculate mass differences for open search workflows.
Parameters:
- mod_sequences: List of modified sequences
Returns:
Array with mass differences from unmodified peptides
"""

from alphabase.peptide.fragment import get_charged_frag_types, create_fragment_mz_dataframe
import pandas as pd
# Define fragment types and charges
base_types = ['b', 'y', 'b-H2O', 'y-H2O']
charges = [1, 2]
frag_types = get_charged_frag_types(base_types, charges)
print(f"Fragment types: {frag_types}")
# Output: ['b+', 'b++', 'y+', 'y++', 'b-H2O+', 'b-H2O++', 'y-H2O+', 'y-H2O++']
# Create precursor DataFrame
precursor_df = pd.DataFrame({
'sequence': ['PEPTIDE', 'SEQUENCE', 'EXAMPLE'],
'mods': ['', 'Phospho (STY)@2', ''],
'charge': [2, 3, 2],
'proteins': ['P12345', 'P67890', 'P11111']
})
# Generate fragment m/z values
fragment_mz_df = create_fragment_mz_dataframe(
precursor_df=precursor_df,
frag_types=frag_types,
max_frag_charge=2
)
print(f"Generated {len(fragment_mz_df)} fragment entries")

from alphabase.peptide.fragment import FragmentType, Series, Loss, Direction
# Create specific fragment type
frag_type = FragmentType(
series=Series.B, # b-ion
loss=Loss.H2O, # with water loss
direction=Direction.FORWARD, # N-terminal
charge=2 # doubly charged
)
print(f"Fragment: {frag_type}")
# Parse fragment type string
from alphabase.peptide.fragment import parse_charged_frag_type
base_type, charge = parse_charged_frag_type('y-NH3++')
print(f"Base type: {base_type}, Charge: {charge}")
# Output: Base type: y-NH3, Charge: 2

from alphabase.peptide.fragment import calc_b_y_and_peptide_mass
import numpy as np
# Calculate b/y ions and peptide masses
sequences = ['PEPTIDE', 'SEQUENCE']
b_masses, y_masses, peptide_masses = calc_b_y_and_peptide_mass(sequences)
print(f"B-ion masses shape: {b_masses.shape}")
print(f"Y-ion masses shape: {y_masses.shape}")
print(f"Peptide masses: {peptide_masses}")

from alphabase.peptide.fragment import filter_fragment_number, remove_unused_fragments
# Assume we have fragment DataFrames
# fragment_mz_df and fragment_intensity_df from previous steps
# Keep only top 50 fragments per precursor
top_mz_df, top_intensity_df = filter_fragment_number(
fragment_mz_df, fragment_intensity_df, top_k=50
)
# Remove unused fragment entries
compressed_mz_df, compressed_intensity_df = remove_unused_fragments(
top_mz_df, top_intensity_df
)
print(f"Original fragments: {len(fragment_mz_df)}")
print(f"After filtering: {len(top_mz_df)}")
print(f"After compression: {len(compressed_mz_df)}")

from alphabase.peptide.fragment import calc_fragment_cardinality, calc_fragment_count
# Calculate fragment counts per precursor
frag_counts = calc_fragment_count(fragment_mz_df)
print(f"Average fragments per precursor: {frag_counts.mean():.1f}")
# Analyze fragment sharing across proteins
cardinality_df = calc_fragment_cardinality(
fragment_mz_df, group_by='proteins'
)
print(f"Fragment cardinality analysis:\n{cardinality_df.head()}")

Install with Tessl CLI
npx tessl i tessl/pypi-alphabase