An infrastructure Python package of the AlphaX ecosystem for MS proteomics
npx @tessl/cli install tessl/pypi-alphabase@1.6.0

An infrastructure Python package for the AlphaX ecosystem that provides essential functionalities for mass spectrometry (MS) proteomics. AlphaBase serves as the foundational library for peptide and protein analysis, spectral library management, PSM (Peptide-Spectrum Match) reading, quantification workflows, and data processing utilities across multiple MS data formats.
pip install alphabase

import alphabase

Common patterns for working with specific modules:
# Chemical constants and calculations
from alphabase.constants.aa import AA_ASCII_MASS, calc_AA_masses
from alphabase.constants.atom import MASS_PROTON, calc_mass_from_formula
from alphabase.constants.modification import MOD_DF, add_new_modifications
# Fragment and precursor calculations
from alphabase.peptide.fragment import get_charged_frag_types, create_fragment_mz_dataframe
from alphabase.peptide.precursor import update_precursor_mz, calc_precursor_isotope_info
from alphabase.peptide.mobility import ccs_to_mobility_for_df, mobility_to_ccs_for_df
# Spectral library operations
from alphabase.spectral_library.base import SpecLibBase
from alphabase.spectral_library.decoy import SpecLibDecoy, DIANNDecoyGenerator
from alphabase.spectral_library.flat import SpecLibFlat
# PSM reading from various search engines
from alphabase.psm_reader import MaxQuantReader, DiannReader, SpectronautReader
# Quantification data processing
from alphabase.quantification.quant_reader.quant_reader_manager import import_data
from alphabase.quantification.quant_reader.longformat_reader import LongFormatReader
# SMILES and cheminformatics
from alphabase.smiles.peptide import PeptideSmilesEncoder
from alphabase.smiles.smiles import AminoAcidModifier
# High-performance I/O
from alphabase.io.hdf import HDF_File
from alphabase.io.tempmmap import array, zeros

import pandas as pd
from alphabase.constants.aa import calc_AA_masses
from alphabase.constants.modification import calc_modification_mass
from alphabase.peptide.fragment import create_fragment_mz_dataframe
from alphabase.spectral_library.base import SpecLibBase
# Calculate amino acid masses for peptide sequences
sequences = ['PEPTIDE', 'SEQUENCE', 'EXAMPLE']
aa_masses = calc_AA_masses(sequences)
# Calculate modification masses
mod_sequences = ['PEPTIDE[Oxidation (M)]', 'SEQUENCE[Phospho (STY)]']
mod_masses = calc_modification_mass(mod_sequences)
# Create a spectral library
spec_lib = SpecLibBase()
# Load precursor data
precursor_df = pd.DataFrame({
'sequence': ['PEPTIDE', 'SEQUENCE'],
'mods': ['', 'Phospho (STY)@2'],
'charge': [2, 3],
'proteins': ['P12345', 'P67890']
})
spec_lib.precursor_df = precursor_df
spec_lib.refine_df()
# Calculate precursor m/z values
spec_lib.calc_precursor_mz()
# Generate fragment m/z dataframe
frag_types = ['b++', 'y++', 'b+', 'y+']
spec_lib.calc_fragment_mz_df(frag_types)
print(f"Created spectral library with {len(spec_lib.precursor_df)} precursors")

AlphaBase is organized into functional modules that provide both high-level object-oriented interfaces and low-level array operations.
This modular design enables both rapid prototyping and high-throughput production workflows in mass spectrometry proteomics, with comprehensive coverage from raw data processing to advanced computational analysis.
Comprehensive databases of amino acids, chemical elements, modifications, and isotopes with vectorized mass calculations. Provides the foundation for all proteomics calculations with pre-computed lookup tables for performance.
# Core constants
AA_ASCII_MASS: np.ndarray # 128-length array of AA masses
MASS_PROTON: float = 1.00727646688
MOD_DF: pd.DataFrame # Complete modification database
# Mass calculation functions
def calc_AA_masses(sequences: List[str]) -> np.ndarray: ...
def calc_mass_from_formula(formula: str) -> float: ...
def calc_modification_mass(mod_sequences: List[str]) -> np.ndarray: ...

Complete fragment ion series generation with support for multiple fragment types, neutral losses, and charge states. Enables creation of theoretical spectra for spectral library construction and peptide identification.
def get_charged_frag_types(frag_types: List[str], charges: List[int]) -> List[str]: ...
def create_fragment_mz_dataframe(precursor_df: pd.DataFrame, frag_types: List[str]) -> pd.DataFrame: ...
def calc_b_y_and_peptide_masses_for_same_len_seqs(sequences: List[str]) -> tuple: ...

Full-featured spectral library class with comprehensive functionality for loading, processing, filtering, and exporting spectral libraries. Supports multiple formats and advanced operations like decoy generation and isotope calculations.
class SpecLibBase:
precursor_df: pd.DataFrame
fragment_mz_df: pd.DataFrame
fragment_intensity_df: pd.DataFrame
def copy(self) -> 'SpecLibBase': ...
def append(self, other: 'SpecLibBase') -> None: ...
def calc_precursor_mz(self) -> None: ...
def calc_fragment_mz_df(self, frag_types: List[str]) -> None: ...
def save_hdf(self, filepath: str) -> None: ...
def load_hdf(self, filepath: str) -> None: ...

Unified interface for reading Peptide-Spectrum Match (PSM) files from multiple proteomics search engines. Standardizes column mappings and data formats across different tools for seamless data integration.
class PSMReaderBase:
def import_file(self, filepath: str) -> pd.DataFrame: ...
def get_modification_mapping(self) -> dict: ...
# Available readers
class MaxQuantReader(PSMReaderBase): ...
class DiannReader(PSMReaderBase): ...
class SpectronautReader(PSMReaderBase): ...
# ... and 7 more search engine readers

Advanced I/O utilities including HDF5 wrapper with attribute-style access and memory-mapped arrays for efficient handling of large proteomics datasets. Optimized for high-throughput workflows and memory efficiency.
class HDF_File:
def __init__(self, filepath: str, mode: str = 'r'): ...
def __getitem__(self, key: str): ...
def __setitem__(self, key: str, value): ...
def array(shape: tuple, dtype=np.float64) -> np.ndarray: ...
def zeros(shape: tuple, dtype=np.float64) -> np.ndarray: ...
def clear() -> None: ...

Comprehensive quantification data processing capabilities for handling multi-format quantified peptide and protein data from various proteomics platforms. Provides unified interfaces for reading, reformatting, and processing quantification results from DIA-NN, Spectronaut, MaxQuant, and other proteomics tools.
def import_data(data_path: str, data_type: str = None, config_dict: dict = None) -> pd.DataFrame: ...
class LongFormatReader: ...
class WideFormatReader: ...
class ConfigDictLoader: ...

Comprehensive peptide processing capabilities including precursor calculations, mass calculations, ion mobility transformations, and advanced algorithmic operations. Provides high-performance functions for large-scale peptide analysis, isotope modeling, and multi-dimensional separations integration.
def update_precursor_mz(precursor_df: pd.DataFrame, batch_size: int = 100000) -> None: ...
def calc_precursor_isotope_info(precursor_df: pd.DataFrame, max_isotope: int = 6) -> None: ...
def ccs_to_mobility_for_df(precursor_df: pd.DataFrame, vendor_type: str = 'bruker') -> None: ...
def hash_precursor_df(precursor_df: pd.DataFrame, seed: int = 42) -> None: ...

Extended spectral library functionality including decoy generation, format conversion, library validation, and specialized library formats. Provides comprehensive tools for spectral library manipulation, quality control, and integration with various proteomics workflows and search engines.
class SpecLibDecoy: ...
class DIANNDecoyGenerator: ...
class SpecLibFlat: ...
class LibraryReaderBase: ...
class Schema: ...

Comprehensive cheminformatics capabilities for peptide and amino acid SMILES (Simplified Molecular-Input Line-Entry System) representations. Provides tools for chemical structure encoding, modification representation, and integration with computational chemistry workflows in proteomics.
class AminoAcidModifier: ...
class PeptideSmilesEncoder: ...
def calculate_molecular_descriptors(smiles: str) -> dict: ...
def predict_retention_time_from_smiles(smiles: str, model_type: str = 'krokhin') -> float: ...

FASTA file processing, protein sequence analysis, and enzymatic digestion utilities. Supports protein inference workflows and integration with proteomics identification pipelines.
def read_fasta_file(filepath: str) -> Iterator[tuple[str, str]]: ...
def get_uniprot_gene_name(description: str) -> str: ...