An infrastructure Python package of the AlphaX ecosystem for MS proteomics
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Full-featured spectral library management with comprehensive functionality for loading, processing, filtering, and exporting spectral libraries. Supports multiple formats, advanced operations like decoy generation, isotope calculations, and integration with various mass spectrometry workflows.
The main SpecLibBase class provides comprehensive spectral library functionality with integrated DataFrame management and processing capabilities.
class SpecLibBase:
"""
Main spectral library class with comprehensive functionality.
Properties:
- precursor_df: DataFrame with precursor information (sequence, mods, charge, proteins)
- peptide_df: DataFrame with unique peptide information
- fragment_mz_df: DataFrame with fragment m/z values
- fragment_intensity_df: DataFrame with fragment intensities
"""
# Core properties
precursor_df: pd.DataFrame
peptide_df: pd.DataFrame
fragment_mz_df: pd.DataFrame
fragment_intensity_df: pd.DataFrame
def __init__(self):
"""Initialize empty spectral library."""
def copy(self) -> 'SpecLibBase':
"""
Create deep copy of spectral library.
Returns:
New SpecLibBase instance with copied data
"""
def append(self, other: 'SpecLibBase') -> None:
"""
Append another spectral library to this one.
Parameters:
- other: Another SpecLibBase instance to append
"""
def refine_df(self) -> None:
"""
Sort and optimize all DataFrames for performance.
Sets proper indexing and memory layout.
"""
def append_decoy_sequence(self, decoy_sequence: str,
decoy_proteins: str = "decoy") -> None:
"""
Add decoy sequences to the library.
Parameters:
- decoy_sequence: Decoy sequence string
- decoy_proteins: Protein identifier for decoys
"""

Methods for calculating precursor and fragment m/z values with support for modifications and charge states.
class SpecLibBase:
def calc_precursor_mz(self) -> None:
"""
Calculate precursor m/z values from mass and charge.
Updates precursor_df with 'mz' column.
"""
def calc_fragment_mz_df(self, frag_types: List[str] = None) -> None:
"""
Generate fragment m/z DataFrame for all precursors.
Parameters:
- frag_types: List of fragment types like ['b+', 'y+', 'b++', 'y++']
If None, uses default fragment types
"""
def update_precursor_mz(self) -> None:
"""
Update precursor m/z values after modifications.
Alias for calc_precursor_mz() for backwards compatibility.
"""

Methods for generating hash codes for fast precursor lookup and deduplication.
class SpecLibBase:
def hash_precursor_df(self) -> None:
"""
Add hash columns to precursor DataFrame.
Adds 'seq_hash' and 'prec_hash' columns for fast lookup.
"""
def get_mod_seq_hash(self) -> pd.Series:
"""
Generate hash codes for modified peptide sequences.
Returns:
Series with hash codes for each sequence
"""
def get_mod_seq_charge_hash(self) -> pd.Series:
"""
Generate hash codes for precursors (sequence + charge).
Returns:
Series with hash codes for each precursor
"""

Methods for calculating isotope patterns and intensities for precursors.
class SpecLibBase:
def calc_precursor_isotope_info(self, max_isotope: int = 6) -> None:
"""
Calculate isotope envelope information for precursors.
Parameters:
- max_isotope: Maximum number of isotope peaks to calculate
"""
def calc_precursor_isotope_info_mp(self, max_isotope: int = 6,
n_jobs: int = 8) -> None:
"""
Multiprocessing isotope information calculation.
Parameters:
- max_isotope: Maximum isotope peaks
- n_jobs: Number of parallel processes
"""
def calc_precursor_isotope_intensity(self, max_isotope: int = 6) -> None:
"""
Calculate isotope pattern intensities for precursors.
Parameters:
- max_isotope: Maximum isotope peaks to calculate
"""
def calc_precursor_isotope_intensity_mp(self, max_isotope: int = 6,
n_jobs: int = 8) -> None:
"""
Multiprocessing isotope intensity calculation.
Parameters:
- max_isotope: Maximum isotope peaks
- n_jobs: Number of parallel processes
"""

Methods for processing and optimizing fragment data within the spectral library.
class SpecLibBase:
def remove_unused_fragments(self) -> None:
"""
Remove fragment entries with zero intensity across all precursors.
Compresses fragment DataFrames to save memory.
"""
def calc_fragment_count(self) -> pd.Series:
"""
Count number of fragments per precursor.
Returns:
Series with fragment counts indexed by precursor
"""
def filter_fragment_number(self, top_k: int = 100) -> None:
"""
Keep only top-k fragments per precursor by intensity.
Parameters:
- top_k: Number of top fragments to retain per precursor
"""
def sort_fragment_by_intensity(self, ascending: bool = False) -> None:
"""
Sort fragments by intensity within each precursor.
Parameters:
- ascending: Sort order (False for highest intensity first)
"""

Comprehensive I/O methods supporting multiple spectral library formats.
class SpecLibBase:
def save_hdf(self, filepath: str, **kwargs) -> None:
"""
Save spectral library to HDF5 format.
Parameters:
- filepath: Output HDF5 file path
- **kwargs: Additional HDF5 options
"""
def load_hdf(self, filepath: str, **kwargs) -> None:
"""
Load spectral library from HDF5 format.
Parameters:
- filepath: Input HDF5 file path
- **kwargs: Additional loading options
"""
# Note: Additional export formats may be available through external functions
# Check the alphabase.spectral_library module for format-specific export utilities

Methods for analyzing spectral library content and quality metrics.
class SpecLibBase:
# Note: Statistical analysis and validation methods may be available
# through external functions in the alphabase.spectral_library module
pass

Standalone functions for spectral library operations and annotations.
def annotate_fragments_from_speclib(target_lib: SpecLibBase,
donor_lib: SpecLibBase,
match_tolerance: float = 0.02) -> None:
"""
Annotate fragments using donor spectral library.
Parameters:
- target_lib: Target library to annotate
- donor_lib: Donor library with reference spectra
- match_tolerance: Mass tolerance for matching (Da)
"""
def get_available_columns(spec_lib: SpecLibBase) -> dict:
"""
Get available DataFrame columns across all library components.
Parameters:
- spec_lib: Spectral library instance
Returns:
Dictionary with available columns for each DataFrame
"""
# Note: Additional utility functions for library merging and filtering
# may be available in the alphabase.spectral_library module

from alphabase.spectral_library.base import SpecLibBase
import pandas as pd
# Create new spectral library
spec_lib = SpecLibBase()
# Add precursor data
precursor_df = pd.DataFrame({
'sequence': ['PEPTIDE', 'SEQUENCE', 'EXAMPLE'],
'mods': ['', 'Phospho (STY)@2', 'Oxidation (M)@1'],
'charge': [2, 3, 2],
'proteins': ['P12345', 'P67890', 'P11111'],
'rt': [25.5, 32.1, 28.7] # retention times
})
spec_lib.precursor_df = precursor_df
# Optimize DataFrame structure
spec_lib.refine_df()
# Calculate precursor m/z values
spec_lib.calc_precursor_mz()
# Generate fragment m/z values
frag_types = ['b+', 'y+', 'b++', 'y++']
spec_lib.calc_fragment_mz_df(frag_types)
print(f"Library contains {len(spec_lib.precursor_df)} precursors")
print(f"Generated {len(spec_lib.fragment_mz_df)} fragment entries")

# Save library in HDF5 format
spec_lib.save_hdf('my_library.hdf5')
# Load library from HDF5
new_lib = SpecLibBase()
new_lib.load_hdf('my_library.hdf5')
# Additional export formats may be available through external functions
# Check alphabase.spectral_library module for format-specific exporters

# Add hash codes for fast lookup
spec_lib.hash_precursor_df()
# Calculate isotope patterns
spec_lib.calc_precursor_isotope_info(max_isotope=6)
# Keep only the 50 most intense fragments per precursor
spec_lib.filter_fragment_number(top_k=50)
# Remove unused fragment entries
spec_lib.remove_unused_fragments()
# Library statistics can be calculated manually:
print(f"Precursors: {len(spec_lib.precursor_df)}")
print(f"Fragments: {len(spec_lib.fragment_mz_df)}")

# Merge multiple libraries using append method
lib1 = SpecLibBase()
lib2 = SpecLibBase()
# ... populate libraries ...
# Merge libraries
merged_lib = lib1.copy()
merged_lib.append(lib2)
# Filter by specific proteins using pandas operations
target_proteins = ['P12345', 'P67890']
filtered_precursors = merged_lib.precursor_df[
merged_lib.precursor_df['proteins'].isin(target_proteins)
]
print(f"Merged library: {len(merged_lib.precursor_df)} precursors")
print(f"Filtered precursors: {len(filtered_precursors)} precursors")

# Manual validation and quality control
print(f"Library integrity check:")
print(f" Precursors: {len(spec_lib.precursor_df)}")
print(f" Fragment m/z entries: {len(spec_lib.fragment_mz_df)}")
print(f" Fragment intensity entries: {len(spec_lib.fragment_intensity_df)}")
# Get fragment count statistics
frag_counts = spec_lib.calc_fragment_count()
print(f"Average fragments per precursor: {frag_counts.mean():.1f}")
print(f"Min fragments: {frag_counts.min()}, Max fragments: {frag_counts.max()}")
# Check available columns
available_cols = get_available_columns(spec_lib)
print(f"Available columns: {available_cols}")

# Create a copy for decoy generation
decoy_lib = spec_lib.copy()
# Add decoy sequences (typically done with specialized decoy generation)
for idx, row in spec_lib.precursor_df.iterrows():
# Reverse sequence as simple decoy strategy
decoy_seq = row['sequence'][::-1]
decoy_lib.append_decoy_sequence(decoy_seq, decoy_proteins="DECOY_" + row['proteins'])
print(f"Original library: {len(spec_lib.precursor_df)} precursors")
print(f"With decoys: {len(decoy_lib.precursor_df)} precursors")

Install with Tessl CLI
npx tessl i tessl/pypi-alphabase