CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-alphabase

An infrastructure Python package of the AlphaX ecosystem for mass spectrometry (MS) proteomics

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/advanced-spectral-libraries.md

Advanced Spectral Library Operations

Extended spectral library functionality including decoy generation, format conversion, library validation, and specialized library formats. Provides comprehensive tools for spectral library manipulation, quality control, and integration with various proteomics workflows and search engines.

Capabilities

Decoy Generation and Management

Comprehensive decoy generation capabilities supporting multiple strategies and integration with target-decoy search workflows.

class SpecLibDecoy:
    """Extended spectral library with integrated decoy generation and management."""
    
    # SpecLibBase is not defined in this module; use a quoted forward reference
    # (as done at separate_targets_and_decoys below) so the annotation does not
    # raise NameError at class-creation time. The None default makes it optional.
    def __init__(self, target_lib: "SpecLibBase | None" = None):
        """
        Initialize spectral library with decoy capabilities.
        
        Parameters:
        - target_lib: Target spectral library to extend with decoys (optional)
        """
    
    def generate_decoys(self, method: str = 'diann',
                       decoy_prefix: str = 'DECOY_',
                       keep_peptide_types: bool = True) -> None:
        """
        Generate decoy sequences using specified method.
        
        Parameters:
        - method: Decoy generation method ('diann', 'pseudo_reverse', 'shuffle')
        - decoy_prefix: Prefix for decoy protein identifiers
        - keep_peptide_types: Preserve peptide characteristics in decoys
        """
    
    def validate_decoy_quality(self) -> dict:
        """
        Assess quality of generated decoy sequences.
        
        Returns:
        Dictionary with decoy quality metrics and statistics
        """
    
    def get_target_decoy_ratio(self) -> float:
        """
        Calculate ratio of target to decoy sequences.
        
        Returns:
        Target-to-decoy ratio
        """
    
    def separate_targets_and_decoys(self) -> tuple['SpecLibBase', 'SpecLibBase']:
        """
        Split library into separate target and decoy libraries.
        
        Returns:
        Tuple of (target_library, decoy_library)
        """

class DIANNDecoyGenerator:
    """DIANN-style decoy generation with advanced sequence manipulation."""
    
    def __init__(self, keep_peptide_types: bool = True,
                 min_peptide_length: int = 6,
                 max_peptide_length: int = 30):
        """
        Initialize DIANN decoy generator.
        
        Parameters:
        - keep_peptide_types: Preserve tryptic characteristics
        - min_peptide_length: Minimum length for generated decoys
        - max_peptide_length: Maximum length for generated decoys
        """
    
    def generate_decoy_sequence(self, target_sequence: str,
                               target_proteins: str) -> tuple[str, str]:
        """
        Generate single decoy sequence from target.
        
        Parameters:
        - target_sequence: Target peptide sequence
        - target_proteins: Target protein identifiers
        
        Returns:
        Tuple of (decoy_sequence, decoy_proteins)
        """
    
    # SpecLibBase is not defined in this module; quoted forward references
    # prevent NameError when the class body is evaluated.
    def generate_decoy_library(self, target_lib: "SpecLibBase",
                              decoy_prefix: str = 'DECOY_') -> "SpecLibBase":
        """
        Generate complete decoy library from target library.
        
        Parameters:
        - target_lib: Target spectral library
        - decoy_prefix: Prefix for decoy identifiers
        
        Returns:
        New spectral library with decoy sequences
        """
    
    def validate_sequence_properties(self, target_seq: str,
                                   decoy_seq: str) -> dict:
        """
        Compare properties between target and decoy sequences.
        
        Parameters:
        - target_seq: Original target sequence
        - decoy_seq: Generated decoy sequence
        
        Returns:
        Dictionary with property comparisons
        """

class PseudoReverseDecoyGenerator:
    """Decoy generation by pseudo-reversal, keeping enzymatic cleavage sites intact."""
    
    def __init__(self, cleavage_rule: str = 'trypsin'):
        """
        Create a pseudo-reverse decoy generator.
        
        Parameters:
        - cleavage_rule: Enzyme cleavage specificity to preserve
        """
    
    def generate_pseudo_reverse(self, sequence: str) -> str:
        """
        Build a pseudo-reversed decoy of *sequence*, keeping cleavage sites in place.
        
        Parameters:
        - sequence: Target peptide sequence
        
        Returns:
        Pseudo-reverse decoy sequence
        """
    
    def preserve_cleavage_specificity(self, sequence: str,
                                    enzyme: str = 'trypsin') -> str:
        """
        Adjust a sequence so its enzymatic cleavage characteristics are retained.
        
        Parameters:
        - sequence: Input sequence
        - enzyme: Enzyme specificity to preserve
        
        Returns:
        Modified sequence with preserved cleavage sites
        """

class BaseDecoyGenerator:
    """Abstract base for custom decoy generation strategies.

    Subclasses must override ``generate_decoy``; ``validate_decoy`` may be
    overridden to add strategy-specific checks.
    """
    
    def __init__(self):
        """Initialize base decoy generator."""
    
    def generate_decoy(self, target_sequence: str,
                      target_proteins: str,
                      **kwargs) -> tuple[str, str]:
        """
        Produce a (decoy_sequence, decoy_proteins) pair for one target.
        
        Parameters:
        - target_sequence: Target peptide sequence
        - target_proteins: Target protein identifiers
        - **kwargs: Strategy-specific parameters
        
        Raises:
        NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement generate_decoy")
    
    def validate_decoy(self, target_seq: str, decoy_seq: str) -> bool:
        """
        Check a generated decoy against its target.
        
        The base implementation accepts every decoy.
        
        Parameters:
        - target_seq: Original target sequence
        - decoy_seq: Generated decoy sequence
        
        Returns:
        True if decoy passes validation checks
        """
        return True

class SpecLibDecoyProvider:
    """Provider system for decoy generation strategies."""
    
    @staticmethod
    def get_generator(method: str, **kwargs) -> BaseDecoyGenerator:
        """
        Get decoy generator instance by method name.
        
        Parameters:
        - method: Generator method ('diann', 'pseudo_reverse', 'shuffle')
        - **kwargs: Method-specific parameters
        
        Returns:
        Configured decoy generator instance
        """
    
    # ``typing.List`` is never imported in this module; use the builtin
    # generic (list[str]), consistent with tuple[str, str] used above.
    @staticmethod
    def list_available_methods() -> list[str]:
        """
        List all available decoy generation methods.
        
        Returns:
        List of method names
        """
    
    @staticmethod
    def register_custom_generator(name: str, 
                                 generator_class: type) -> None:
        """
        Register custom decoy generation method.
        
        Parameters:
        - name: Name for the custom method
        - generator_class: Class implementing BaseDecoyGenerator
        """

Flat Spectral Library Format

Specialized flat format for efficient storage and retrieval of large spectral libraries.

class SpecLibFlat:
    """Flat spectral library format optimized for large-scale storage."""
    
    def __init__(self):
        """Initialize flat spectral library."""
    
    # SpecLibBase and pd (pandas) are not defined in this module; quoted
    # forward references keep the annotations from raising NameError at
    # class-creation time.
    def from_spec_lib(self, spec_lib: "SpecLibBase") -> None:
        """
        Convert standard spectral library to flat format.
        
        Parameters:
        - spec_lib: Standard SpecLibBase to convert
        """
    
    def to_spec_lib(self) -> "SpecLibBase":
        """
        Convert flat library back to standard format.
        
        Returns:
        Standard SpecLibBase instance
        """
    
    def save_flat(self, filepath: str, 
                 compression: str = 'gzip') -> None:
        """
        Save flat library to compressed file.
        
        Parameters:
        - filepath: Output file path
        - compression: Compression method ('gzip', 'bz2', 'xz')
        """
    
    def load_flat(self, filepath: str) -> None:
        """
        Load flat library from compressed file.
        
        Parameters:
        - filepath: Input file path
        """
    
    def get_precursor_range(self, start_idx: int, 
                           end_idx: int) -> "pd.DataFrame":
        """
        Get precursor range without loading full library.
        
        Parameters:
        - start_idx: Starting precursor index
        - end_idx: Ending precursor index
        
        Returns:
        DataFrame with precursor range
        """
    
    def query_by_mz_range(self, min_mz: float, 
                         max_mz: float) -> "pd.DataFrame":
        """
        Query precursors by m/z range efficiently.
        
        Parameters:
        - min_mz: Minimum m/z value
        - max_mz: Maximum m/z value
        
        Returns:
        DataFrame with precursors in m/z range
        """
    
    def create_index(self, index_type: str = 'mz') -> None:
        """
        Create optimized index for fast queries.
        
        Parameters:
        - index_type: Type of index ('mz', 'rt', 'sequence')
        """
    
    def optimize_storage(self) -> dict:
        """
        Optimize storage layout and compression.
        
        Returns:
        Dictionary with optimization statistics
        """

Library Readers and Format Conversion

Comprehensive readers for various spectral library formats and conversion utilities.

class LibraryReaderBase:
    """Base class for spectral library format readers."""
    
    def __init__(self):
        """Initialize library reader."""
    
    # SpecLibBase is not defined in this module; quoted forward reference
    # avoids NameError when the class body is evaluated.
    def read_library(self, filepath: str, **kwargs) -> "SpecLibBase":
        """
        Read spectral library from file.
        
        Parameters:
        - filepath: Path to library file
        - **kwargs: Format-specific options
        
        Returns:
        Loaded spectral library
        
        Raises:
        NotImplementedError: always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement read_library")
    
    def validate_format(self, filepath: str) -> bool:
        """
        Validate if file matches expected format.
        
        The base implementation accepts every file.
        
        Parameters:
        - filepath: File path to validate
        
        Returns:
        True if format is compatible
        """
        return True
    
    def get_library_info(self, filepath: str) -> dict:
        """
        Get library metadata without full loading.
        
        The base implementation returns an empty dict.
        
        Parameters:
        - filepath: Library file path
        
        Returns:
        Dictionary with library information
        """
        return {}

class CSVLibraryReader(LibraryReaderBase):
    """Reader for CSV-format spectral libraries."""
    
    def __init__(self, delimiter: str = ','):
        """
        Initialize CSV reader.
        
        Parameters:
        - delimiter: CSV delimiter character
        """
    
    # SpecLibBase is not defined in this module; quoted forward reference
    # avoids NameError when the class body is evaluated.
    def read_library(self, filepath: str, **kwargs) -> "SpecLibBase":
        """
        Read spectral library from CSV file.
        
        Parameters:
        - filepath: Path to CSV library file
        - **kwargs: CSV reading options
        
        Returns:
        Loaded spectral library
        """
    
    def set_column_mapping(self, mapping: dict) -> None:
        """
        Set custom column name mappings.
        
        Parameters:
        - mapping: Dictionary mapping CSV columns to standard names
        """

class TSVLibraryReader(LibraryReaderBase):
    """Reader for TSV-format spectral libraries."""
    
    def __init__(self):
        """Initialize TSV reader."""
    
    # SpecLibBase is not defined in this module; quoted forward reference
    # avoids NameError when the class body is evaluated.
    def read_library(self, filepath: str, **kwargs) -> "SpecLibBase":
        """
        Read spectral library from TSV file.
        
        Parameters:
        - filepath: Path to TSV library file
        - **kwargs: TSV reading options
        
        Returns:
        Loaded spectral library
        """

class MSPLibraryReader(LibraryReaderBase):
    """Reader for MSP-format spectral libraries."""
    
    def __init__(self):
        """Initialize MSP reader."""
    
    # SpecLibBase is not defined in this module; quoted forward reference
    # avoids NameError when the class body is evaluated.
    def read_library(self, filepath: str, **kwargs) -> "SpecLibBase":
        """
        Read spectral library from MSP file.
        
        Parameters:
        - filepath: Path to MSP library file
        - **kwargs: MSP reading options
        
        Returns:
        Loaded spectral library
        """
    
    def parse_msp_entry(self, entry_text: str) -> dict:
        """
        Parse individual MSP library entry.
        
        Parameters:
        - entry_text: Raw MSP entry text
        
        Returns:
        Dictionary with parsed entry information
        """

def get_library_reader(filepath: str) -> LibraryReaderBase:
    """
    Return a reader instance matching the detected format of *filepath*.
    
    Parameters:
    - filepath: Path to the spectral library file
    
    Returns:
    Reader instance suited to the detected file format
    """

def convert_library_format(input_path: str, 
                          output_path: str,
                          input_format: "str | None" = None,
                          output_format: str = 'hdf5') -> None:
    """
    Convert spectral library between formats.
    
    Parameters:
    - input_path: Input library file path
    - output_path: Output library file path
    - input_format: Input format (auto-detected if None)
    - output_format: Output format ('hdf5', 'csv', 'msp')
    """

Library Translation and Format Support

Utilities for translating between different spectral library formats and search engine requirements.

class WritingProcess:
    """Multiprocessing writer for efficient library export."""
    
    def __init__(self, n_processes: int = 4):
        """
        Initialize multiprocessing writer.
        
        Parameters:
        - n_processes: Number of worker processes
        """
    
    # SpecLibBase is not defined in this module (quoted forward reference
    # avoids NameError); typing.List is never imported, so the builtin
    # generic list[str] is used instead.
    def write_library_parallel(self, spec_lib: "SpecLibBase",
                              output_path: str,
                              format_type: str = 'tsv',
                              chunk_size: int = 10000) -> None:
        """
        Write library using parallel processing.
        
        Parameters:
        - spec_lib: Spectral library to write
        - output_path: Output file path
        - format_type: Output format
        - chunk_size: Number of precursors per chunk
        """
    
    def write_multiple_formats(self, spec_lib: "SpecLibBase",
                              base_path: str,
                              formats: list[str]) -> dict:
        """
        Write library in multiple formats simultaneously.
        
        Parameters:
        - spec_lib: Spectral library to write
        - base_path: Base output path (extensions added automatically)
        - formats: List of output formats
        
        Returns:
        Dictionary mapping formats to output file paths
        """

def translate_to_diann_format(spec_lib: "SpecLibBase",
                             output_path: str) -> None:
    """
    Translate library to DIA-NN compatible format.
    
    Parameters:
    - spec_lib: Input spectral library (quoted forward reference: SpecLibBase
      is not defined in this module)
    - output_path: Output file path for DIA-NN library
    """

def translate_to_spectronaut_format(spec_lib: "SpecLibBase",
                                   output_path: str) -> None:
    """
    Translate library to Spectronaut compatible format.
    
    Parameters:
    - spec_lib: Input spectral library (quoted forward reference: SpecLibBase
      is not defined in this module)
    - output_path: Output file path for Spectronaut library
    """

def translate_to_openswath_format(spec_lib: "SpecLibBase",
                                 output_path: str) -> None:
    """
    Translate library to OpenSWATH compatible format.
    
    Parameters:
    - spec_lib: Input spectral library (quoted forward reference: SpecLibBase
      is not defined in this module)
    - output_path: Output file path for OpenSWATH library
    """

def translate_to_skyline_format(spec_lib: "SpecLibBase",
                               output_path: str) -> None:
    """
    Translate library to Skyline compatible format.
    
    Parameters:
    - spec_lib: Input spectral library (quoted forward reference: SpecLibBase
      is not defined in this module)
    - output_path: Output file path for Skyline library
    """

def create_search_engine_libraries(spec_lib: "SpecLibBase",
                                  output_dir: str,
                                  engines: "list[str] | None" = None) -> dict:
    """
    Create libraries for multiple search engines.
    
    Parameters:
    - spec_lib: Input spectral library (quoted forward reference: SpecLibBase
      is not defined in this module)
    - output_dir: Directory for output files
    - engines: List of search engines ('diann', 'spectronaut', 'openswath');
      optional — None selects the default engine set
    
    Returns:
    Dictionary mapping engines to output file paths
    """

Library Validation and Quality Control

Comprehensive validation system for assessing spectral library quality and completeness.

class Schema:
    """Schema validation system for spectral libraries."""
    
    # typing.List is never imported in this module, and the None defaults
    # require optional annotations — use quoted "list[str] | None".
    def __init__(self, required_columns: "list[str] | None" = None,
                 optional_columns: "list[str] | None" = None):
        """
        Initialize schema validator.
        
        Parameters:
        - required_columns: List of required column names (optional)
        - optional_columns: List of optional column names (optional)
        """
    
    def validate_library(self, spec_lib: "SpecLibBase") -> dict:
        """
        Validate spectral library against schema.
        
        Parameters:
        - spec_lib: Spectral library to validate
        
        Returns:
        Dictionary with validation results and issues
        """
    
    def add_column_requirement(self, column: str, 
                              requirement_type: str,
                              **kwargs) -> None:
        """
        Add column validation requirement.
        
        Parameters:
        - column: Column name
        - requirement_type: Type of requirement ('required', 'optional', 'forbidden')
        - **kwargs: Additional requirement parameters
        """

class Required:
    """Required column specification for schema validation."""
    
    # None defaults require optional annotations; ``callable`` is the builtin
    # predicate, not a type — annotate as an optional Callable instead.
    def __init__(self, column_name: str, 
                 data_type: "type | None" = None,
                 validation_func: "Callable | None" = None):
        """
        Define required column.
        
        Parameters:
        - column_name: Name of required column
        - data_type: Expected data type (optional)
        - validation_func: Custom validation function (optional)
        """
    
    # pd (pandas) is not imported in this module; quoted forward reference
    # avoids NameError when the class body is evaluated.
    def validate(self, df: "pd.DataFrame") -> dict:
        """
        Validate column presence and properties.
        
        Parameters:
        - df: DataFrame to validate
        
        Returns:
        Validation result dictionary
        """

class Optional:
    """Optional column specification for schema validation.

    NOTE: this class intentionally reuses the name ``Optional`` as part of the
    schema DSL; it will shadow ``typing.Optional`` if both are imported.
    """
    
    # The None default on data_type requires an optional annotation.
    def __init__(self, column_name: str,
                 data_type: "type | None" = None,
                 default_value=None):
        """
        Define optional column.
        
        Parameters:
        - column_name: Name of optional column
        - data_type: Expected data type if present (optional)
        - default_value: Default value if column missing
        """
    
    # pd (pandas) is not imported in this module; quoted forward reference
    # avoids NameError when the class body is evaluated.
    def validate(self, df: "pd.DataFrame") -> dict:
        """
        Validate optional column if present.
        
        Parameters:
        - df: DataFrame to validate
        
        Returns:
        Validation result dictionary
        """

class Column:
    """Generic column specification with flexible validation."""
    
    # typing.List is never imported in this module; None defaults require
    # optional annotations — use quoted optional forms.
    def __init__(self, name: str, 
                 required: bool = True,
                 data_type: "type | None" = None,
                 min_value=None,
                 max_value=None,
                 allowed_values: "list | None" = None):
        """
        Define column specification.
        
        Parameters:
        - name: Column name
        - required: Whether column is required
        - data_type: Expected data type (optional)
        - min_value: Minimum allowed value (optional)
        - max_value: Maximum allowed value (optional)
        - allowed_values: List of allowed values (optional)
        """
    
    # pd (pandas) is not imported in this module; quoted forward reference
    # avoids NameError when the class body is evaluated.
    def validate(self, df: "pd.DataFrame") -> dict:
        """
        Perform comprehensive column validation.
        
        Parameters:
        - df: DataFrame to validate
        
        Returns:
        Detailed validation results
        """

def validate_spectral_library_completeness(spec_lib: "SpecLibBase") -> dict:
    """
    Validate spectral library completeness and consistency.
    
    Parameters:
    - spec_lib: Spectral library to validate (quoted forward reference:
      SpecLibBase is not defined in this module)
    
    Returns:
    Dictionary with completeness assessment
    """

def assess_library_quality_metrics(spec_lib: "SpecLibBase") -> dict:
    """
    Calculate comprehensive library quality metrics.
    
    Parameters:
    - spec_lib: Spectral library to assess (quoted forward reference:
      SpecLibBase is not defined in this module)
    
    Returns:
    Dictionary with quality metrics and statistics
    """

def check_library_integrity(spec_lib: "SpecLibBase") -> dict:
    """
    Check spectral library data integrity.
    
    Parameters:
    - spec_lib: Spectral library to check (quoted forward reference:
      SpecLibBase is not defined in this module)
    
    Returns:
    Dictionary with integrity check results
    """

def generate_library_report(spec_lib: "SpecLibBase",
                           output_path: "str | None" = None) -> dict:
    """
    Generate comprehensive library quality report.
    
    Parameters:
    - spec_lib: Spectral library to analyze (quoted forward reference:
      SpecLibBase is not defined in this module)
    - output_path: Optional path to save HTML report
    
    Returns:
    Dictionary with report data and statistics
    """

Usage Examples

Decoy Generation and Management

from alphabase.spectral_library.decoy import SpecLibDecoy, DIANNDecoyGenerator
from alphabase.spectral_library.base import SpecLibBase
import pandas as pd

# Create target library
target_lib = SpecLibBase()
target_lib.precursor_df = pd.DataFrame({
    'sequence': ['PEPTIDE', 'SEQUENCE', 'EXAMPLE'],
    'mods': ['', 'Phospho (STY)@2', ''],
    'charge': [2, 3, 2],
    'proteins': ['P12345', 'P67890', 'P11111']
})
# NOTE(review): refine_df is provided by SpecLibBase; presumably it
# normalizes/sorts the precursor table — confirm in the base module.
target_lib.refine_df()

# Create decoy library using DIANN method
decoy_lib = SpecLibDecoy(target_lib)
decoy_lib.generate_decoys(method='diann', decoy_prefix='DECOY_')

print(f"Target precursors: {len(target_lib.precursor_df)}")
print(f"Total with decoys: {len(decoy_lib.precursor_df)}")
print(f"Target-decoy ratio: {decoy_lib.get_target_decoy_ratio():.1f}")

# Validate decoy quality
quality_metrics = decoy_lib.validate_decoy_quality()
print(f"Decoy quality metrics: {quality_metrics}")

# Separate targets and decoys: returns a (target_library, decoy_library) tuple
targets, decoys = decoy_lib.separate_targets_and_decoys()
print(f"Separated: {len(targets.precursor_df)} targets, {len(decoys.precursor_df)} decoys")

Advanced Decoy Generation

from alphabase.spectral_library.decoy import (
    DIANNDecoyGenerator, PseudoReverseDecoyGenerator, SpecLibDecoyProvider
)

# Use DIANN decoy generator directly
diann_gen = DIANNDecoyGenerator(keep_peptide_types=True)
target_seq = "PEPTIDE"
# generate_decoy_sequence returns a (decoy_sequence, decoy_proteins) tuple
decoy_seq, decoy_proteins = diann_gen.generate_decoy_sequence(
    target_seq, "P12345"
)
print(f"DIANN decoy: {target_seq} -> {decoy_seq}")

# Validate sequence properties
properties = diann_gen.validate_sequence_properties(target_seq, decoy_seq)
print(f"Property comparison: {properties}")

# Use pseudo-reverse generator
pseudo_gen = PseudoReverseDecoyGenerator(cleavage_rule='trypsin')
pseudo_decoy = pseudo_gen.generate_pseudo_reverse(target_seq)
print(f"Pseudo-reverse decoy: {target_seq} -> {pseudo_decoy}")

# Use provider system: get_generator dispatches by method name ('diann' here)
generator = SpecLibDecoyProvider.get_generator('diann', keep_peptide_types=True)
print(f"Available methods: {SpecLibDecoyProvider.list_available_methods()}")

Flat Library Format Operations

from alphabase.spectral_library.flat import SpecLibFlat

# Convert standard library to flat format
# NOTE(review): assumes target_lib was built earlier (see the decoy example)
flat_lib = SpecLibFlat()
flat_lib.from_spec_lib(target_lib)

# Save in compressed format — .gz extension chosen to match compression='gzip'
flat_lib.save_flat('library_flat.gz', compression='gzip')

# Load flat library
new_flat = SpecLibFlat()
new_flat.load_flat('library_flat.gz')

# Efficient range queries
precursor_range = new_flat.get_precursor_range(0, 10)
print(f"First 10 precursors: {len(precursor_range)}")

# Query by m/z range
mz_range = new_flat.query_by_mz_range(400.0, 500.0)
print(f"Precursors in m/z 400-500: {len(mz_range)}")

# Create index for fast queries
new_flat.create_index(index_type='mz')

# Optimize storage
optimization_stats = new_flat.optimize_storage()
print(f"Storage optimization: {optimization_stats}")

Library Format Conversion

from alphabase.spectral_library.reader import (
    get_library_reader, convert_library_format
)
from alphabase.spectral_library.translate import (
    translate_to_diann_format, create_search_engine_libraries
)

# Auto-detect and read library format
reader = get_library_reader('unknown_library.tsv')
loaded_lib = reader.read_library('unknown_library.tsv')
print(f"Loaded library: {len(loaded_lib.precursor_df)} precursors")

# Convert between formats (input_format may be omitted for auto-detection)
convert_library_format(
    input_path='library.csv',
    output_path='library.h5',
    input_format='csv',
    output_format='hdf5'
)

# Translate to specific search engine formats
translate_to_diann_format(loaded_lib, 'library_diann.tsv')
print("Translated to DIA-NN format")

# Create libraries for multiple search engines; returns a dict mapping
# engine name -> output file path
engine_libraries = create_search_engine_libraries(
    loaded_lib,
    output_dir='./libraries/',
    engines=['diann', 'spectronaut', 'openswath']
)
print(f"Created libraries: {list(engine_libraries.keys())}")

Library Validation and Quality Control

from alphabase.spectral_library.validate import (
    Schema, Required, Optional, validate_spectral_library_completeness,
    assess_library_quality_metrics, generate_library_report
)

# Create validation schema
schema = Schema()
schema.add_column_requirement('sequence', 'required', data_type=str)
schema.add_column_requirement('charge', 'required', data_type=int)
schema.add_column_requirement('proteins', 'required', data_type=str)
schema.add_column_requirement('rt', 'optional', data_type=float)

# Validate library against schema
# NOTE(review): assumes the result dict carries 'passed' and 'issues' keys —
# confirm against the validate module.
validation_results = schema.validate_library(loaded_lib)
print(f"Schema validation: {validation_results['passed']}")
if not validation_results['passed']:
    print(f"Issues: {validation_results['issues']}")

# Check library completeness
completeness = validate_spectral_library_completeness(loaded_lib)
print(f"Library completeness:")
print(f"  Precursor completeness: {completeness['precursor_completeness']:.1%}")
print(f"  Fragment completeness: {completeness['fragment_completeness']:.1%}")

# Assess quality metrics
quality_metrics = assess_library_quality_metrics(loaded_lib)
print(f"Quality metrics:")
print(f"  Average fragments per precursor: {quality_metrics['avg_fragments_per_precursor']:.1f}")
print(f"  m/z range: {quality_metrics['mz_range']}")
print(f"  Charge distribution: {quality_metrics['charge_distribution']}")

# Generate comprehensive report (HTML written to the given path)
report_data = generate_library_report(loaded_lib, 'library_report.html')
print(f"Generated report with {len(report_data['sections'])} sections")

Parallel Library Processing

from alphabase.spectral_library.translate import WritingProcess

# Process large library with multiple workers
writer = WritingProcess(n_processes=8)

# Write library in parallel; chunk_size controls precursors per work unit
writer.write_library_parallel(
    spec_lib=loaded_lib,
    output_path='large_library.tsv',
    format_type='tsv',
    chunk_size=50000
)

# Write multiple formats simultaneously; returns a dict mapping
# format -> output file path (extensions are appended to base_path)
format_paths = writer.write_multiple_formats(
    spec_lib=loaded_lib,
    base_path='library',
    formats=['tsv', 'csv', 'msp']
)
print(f"Created formats: {format_paths}")

Advanced Validation Workflows

from alphabase.spectral_library.validate import Required, Optional, Column

# Create detailed column specifications
columns = [
    Required('sequence', data_type=str),
    Required('charge', data_type=int),
    Required('proteins', data_type=str),
    Optional('rt', data_type=float, default_value=0.0),
    Column('mz', required=True, data_type=float, min_value=100.0, max_value=2000.0),
    Column('intensity', required=False, data_type=float, min_value=0.0)
]

# Validate each column specification
# NOTE(review): assumes every spec exposes a .name attribute and that
# validate() returns a dict with a 'valid' key — confirm in validate module.
validation_results = []
for col_spec in columns:
    result = col_spec.validate(loaded_lib.precursor_df)
    validation_results.append(result)
    print(f"Column {col_spec.name}: {'PASS' if result['valid'] else 'FAIL'}")

# Custom validation workflow
def validate_library_for_dia_analysis(spec_lib):
    """
    Custom validation for DIA analysis requirements.

    Parameters:
    - spec_lib: Spectral library whose ``precursor_df`` DataFrame contains at
      least 'charge' and 'mz' columns.

    Returns:
    Dictionary with keys 'suitable_for_dia', 'issues', 'precursor_count',
    'mz_range', and 'charge_distribution'.
    """
    issues = []
    n_precursors = len(spec_lib.precursor_df)

    # Guard: an empty library would otherwise raise ZeroDivisionError in the
    # charge-ratio check and yield NaN m/z bounds.
    if n_precursors == 0:
        return {
            'suitable_for_dia': False,
            'issues': ["Insufficient precursors for DIA analysis"],
            'precursor_count': 0,
            'mz_range': (None, None),
            'charge_distribution': {}
        }

    # Check for minimum precursors
    if n_precursors < 1000:
        issues.append("Insufficient precursors for DIA analysis")

    # Check charge distribution: at least 30% doubly charged precursors
    charge_dist = spec_lib.precursor_df['charge'].value_counts()
    if charge_dist.get(2, 0) / n_precursors < 0.3:
        issues.append("Low proportion of doubly charged precursors")

    # Check m/z coverage: require at least 500 Th of spread
    mz_min = spec_lib.precursor_df['mz'].min()
    mz_max = spec_lib.precursor_df['mz'].max()
    if mz_max - mz_min < 500:
        issues.append("Limited m/z range coverage")

    return {
        'suitable_for_dia': len(issues) == 0,
        'issues': issues,
        'precursor_count': n_precursors,
        'mz_range': (mz_min, mz_max),
        'charge_distribution': charge_dist.to_dict()
    }

# Apply custom validation; dia_validation is the dict described by
# validate_library_for_dia_analysis ('suitable_for_dia', 'issues', ...)
dia_validation = validate_library_for_dia_analysis(loaded_lib)
print(f"DIA suitability: {dia_validation}")

Install with Tessl CLI

npx tessl i tessl/pypi-alphabase

docs

advanced-peptide-operations.md

advanced-spectral-libraries.md

chemical-constants.md

fragment-ions.md

index.md

io-utilities.md

protein-analysis.md

psm-readers.md

quantification.md

smiles-chemistry.md

spectral-libraries.md

tile.json