# Advanced Spectral Library Operations

Extended spectral library functionality including decoy generation, format conversion, library validation, and specialized library formats. Provides comprehensive tools for spectral library manipulation, quality control, and integration with various proteomics workflows and search engines.

## Capabilities

### Decoy Generation and Management

Comprehensive decoy generation capabilities supporting multiple strategies and integration with target-decoy search workflows.

```python { .api }
class SpecLibDecoy:
    """Extended spectral library with integrated decoy generation and management."""

    def __init__(self, target_lib: SpecLibBase = None):
        """
        Initialize spectral library with decoy capabilities.

        Parameters:
        - target_lib: Target spectral library to extend with decoys
        """

    def generate_decoys(self, method: str = 'diann',
                        decoy_prefix: str = 'DECOY_',
                        keep_peptide_types: bool = True) -> None:
        """
        Generate decoy sequences using specified method.

        Parameters:
        - method: Decoy generation method ('diann', 'pseudo_reverse', 'shuffle')
        - decoy_prefix: Prefix for decoy protein identifiers
        - keep_peptide_types: Preserve peptide characteristics in decoys
        """

    def validate_decoy_quality(self) -> dict:
        """
        Assess quality of generated decoy sequences.

        Returns:
        Dictionary with decoy quality metrics and statistics
        """

    def get_target_decoy_ratio(self) -> float:
        """
        Calculate ratio of target to decoy sequences.

        Returns:
        Target-to-decoy ratio
        """

    def separate_targets_and_decoys(self) -> tuple['SpecLibBase', 'SpecLibBase']:
        """
        Split library into separate target and decoy libraries.

        Returns:
        Tuple of (target_library, decoy_library)
        """

class DIANNDecoyGenerator:
    """DIANN-style decoy generation with advanced sequence manipulation."""

    def __init__(self, keep_peptide_types: bool = True,
                 min_peptide_length: int = 6,
                 max_peptide_length: int = 30):
        """
        Initialize DIANN decoy generator.

        Parameters:
        - keep_peptide_types: Preserve tryptic characteristics
        - min_peptide_length: Minimum length for generated decoys
        - max_peptide_length: Maximum length for generated decoys
        """

    def generate_decoy_sequence(self, target_sequence: str,
                                target_proteins: str) -> tuple[str, str]:
        """
        Generate single decoy sequence from target.

        Parameters:
        - target_sequence: Target peptide sequence
        - target_proteins: Target protein identifiers

        Returns:
        Tuple of (decoy_sequence, decoy_proteins)
        """

    def generate_decoy_library(self, target_lib: SpecLibBase,
                               decoy_prefix: str = 'DECOY_') -> SpecLibBase:
        """
        Generate complete decoy library from target library.

        Parameters:
        - target_lib: Target spectral library
        - decoy_prefix: Prefix for decoy identifiers

        Returns:
        New spectral library with decoy sequences
        """

    def validate_sequence_properties(self, target_seq: str,
                                     decoy_seq: str) -> dict:
        """
        Compare properties between target and decoy sequences.

        Parameters:
        - target_seq: Original target sequence
        - decoy_seq: Generated decoy sequence

        Returns:
        Dictionary with property comparisons
        """

class PseudoReverseDecoyGenerator:
    """Pseudo-reverse decoy generation with tryptic preservation."""

    def __init__(self, cleavage_rule: str = 'trypsin'):
        """
        Initialize pseudo-reverse generator.

        Parameters:
        - cleavage_rule: Enzyme cleavage specificity to preserve
        """

    def generate_pseudo_reverse(self, sequence: str) -> str:
        """
        Generate pseudo-reverse sequence preserving cleavage sites.

        Parameters:
        - sequence: Target peptide sequence

        Returns:
        Pseudo-reverse decoy sequence
        """

    def preserve_cleavage_specificity(self, sequence: str,
                                      enzyme: str = 'trypsin') -> str:
        """
        Ensure decoy maintains enzymatic cleavage characteristics.

        Parameters:
        - sequence: Input sequence
        - enzyme: Enzyme specificity to preserve

        Returns:
        Modified sequence with preserved cleavage sites
        """

class BaseDecoyGenerator:
    """Base class for custom decoy generation strategies."""

    def __init__(self):
        """Initialize base decoy generator."""

    def generate_decoy(self, target_sequence: str,
                       target_proteins: str,
                       **kwargs) -> tuple[str, str]:
        """
        Generate decoy sequence (to be implemented by subclasses).

        Parameters:
        - target_sequence: Target peptide sequence
        - target_proteins: Target protein identifiers
        - **kwargs: Strategy-specific parameters

        Returns:
        Tuple of (decoy_sequence, decoy_proteins)
        """
        raise NotImplementedError("Subclasses must implement generate_decoy")

    def validate_decoy(self, target_seq: str, decoy_seq: str) -> bool:
        """
        Validate generated decoy sequence.

        Parameters:
        - target_seq: Original target sequence
        - decoy_seq: Generated decoy sequence

        Returns:
        True if decoy passes validation checks
        """
        return True

class SpecLibDecoyProvider:
    """Provider system for decoy generation strategies."""

    @staticmethod
    def get_generator(method: str, **kwargs) -> BaseDecoyGenerator:
        """
        Get decoy generator instance by method name.

        Parameters:
        - method: Generator method ('diann', 'pseudo_reverse', 'shuffle')
        - **kwargs: Method-specific parameters

        Returns:
        Configured decoy generator instance
        """

    @staticmethod
    def list_available_methods() -> List[str]:
        """
        List all available decoy generation methods.

        Returns:
        List of method names
        """

    @staticmethod
    def register_custom_generator(name: str,
                                  generator_class: type) -> None:
        """
        Register custom decoy generation method.

        Parameters:
        - name: Name for the custom method
        - generator_class: Class implementing BaseDecoyGenerator
        """
```

### Flat Spectral Library Format

Specialized flat format for efficient storage and retrieval of large spectral libraries.

```python { .api }
class SpecLibFlat:
    """Flat spectral library format optimized for large-scale storage."""

    def __init__(self):
        """Initialize flat spectral library."""

    def from_spec_lib(self, spec_lib: SpecLibBase) -> None:
        """
        Convert standard spectral library to flat format.

        Parameters:
        - spec_lib: Standard SpecLibBase to convert
        """

    def to_spec_lib(self) -> SpecLibBase:
        """
        Convert flat library back to standard format.

        Returns:
        Standard SpecLibBase instance
        """

    def save_flat(self, filepath: str,
                  compression: str = 'gzip') -> None:
        """
        Save flat library to compressed file.

        Parameters:
        - filepath: Output file path
        - compression: Compression method ('gzip', 'bz2', 'xz')
        """

    def load_flat(self, filepath: str) -> None:
        """
        Load flat library from compressed file.

        Parameters:
        - filepath: Input file path
        """

    def get_precursor_range(self, start_idx: int,
                            end_idx: int) -> pd.DataFrame:
        """
        Get precursor range without loading full library.

        Parameters:
        - start_idx: Starting precursor index
        - end_idx: Ending precursor index

        Returns:
        DataFrame with precursor range
        """

    def query_by_mz_range(self, min_mz: float,
                          max_mz: float) -> pd.DataFrame:
        """
        Query precursors by m/z range efficiently.

        Parameters:
        - min_mz: Minimum m/z value
        - max_mz: Maximum m/z value

        Returns:
        DataFrame with precursors in m/z range
        """

    def create_index(self, index_type: str = 'mz') -> None:
        """
        Create optimized index for fast queries.

        Parameters:
        - index_type: Type of index ('mz', 'rt', 'sequence')
        """

    def optimize_storage(self) -> dict:
        """
        Optimize storage layout and compression.

        Returns:
        Dictionary with optimization statistics
        """
```

### Library Readers and Format Conversion

Comprehensive readers for various spectral library formats and conversion utilities.

```python { .api }
class LibraryReaderBase:
    """Base class for spectral library format readers."""

    def __init__(self):
        """Initialize library reader."""

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from file.

        Parameters:
        - filepath: Path to library file
        - **kwargs: Format-specific options

        Returns:
        Loaded spectral library
        """
        raise NotImplementedError("Subclasses must implement read_library")

    def validate_format(self, filepath: str) -> bool:
        """
        Validate if file matches expected format.

        Parameters:
        - filepath: File path to validate

        Returns:
        True if format is compatible
        """
        return True

    def get_library_info(self, filepath: str) -> dict:
        """
        Get library metadata without full loading.

        Parameters:
        - filepath: Library file path

        Returns:
        Dictionary with library information
        """
        return {}

class CSVLibraryReader(LibraryReaderBase):
    """Reader for CSV-format spectral libraries."""

    def __init__(self, delimiter: str = ','):
        """
        Initialize CSV reader.

        Parameters:
        - delimiter: CSV delimiter character
        """

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from CSV file.

        Parameters:
        - filepath: Path to CSV library file
        - **kwargs: CSV reading options

        Returns:
        Loaded spectral library
        """

    def set_column_mapping(self, mapping: dict) -> None:
        """
        Set custom column name mappings.

        Parameters:
        - mapping: Dictionary mapping CSV columns to standard names
        """

class TSVLibraryReader(LibraryReaderBase):
    """Reader for TSV-format spectral libraries."""

    def __init__(self):
        """Initialize TSV reader."""

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from TSV file.

        Parameters:
        - filepath: Path to TSV library file
        - **kwargs: TSV reading options

        Returns:
        Loaded spectral library
        """

class MSPLibraryReader(LibraryReaderBase):
    """Reader for MSP-format spectral libraries."""

    def __init__(self):
        """Initialize MSP reader."""

    def read_library(self, filepath: str, **kwargs) -> SpecLibBase:
        """
        Read spectral library from MSP file.

        Parameters:
        - filepath: Path to MSP library file
        - **kwargs: MSP reading options

        Returns:
        Loaded spectral library
        """

    def parse_msp_entry(self, entry_text: str) -> dict:
        """
        Parse individual MSP library entry.

        Parameters:
        - entry_text: Raw MSP entry text

        Returns:
        Dictionary with parsed entry information
        """

def get_library_reader(filepath: str) -> LibraryReaderBase:
    """
    Auto-detect and return appropriate library reader.

    Parameters:
    - filepath: Path to library file

    Returns:
    Appropriate reader instance for the file format
    """

def convert_library_format(input_path: str,
                           output_path: str,
                           input_format: str = None,
                           output_format: str = 'hdf5') -> None:
    """
    Convert spectral library between formats.

    Parameters:
    - input_path: Input library file path
    - output_path: Output library file path
    - input_format: Input format (auto-detected if None)
    - output_format: Output format ('hdf5', 'csv', 'msp')
    """
```

### Library Translation and Format Support

Utilities for translating between different spectral library formats and search engine requirements.

```python { .api }
class WritingProcess:
    """Multiprocessing writer for efficient library export."""

    def __init__(self, n_processes: int = 4):
        """
        Initialize multiprocessing writer.

        Parameters:
        - n_processes: Number of worker processes
        """

    def write_library_parallel(self, spec_lib: SpecLibBase,
                               output_path: str,
                               format_type: str = 'tsv',
                               chunk_size: int = 10000) -> None:
        """
        Write library using parallel processing.

        Parameters:
        - spec_lib: Spectral library to write
        - output_path: Output file path
        - format_type: Output format
        - chunk_size: Number of precursors per chunk
        """

    def write_multiple_formats(self, spec_lib: SpecLibBase,
                               base_path: str,
                               formats: List[str]) -> dict:
        """
        Write library in multiple formats simultaneously.

        Parameters:
        - spec_lib: Spectral library to write
        - base_path: Base output path (extensions added automatically)
        - formats: List of output formats

        Returns:
        Dictionary mapping formats to output file paths
        """

def translate_to_diann_format(spec_lib: SpecLibBase,
                              output_path: str) -> None:
    """
    Translate library to DIA-NN compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for DIA-NN library
    """

def translate_to_spectronaut_format(spec_lib: SpecLibBase,
                                    output_path: str) -> None:
    """
    Translate library to Spectronaut compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for Spectronaut library
    """

def translate_to_openswath_format(spec_lib: SpecLibBase,
                                  output_path: str) -> None:
    """
    Translate library to OpenSWATH compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for OpenSWATH library
    """

def translate_to_skyline_format(spec_lib: SpecLibBase,
                                output_path: str) -> None:
    """
    Translate library to Skyline compatible format.

    Parameters:
    - spec_lib: Input spectral library
    - output_path: Output file path for Skyline library
    """

def create_search_engine_libraries(spec_lib: SpecLibBase,
                                   output_dir: str,
                                   engines: List[str] = None) -> dict:
    """
    Create libraries for multiple search engines.

    Parameters:
    - spec_lib: Input spectral library
    - output_dir: Directory for output files
    - engines: List of search engines ('diann', 'spectronaut', 'openswath')

    Returns:
    Dictionary mapping engines to output file paths
    """
```

### Library Validation and Quality Control

Comprehensive validation system for assessing spectral library quality and completeness.

```python { .api }
class Schema:
    """Schema validation system for spectral libraries."""

    def __init__(self, required_columns: List[str] = None,
                 optional_columns: List[str] = None):
        """
        Initialize schema validator.

        Parameters:
        - required_columns: List of required column names
        - optional_columns: List of optional column names
        """

    def validate_library(self, spec_lib: SpecLibBase) -> dict:
        """
        Validate spectral library against schema.

        Parameters:
        - spec_lib: Spectral library to validate

        Returns:
        Dictionary with validation results and issues
        """

    def add_column_requirement(self, column: str,
                               requirement_type: str,
                               **kwargs) -> None:
        """
        Add column validation requirement.

        Parameters:
        - column: Column name
        - requirement_type: Type of requirement ('required', 'optional', 'forbidden')
        - **kwargs: Additional requirement parameters
        """

class Required:
    """Required column specification for schema validation."""

    def __init__(self, column_name: str,
                 data_type: type = None,
                 validation_func: callable = None):
        """
        Define required column.

        Parameters:
        - column_name: Name of required column
        - data_type: Expected data type
        - validation_func: Custom validation function
        """

    def validate(self, df: pd.DataFrame) -> dict:
        """
        Validate column presence and properties.

        Parameters:
        - df: DataFrame to validate

        Returns:
        Validation result dictionary
        """

class Optional:
    """Optional column specification for schema validation."""

    def __init__(self, column_name: str,
                 data_type: type = None,
                 default_value=None):
        """
        Define optional column.

        Parameters:
        - column_name: Name of optional column
        - data_type: Expected data type if present
        - default_value: Default value if column missing
        """

    def validate(self, df: pd.DataFrame) -> dict:
        """
        Validate optional column if present.

        Parameters:
        - df: DataFrame to validate

        Returns:
        Validation result dictionary
        """

class Column:
    """Generic column specification with flexible validation."""

    def __init__(self, name: str,
                 required: bool = True,
                 data_type: type = None,
                 min_value=None,
                 max_value=None,
                 allowed_values: List = None):
        """
        Define column specification.

        Parameters:
        - name: Column name
        - required: Whether column is required
        - data_type: Expected data type
        - min_value: Minimum allowed value
        - max_value: Maximum allowed value
        - allowed_values: List of allowed values
        """

    def validate(self, df: pd.DataFrame) -> dict:
        """
        Perform comprehensive column validation.

        Parameters:
        - df: DataFrame to validate

        Returns:
        Detailed validation results
        """

def validate_spectral_library_completeness(spec_lib: SpecLibBase) -> dict:
    """
    Validate spectral library completeness and consistency.

    Parameters:
    - spec_lib: Spectral library to validate

    Returns:
    Dictionary with completeness assessment
    """

def assess_library_quality_metrics(spec_lib: SpecLibBase) -> dict:
    """
    Calculate comprehensive library quality metrics.

    Parameters:
    - spec_lib: Spectral library to assess

    Returns:
    Dictionary with quality metrics and statistics
    """

def check_library_integrity(spec_lib: SpecLibBase) -> dict:
    """
    Check spectral library data integrity.

    Parameters:
    - spec_lib: Spectral library to check

    Returns:
    Dictionary with integrity check results
    """

def generate_library_report(spec_lib: SpecLibBase,
                            output_path: str = None) -> dict:
    """
    Generate comprehensive library quality report.

    Parameters:
    - spec_lib: Spectral library to analyze
    - output_path: Optional path to save HTML report

    Returns:
    Dictionary with report data and statistics
    """
```

## Usage Examples

### Decoy Generation and Management

```python
from alphabase.spectral_library.decoy import SpecLibDecoy, DIANNDecoyGenerator
from alphabase.spectral_library.base import SpecLibBase
import pandas as pd

# Create target library
target_lib = SpecLibBase()
target_lib.precursor_df = pd.DataFrame({
    'sequence': ['PEPTIDE', 'SEQUENCE', 'EXAMPLE'],
    'mods': ['', 'Phospho (STY)@2', ''],
    'charge': [2, 3, 2],
    'proteins': ['P12345', 'P67890', 'P11111']
})
target_lib.refine_df()

# Create decoy library using DIANN method
decoy_lib = SpecLibDecoy(target_lib)
decoy_lib.generate_decoys(method='diann', decoy_prefix='DECOY_')

print(f"Target precursors: {len(target_lib.precursor_df)}")
print(f"Total with decoys: {len(decoy_lib.precursor_df)}")
print(f"Target-decoy ratio: {decoy_lib.get_target_decoy_ratio():.1f}")

# Validate decoy quality
quality_metrics = decoy_lib.validate_decoy_quality()
print(f"Decoy quality metrics: {quality_metrics}")

# Separate targets and decoys
targets, decoys = decoy_lib.separate_targets_and_decoys()
print(f"Separated: {len(targets.precursor_df)} targets, {len(decoys.precursor_df)} decoys")
```

### Advanced Decoy Generation

```python
from alphabase.spectral_library.decoy import (
    DIANNDecoyGenerator, PseudoReverseDecoyGenerator, SpecLibDecoyProvider
)

# Use DIANN decoy generator directly
diann_gen = DIANNDecoyGenerator(keep_peptide_types=True)
target_seq = "PEPTIDE"
decoy_seq, decoy_proteins = diann_gen.generate_decoy_sequence(
    target_seq, "P12345"
)
print(f"DIANN decoy: {target_seq} -> {decoy_seq}")

# Validate sequence properties
properties = diann_gen.validate_sequence_properties(target_seq, decoy_seq)
print(f"Property comparison: {properties}")

# Use pseudo-reverse generator
pseudo_gen = PseudoReverseDecoyGenerator(cleavage_rule='trypsin')
pseudo_decoy = pseudo_gen.generate_pseudo_reverse(target_seq)
print(f"Pseudo-reverse decoy: {target_seq} -> {pseudo_decoy}")

# Use provider system
generator = SpecLibDecoyProvider.get_generator('diann', keep_peptide_types=True)
print(f"Available methods: {SpecLibDecoyProvider.list_available_methods()}")
```

### Flat Library Format Operations

```python
from alphabase.spectral_library.flat import SpecLibFlat

# Convert standard library to flat format
flat_lib = SpecLibFlat()
flat_lib.from_spec_lib(target_lib)

# Save in compressed format
flat_lib.save_flat('library_flat.gz', compression='gzip')

# Load flat library
new_flat = SpecLibFlat()
new_flat.load_flat('library_flat.gz')

# Efficient range queries
precursor_range = new_flat.get_precursor_range(0, 10)
print(f"First 10 precursors: {len(precursor_range)}")

# Query by m/z range
mz_range = new_flat.query_by_mz_range(400.0, 500.0)
print(f"Precursors in m/z 400-500: {len(mz_range)}")

# Create index for fast queries
new_flat.create_index(index_type='mz')

# Optimize storage
optimization_stats = new_flat.optimize_storage()
print(f"Storage optimization: {optimization_stats}")
```

### Library Format Conversion

```python
from alphabase.spectral_library.reader import (
    get_library_reader, convert_library_format
)
from alphabase.spectral_library.translate import (
    translate_to_diann_format, create_search_engine_libraries
)

# Auto-detect and read library format
reader = get_library_reader('unknown_library.tsv')
loaded_lib = reader.read_library('unknown_library.tsv')
print(f"Loaded library: {len(loaded_lib.precursor_df)} precursors")

# Convert between formats
convert_library_format(
    input_path='library.csv',
    output_path='library.h5',
    input_format='csv',
    output_format='hdf5'
)

# Translate to specific search engine formats
translate_to_diann_format(loaded_lib, 'library_diann.tsv')
print("Translated to DIA-NN format")

# Create libraries for multiple search engines
engine_libraries = create_search_engine_libraries(
    loaded_lib,
    output_dir='./libraries/',
    engines=['diann', 'spectronaut', 'openswath']
)
print(f"Created libraries: {list(engine_libraries.keys())}")
```

### Library Validation and Quality Control

```python
from alphabase.spectral_library.validate import (
    Schema, Required, Optional, validate_spectral_library_completeness,
    assess_library_quality_metrics, generate_library_report
)

# Create validation schema
schema = Schema()
schema.add_column_requirement('sequence', 'required', data_type=str)
schema.add_column_requirement('charge', 'required', data_type=int)
schema.add_column_requirement('proteins', 'required', data_type=str)
schema.add_column_requirement('rt', 'optional', data_type=float)

# Validate library against schema
validation_results = schema.validate_library(loaded_lib)
print(f"Schema validation: {validation_results['passed']}")
if not validation_results['passed']:
    print(f"Issues: {validation_results['issues']}")

# Check library completeness
completeness = validate_spectral_library_completeness(loaded_lib)
print(f"Library completeness:")
print(f"  Precursor completeness: {completeness['precursor_completeness']:.1%}")
print(f"  Fragment completeness: {completeness['fragment_completeness']:.1%}")

# Assess quality metrics
quality_metrics = assess_library_quality_metrics(loaded_lib)
print(f"Quality metrics:")
print(f"  Average fragments per precursor: {quality_metrics['avg_fragments_per_precursor']:.1f}")
print(f"  m/z range: {quality_metrics['mz_range']}")
print(f"  Charge distribution: {quality_metrics['charge_distribution']}")

# Generate comprehensive report
report_data = generate_library_report(loaded_lib, 'library_report.html')
print(f"Generated report with {len(report_data['sections'])} sections")
```

### Parallel Library Processing

```python
from alphabase.spectral_library.translate import WritingProcess

# Process large library with multiple workers
writer = WritingProcess(n_processes=8)

# Write library in parallel
writer.write_library_parallel(
    spec_lib=loaded_lib,
    output_path='large_library.tsv',
    format_type='tsv',
    chunk_size=50000
)

# Write multiple formats simultaneously
format_paths = writer.write_multiple_formats(
    spec_lib=loaded_lib,
    base_path='library',
    formats=['tsv', 'csv', 'msp']
)
print(f"Created formats: {format_paths}")
```

### Advanced Validation Workflows

```python
from alphabase.spectral_library.validate import Required, Optional, Column

# Create detailed column specifications
columns = [
    Required('sequence', data_type=str),
    Required('charge', data_type=int),
    Required('proteins', data_type=str),
    Optional('rt', data_type=float, default_value=0.0),
    Column('mz', required=True, data_type=float, min_value=100.0, max_value=2000.0),
    Column('intensity', required=False, data_type=float, min_value=0.0)
]

# Validate each column specification
validation_results = []
for col_spec in columns:
    result = col_spec.validate(loaded_lib.precursor_df)
    validation_results.append(result)
    print(f"Column {col_spec.name}: {'PASS' if result['valid'] else 'FAIL'}")

# Custom validation workflow
def validate_library_for_dia_analysis(spec_lib):
    """Custom validation for DIA analysis requirements."""
    issues = []

    # Check for minimum precursors
    if len(spec_lib.precursor_df) < 1000:
        issues.append("Insufficient precursors for DIA analysis")

    # Check charge distribution
    charge_dist = spec_lib.precursor_df['charge'].value_counts()
    if charge_dist.get(2, 0) / len(spec_lib.precursor_df) < 0.3:
        issues.append("Low proportion of doubly charged precursors")

    # Check m/z coverage
    mz_min = spec_lib.precursor_df['mz'].min()
    mz_max = spec_lib.precursor_df['mz'].max()
    if mz_max - mz_min < 500:
        issues.append("Limited m/z range coverage")

    return {
        'suitable_for_dia': len(issues) == 0,
        'issues': issues,
        'precursor_count': len(spec_lib.precursor_df),
        'mz_range': (mz_min, mz_max),
        'charge_distribution': charge_dist.to_dict()
    }

# Apply custom validation
dia_validation = validate_library_for_dia_analysis(loaded_lib)
print(f"DIA suitability: {dia_validation}")
```