0
# Fragment Ion Generation
1
2
This module generates complete fragment ion series, with support for multiple fragment types, neutral losses, and charge states. It enables the creation of theoretical spectra for spectral library construction, peptide identification, and mass spectrometry data analysis workflows.
3
4
## Capabilities
5
6
### Fragment Type Definitions
7
8
Core classes and constants that define the available fragment ion types and their properties.
9
10
```python { .api }
11
class Direction:
12
"""Fragment direction constants."""
13
FORWARD: str = "forward" # N-terminal fragments (a, b, c)
14
REVERSE: str = "reverse" # C-terminal fragments (x, y, z)
15
16
class Loss:
17
"""Fragment loss type constants."""
18
MODLOSS: int = 0 # Modification loss
19
H2O: int = 1 # Water loss (-18.01056 Da)
20
NH3: int = 2 # Ammonia loss (-17.02655 Da)
21
MODLOSS_H2O: int = 3 # Modification + water loss
22
MODLOSS_NH3: int = 4 # Modification + ammonia loss
23
24
class Series:
25
"""Fragment series constants."""
26
A: int = 0 # a-ions (N-terminal, -CO)
27
B: int = 1 # b-ions (N-terminal)
28
C: int = 2 # c-ions (N-terminal, +NH3)
29
X: int = 3 # x-ions (C-terminal, +CO)
30
Y: int = 4 # y-ions (C-terminal)
31
Z: int = 5 # z-ions (C-terminal, -NH3)
32
33
@dataclass
class FragmentType:
34
"""Dataclass defining complete fragment type."""
35
series: int # Fragment series (A, B, C, X, Y, Z)
36
loss: int # Loss type (MODLOSS, H2O, NH3, etc.)
37
direction: str # Direction (Direction.FORWARD or Direction.REVERSE)
38
charge: int # Fragment charge state
39
40
# Available fragment types
41
FRAGMENT_TYPES: dict # Dictionary of all fragment type definitions
42
DIRECTION_MAPPING: dict = {"forward": 'forward', "reverse": 'reverse'}
43
LOSS_MAPPING: dict = {0: 'noloss', 1: 'H2O', 2: 'NH3', 3: 'modloss_H2O', 4: 'modloss_NH3'} # NOTE: key 0 is labelled 'noloss' here, while Loss.MODLOSS is also 0 -- verify which is intended
44
SERIES_MAPPING: dict = {0: 'a', 1: 'b', 2: 'c', 3: 'x', 4: 'y', 5: 'z'}
45
```
46
47
### Fragment Type Generation and Validation
48
49
Functions for creating, parsing, and validating fragment type combinations with charge states.
50
51
```python { .api }
52
def get_charged_frag_types(frag_types: List[str], charges: List[int]) -> List[str]:
53
"""
54
Generate charged fragment type combinations.
55
56
Parameters:
57
- frag_types: List of fragment types like ['b', 'y', 'b-H2O']
58
- charges: List of charge states like [1, 2, 3]
59
60
Returns:
61
List of charged fragment types, e.g. ['b+', 'b++', 'y+', 'y++', 'b-H2O+', 'b-H2O++'] for frag_types ['b', 'y', 'b-H2O'] and charges [1, 2]
62
"""
63
64
def sort_charged_frag_types(frag_types: List[str]) -> List[str]:
65
"""
66
Sort fragment types by loss/no-loss categories.
67
68
Parameters:
69
- frag_types: List of charged fragment types
70
71
Returns:
72
Sorted list with no-loss fragments first, then losses
73
"""
74
75
def filter_valid_charged_frag_types(frag_types: List[str]) -> List[str]:
76
"""
77
Validate and filter fragment type list.
78
79
Parameters:
80
- frag_types: List of fragment type strings
81
82
Returns:
83
Filtered list with only valid fragment types
84
"""
85
86
def parse_charged_frag_type(frag_type: str) -> tuple[str, int]:
87
"""
88
Parse fragment type string and extract charge.
89
90
Parameters:
91
- frag_type: Fragment type like 'b++' or 'y-H2O+'
92
93
Returns:
94
Tuple of (base_type, charge) like ('b', 2) or ('y-H2O', 1)
95
"""
96
97
def sort_charged_frag_types(frag_types: List[str]) -> List[str]:
98
"""
99
Sort fragment types by loss/no-loss categories.
100
101
Parameters:
102
- frag_types: List of charged fragment types
103
104
Returns:
105
Sorted list with no-loss fragments first, then losses
106
"""
107
108
def filter_valid_charged_frag_types(frag_types: List[str]) -> List[str]:
109
"""
110
Validate and filter fragment type list.
111
112
Parameters:
113
- frag_types: List of fragment type strings
114
115
Returns:
116
Filtered list with only valid fragment types
117
"""
118
```
119
120
### Fragment DataFrame Creation
121
122
Functions for creating and initializing fragment DataFrames with proper structure and indexing.
123
124
```python { .api }
125
def init_zero_fragment_dataframe(precursor_df: pd.DataFrame,
126
frag_types: List[str]) -> pd.DataFrame:
127
"""
128
Initialize a fragment DataFrame with all intensities set to zero.
129
130
Parameters:
131
- precursor_df: Precursor DataFrame with sequence and charge info
132
- frag_types: List of fragment types to include
133
134
Returns:
135
DataFrame with fragment structure and zero intensities
136
"""
137
138
def init_fragment_dataframe_from_other(template_df: pd.DataFrame,
139
frag_types: List[str]) -> pd.DataFrame:
140
"""
141
Initialize fragment DataFrame from reference template.
142
143
Parameters:
144
- template_df: Template DataFrame with proper structure
145
- frag_types: List of fragment types
146
147
Returns:
148
New DataFrame with same structure but specified fragment types
149
"""
150
151
def init_fragment_by_precursor_dataframe(precursor_df: pd.DataFrame,
152
frag_types: List[str],
153
max_frag_charge: int = 2) -> pd.DataFrame:
154
"""
155
Initialize fragment DataFrame for precursor list.
156
157
Parameters:
158
- precursor_df: Precursor DataFrame
159
- frag_types: Fragment types to generate
160
- max_frag_charge: Maximum fragment charge to consider
161
162
Returns:
163
Complete fragment DataFrame with m/z calculations
164
"""
165
166
def create_fragment_mz_dataframe(precursor_df: pd.DataFrame,
167
frag_types: List[str],
168
max_frag_charge: int = 2) -> pd.DataFrame:
169
"""
170
Generate fragment m/z values for spectral library.
171
172
Parameters:
173
- precursor_df: Precursor DataFrame with sequences and modifications
174
- frag_types: List of fragment types like ['b+', 'y+', 'b++', 'y++']
175
- max_frag_charge: Maximum fragment charge state
176
177
Returns:
178
DataFrame with fragment m/z values and metadata
179
"""
180
```
181
182
### Fragment Processing and Optimization
183
184
Functions for processing, filtering, and optimizing fragment DataFrames for spectral libraries.
185
186
```python { .api }
187
def flatten_fragments(fragment_df: pd.DataFrame) -> pd.DataFrame:
188
"""
189
Convert tabular fragment data to linear format.
190
191
Parameters:
192
- fragment_df: Fragment DataFrame in tabular format
193
194
Returns:
195
Flattened DataFrame with one row per fragment
196
"""
197
198
def remove_unused_fragments(fragment_mz_df: pd.DataFrame,
199
fragment_intensity_df: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
200
"""
201
Compress fragment libraries by removing unused entries.
202
203
Parameters:
204
- fragment_mz_df: Fragment m/z DataFrame
205
- fragment_intensity_df: Fragment intensity DataFrame
206
207
Returns:
208
Tuple of (compressed_mz_df, compressed_intensity_df)
209
"""
210
211
def calc_fragment_count(fragment_df: pd.DataFrame) -> pd.Series:
212
"""
213
Count fragments per precursor.
214
215
Parameters:
216
- fragment_df: Fragment DataFrame
217
218
Returns:
219
Series with fragment counts indexed by precursor
220
"""
221
222
def filter_fragment_number(fragment_mz_df: pd.DataFrame,
223
fragment_intensity_df: pd.DataFrame,
224
top_k: int = 100) -> tuple[pd.DataFrame, pd.DataFrame]:
225
"""
226
Filter top-k fragments per precursor by intensity.
227
228
Parameters:
229
- fragment_mz_df: Fragment m/z DataFrame
230
- fragment_intensity_df: Fragment intensity DataFrame
231
- top_k: Number of top fragments to keep per precursor
232
233
Returns:
234
Tuple of (filtered_mz_df, filtered_intensity_df)
235
"""
236
237
def calc_fragment_cardinality(fragment_df: pd.DataFrame,
238
group_by: str = 'proteins') -> pd.DataFrame:
239
"""
240
Calculate fragment sharing statistics across groups.
241
242
Parameters:
243
- fragment_df: Fragment DataFrame
244
- group_by: Column to group by for cardinality calculation
245
246
Returns:
247
DataFrame with fragment sharing statistics
248
"""
249
```
250
251
### Mass Calculations
252
253
Direct mass calculation functions for peptide fragments and b/y ion series.
254
255
```python { .api }
256
def calc_b_y_and_peptide_mass(sequences: List[str],
257
mod_masses: Optional[np.ndarray] = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
258
"""
259
Calculate b/y fragment ions and peptide mass simultaneously.
260
261
Parameters:
262
- sequences: List of peptide sequences
263
- mod_masses: Optional modification masses array
264
265
Returns:
266
Tuple of (b_masses, y_masses, peptide_masses)
267
"""
268
269
def calc_b_y_and_peptide_masses_for_same_len_seqs(sequences: List[str],
270
mod_masses: Optional[np.ndarray] = None) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
271
"""
272
Batch b/y ion and peptide mass calculation for equal-length sequences.
273
274
Parameters:
275
- sequences: List of equal-length peptide sequences
276
- mod_masses: Optional modification masses array
277
278
Returns:
279
Tuple of (b_masses, y_masses, peptide_masses) with optimized layout
280
"""
281
282
def calc_peptide_masses_for_same_len_seqs(sequences: List[str],
283
mod_masses: Optional[np.ndarray] = None) -> np.ndarray:
284
"""
285
Calculate peptide masses for equal-length sequences.
286
287
Parameters:
288
- sequences: List of equal-length peptide sequences
289
- mod_masses: Optional modification masses array
290
291
Returns:
292
1D numpy array with peptide masses
293
"""
294
295
def calc_diff_modification_mass(mod_sequences: List[str]) -> np.ndarray:
296
"""
297
Calculate mass differences for open search workflows.
298
299
Parameters:
300
- mod_sequences: List of modified sequences
301
302
Returns:
303
Array with mass differences from unmodified peptides
304
"""
305
```
306
307
## Usage Examples
308
309
### Basic Fragment Generation
310
311
```python
312
from alphabase.peptide.fragment import get_charged_frag_types, create_fragment_mz_dataframe
313
import pandas as pd
314
315
# Define fragment types and charges
316
base_types = ['b', 'y', 'b-H2O', 'y-H2O']
317
charges = [1, 2]
318
frag_types = get_charged_frag_types(base_types, charges)
319
print(f"Fragment types: {frag_types}")
320
# Output: ['b+', 'b++', 'y+', 'y++', 'b-H2O+', 'b-H2O++', 'y-H2O+', 'y-H2O++']
321
322
# Create precursor DataFrame
323
precursor_df = pd.DataFrame({
324
'sequence': ['PEPTIDE', 'SEQUENCE', 'EXAMPLE'],
325
'mods': ['', 'Phospho (STY)@2', ''],
326
'charge': [2, 3, 2],
327
'proteins': ['P12345', 'P67890', 'P11111']
328
})
329
330
# Generate fragment m/z values
331
fragment_mz_df = create_fragment_mz_dataframe(
332
precursor_df=precursor_df,
333
frag_types=frag_types,
334
max_frag_charge=2
335
)
336
337
print(f"Generated {len(fragment_mz_df)} fragment entries")
338
```
339
340
### Working with Fragment Types
341
342
```python
343
from alphabase.peptide.fragment import FragmentType, Series, Loss, Direction
344
345
# Create specific fragment type
346
frag_type = FragmentType(
347
series=Series.B, # b-ion
348
loss=Loss.H2O, # with water loss
349
direction=Direction.FORWARD, # N-terminal
350
charge=2 # doubly charged
351
)
352
353
print(f"Fragment: {frag_type}")
354
355
# Parse fragment type string
356
from alphabase.peptide.fragment import parse_charged_frag_type
357
358
base_type, charge = parse_charged_frag_type('y-NH3++')
359
print(f"Base type: {base_type}, Charge: {charge}")
360
# Output: Base type: y-NH3, Charge: 2
361
```
362
363
### Mass Calculations
364
365
```python
366
from alphabase.peptide.fragment import calc_b_y_and_peptide_mass
367
import numpy as np
368
369
# Calculate b/y ions and peptide masses
370
sequences = ['PEPTIDE', 'SEQUENCE']
371
b_masses, y_masses, peptide_masses = calc_b_y_and_peptide_mass(sequences)
372
373
print(f"B-ion masses shape: {b_masses.shape}")
374
print(f"Y-ion masses shape: {y_masses.shape}")
375
print(f"Peptide masses: {peptide_masses}")
376
```
377
378
### Fragment Library Processing
379
380
```python
381
from alphabase.peptide.fragment import filter_fragment_number, remove_unused_fragments
382
383
# Assume we have fragment DataFrames
384
# fragment_mz_df and fragment_intensity_df from previous steps
385
386
# Keep only top 50 fragments per precursor
387
top_mz_df, top_intensity_df = filter_fragment_number(
388
fragment_mz_df, fragment_intensity_df, top_k=50
389
)
390
391
# Remove unused fragment entries
392
compressed_mz_df, compressed_intensity_df = remove_unused_fragments(
393
top_mz_df, top_intensity_df
394
)
395
396
print(f"Original fragments: {len(fragment_mz_df)}")
397
print(f"After filtering: {len(top_mz_df)}")
398
print(f"After compression: {len(compressed_mz_df)}")
399
```
400
401
### Advanced Fragment Analysis
402
403
```python
404
from alphabase.peptide.fragment import calc_fragment_cardinality, calc_fragment_count
405
406
# Calculate fragment counts per precursor
407
frag_counts = calc_fragment_count(fragment_mz_df)
408
print(f"Average fragments per precursor: {frag_counts.mean():.1f}")
409
410
# Analyze fragment sharing across proteins
411
cardinality_df = calc_fragment_cardinality(
412
fragment_mz_df, group_by='proteins'
413
)
414
print(f"Fragment cardinality analysis:\n{cardinality_df.head()}")
415
```