0
# Feature Detection and Quantification
1
2
Advanced algorithms for detecting and quantifying LC-MS features, covering peak picking, feature finding, and quantitative analysis. These algorithms form the foundation for label-free quantification and comparative proteomics/metabolomics studies.
3
4
## Capabilities
5
6
### Peak Picking
7
8
#### High-Resolution Peak Picking
9
10
```python { .api }
11
class PeakPickerHiRes:
12
def __init__(self) -> None: ...
13
14
def pickExperiment(self, input: MSExperiment, output: MSExperiment) -> None:
15
"""
16
Pick peaks in entire MS experiment.
17
18
Args:
19
input (MSExperiment): Input experiment with profile spectra
20
output (MSExperiment): Output experiment with picked peaks
21
"""
22
23
def pick(self, input: MSSpectrum, output: MSSpectrum) -> None:
24
"""
25
Pick peaks in single spectrum.
26
27
Args:
28
input (MSSpectrum): Input profile spectrum
29
output (MSSpectrum): Output picked spectrum
30
"""
31
32
def getParameters(self) -> Param:
33
"""
34
Get algorithm parameters.
35
36
Returns:
37
Param: Parameter object for configuration
38
"""
39
40
def setParameters(self, param: Param) -> None:
41
"""
42
Set algorithm parameters.
43
44
Args:
45
param (Param): Parameter configuration
46
"""
47
48
class PeakPickerMaxima:
49
def __init__(self) -> None: ...
50
51
def pick(self, input: MSSpectrum, output: MSSpectrum) -> None:
52
"""
53
Pick peaks using local maxima method.
54
55
Args:
56
input (MSSpectrum): Input profile spectrum
57
output (MSSpectrum): Output picked spectrum
58
"""
59
60
def getParameters(self) -> Param:
61
"""Get algorithm parameters."""
62
63
def setParameters(self, param: Param) -> None:
64
"""Set algorithm parameters."""
65
```
66
67
### Feature Detection Algorithms
68
69
#### Centroided Data Feature Detection
70
71
```python { .api }
72
class FeatureFinderAlgorithmPicked:
73
def __init__(self) -> None: ...
74
75
def run(self, input: MSExperiment, features: FeatureMap,
76
params: Param, seeds: FeatureMap) -> None:
77
"""
78
Detect features in picked/centroided MS data.
79
80
Args:
81
input (MSExperiment): Input experiment with picked peaks
82
features (FeatureMap): Output feature map
83
params (Param): Algorithm parameters
84
seeds (FeatureMap): Feature seeds; pass an empty FeatureMap when no seeds are used
85
"""
86
87
def getParameters(self) -> Param:
88
"""
89
Get default parameters.
90
91
Returns:
92
Param: Default parameter configuration
93
"""
94
95
class FeatureFinderAlgorithmIsotopeWavelet:
96
def __init__(self) -> None: ...
97
98
def run(self, input: MSExperiment, features: FeatureMap,
99
params: Param, seeds: FeatureMap) -> None:
100
"""
101
Detect features using isotope wavelet method.
102
103
Args:
104
input (MSExperiment): Input experiment
105
features (FeatureMap): Output feature map
106
params (Param): Algorithm parameters
107
seeds (FeatureMap): Feature seeds; pass an empty FeatureMap when no seeds are used
108
"""
109
```
110
111
#### Advanced Feature Detection
112
113
```python { .api }
114
class FeatureFinderMultiplex:
115
def __init__(self) -> None: ...
116
117
def run(self, input: MSExperiment, features: FeatureMap,
118
params: Param) -> None:
119
"""
120
Detect multiplex-labeled features.
121
122
Args:
123
input (MSExperiment): Input experiment
124
features (FeatureMap): Output feature map
125
params (Param): Algorithm parameters including label information
126
"""
127
128
class FeatureFinderIdentification:
129
def __init__(self) -> None: ...
130
131
def run(self, input: MSExperiment, peptide_ids: list,
132
features: FeatureMap, params: Param) -> None:
133
"""
134
Extract features based on peptide identifications.
135
136
Args:
137
input (MSExperiment): Input experiment
138
peptide_ids (list[PeptideIdentification]): Peptide identifications
139
features (FeatureMap): Output feature map
140
params (Param): Algorithm parameters
141
"""
142
```
143
144
### Feature Data Structures
145
146
#### Feature
147
148
Individual detected feature with quantitative information.
149
150
```python { .api }
151
class Feature:
152
def __init__(self) -> None: ...
153
154
def getRT(self) -> float:
155
"""
156
Get retention time of feature centroid.
157
158
Returns:
159
float: Retention time in seconds
160
"""
161
162
def setRT(self, rt: float) -> None:
163
"""
164
Set retention time.
165
166
Args:
167
rt (float): Retention time in seconds
168
"""
169
170
def getMZ(self) -> float:
171
"""
172
Get m/z of feature centroid.
173
174
Returns:
175
float: m/z value
176
"""
177
178
def setMZ(self, mz: float) -> None:
179
"""
180
Set m/z value.
181
182
Args:
183
mz (float): m/z value
184
"""
185
186
def getIntensity(self) -> float:
187
"""
188
Get feature intensity.
189
190
Returns:
191
float: Feature intensity
192
"""
193
194
def setIntensity(self, intensity: float) -> None:
195
"""
196
Set feature intensity.
197
198
Args:
199
intensity (float): Feature intensity
200
"""
201
202
def getOverallQuality(self) -> float:
203
"""
204
Get overall feature quality score.
205
206
Returns:
207
float: Quality score (0-1)
208
"""
209
210
def setOverallQuality(self, quality: float) -> None:
211
"""
212
Set overall quality score.
213
214
Args:
215
quality (float): Quality score
216
"""
217
218
def getCharge(self) -> int:
219
"""
220
Get feature charge state.
221
222
Returns:
223
int: Charge state
224
"""
225
226
def setCharge(self, charge: int) -> None:
227
"""
228
Set charge state.
229
230
Args:
231
charge (int): Charge state
232
"""
233
234
def getConvexHull(self) -> ConvexHull2D:
235
"""
236
Get feature convex hull boundary.
237
238
Returns:
239
ConvexHull2D: 2D convex hull in RT/m/z space
240
"""
241
242
def setConvexHull(self, hull: ConvexHull2D) -> None:
243
"""
244
Set convex hull boundary.
245
246
Args:
247
hull (ConvexHull2D): 2D convex hull
248
"""
249
250
def getSubordinates(self) -> list[Feature]:
251
"""
252
Get subordinate features (isotope peaks, etc.).
253
254
Returns:
255
list[Feature]: List of subordinate features
256
"""
257
258
def getPeptideIdentifications(self) -> list[PeptideIdentification]:
259
"""
260
Get peptide identifications assigned to feature.
261
262
Returns:
263
list[PeptideIdentification]: Assigned identifications
264
"""
265
266
def setPeptideIdentifications(self, ids: list[PeptideIdentification]) -> None:
267
"""
268
Set peptide identifications.
269
270
Args:
271
ids (list[PeptideIdentification]): Identifications to assign
272
"""
273
274
def getUniqueId(self) -> int:
275
"""
276
Get unique feature identifier.
277
278
Returns:
279
int: Unique ID
280
"""
281
```
282
283
#### FeatureMap
284
285
Container for multiple features with metadata.
286
287
```python { .api }
288
class FeatureMap:
289
def __init__(self) -> None: ...
290
291
def size(self) -> int:
292
"""
293
Get number of features.
294
295
Returns:
296
int: Number of features
297
"""
298
299
def empty(self) -> bool:
300
"""
301
Check if feature map is empty.
302
303
Returns:
304
bool: True if no features
305
"""
306
307
def push_back(self, feature: Feature) -> None:
308
"""
309
Add feature to map.
310
311
Args:
312
feature (Feature): Feature to add
313
"""
314
315
def __getitem__(self, index: int) -> Feature:
316
"""
317
Get feature by index.
318
319
Args:
320
index (int): Feature index
321
322
Returns:
323
Feature: The feature at given index
324
"""
325
326
def clear(self) -> None:
327
"""Remove all features."""
328
329
def updateRanges(self) -> None:
330
"""Update RT/m/z ranges from features."""
331
332
def getMinRT(self) -> float:
333
"""Get minimum retention time."""
334
335
def getMaxRT(self) -> float:
336
"""Get maximum retention time."""
337
338
def getMinMZ(self) -> float:
339
"""Get minimum m/z."""
340
341
def getMaxMZ(self) -> float:
342
"""Get maximum m/z."""
343
344
def sortByRT(self) -> None:
345
"""Sort features by retention time."""
346
347
def sortByMZ(self) -> None:
348
"""Sort features by m/z."""
349
350
def sortByIntensity(self, reverse: bool = True) -> None:
351
"""
352
Sort features by intensity.
353
354
Args:
355
reverse (bool): If True, sort in descending order (highest intensity first)
356
"""
357
358
def getProteinIdentifications(self) -> list[ProteinIdentification]:
359
"""
360
Get protein identifications.
361
362
Returns:
363
list[ProteinIdentification]: Protein IDs
364
"""
365
366
def setProteinIdentifications(self, ids: list[ProteinIdentification]) -> None:
367
"""
368
Set protein identifications.
369
370
Args:
371
ids (list[ProteinIdentification]): Protein IDs
372
"""
373
374
def get_df(self, meta_values: list[str] = None,
375
export_peptide_identifications: bool = True) -> DataFrame:
376
"""
377
Export features to pandas DataFrame.
378
379
Args:
380
meta_values (list[str]): Meta values to include
381
export_peptide_identifications (bool): Include peptide ID info
382
383
Returns:
384
DataFrame: Feature data in tabular format
385
"""
386
387
def get_assigned_peptide_identifications(self) -> list[PeptideIdentification]:
388
"""
389
Get all peptide identifications assigned to features.
390
391
Returns:
392
list[PeptideIdentification]: Assigned peptide IDs
393
"""
394
```
395
396
### Signal Processing
397
398
#### Filtering and Smoothing
399
400
```python { .api }
401
class GaussFilter:
402
def __init__(self) -> None: ...
403
404
def filter(self, input: MSExperiment, output: MSExperiment) -> None:
405
"""
406
Apply Gaussian smoothing to experiment.
407
408
Args:
409
input (MSExperiment): Input experiment
410
output (MSExperiment): Smoothed output experiment
411
"""
412
413
def getParameters(self) -> Param:
414
"""Get filter parameters."""
415
416
def setParameters(self, param: Param) -> None:
417
"""Set filter parameters."""
418
419
class SavitzkyGolayFilter:
420
def __init__(self) -> None: ...
421
422
def filter(self, input: MSSpectrum, output: MSSpectrum) -> None:
423
"""
424
Apply Savitzky-Golay smoothing to spectrum.
425
426
Args:
427
input (MSSpectrum): Input spectrum
428
output (MSSpectrum): Smoothed spectrum
429
"""
430
431
class LinearResampler:
432
def __init__(self) -> None: ...
433
434
def rasterize(self, input: MSExperiment, output: MSExperiment,
435
min_mz: float, max_mz: float, spacing: float) -> None:
436
"""
437
Resample spectra to regular m/z grid.
438
439
Args:
440
input (MSExperiment): Input experiment
441
output (MSExperiment): Resampled experiment
442
min_mz (float): Minimum m/z
443
max_mz (float): Maximum m/z
444
spacing (float): m/z spacing
445
"""
446
```
447
448
### Quality Assessment
449
450
#### Feature Quality Metrics
451
452
```python { .api }
453
class FeatureQuality:
454
def __init__(self) -> None: ...
455
456
def compute(self, feature: Feature, experiment: MSExperiment) -> None:
457
"""
458
Compute quality metrics for feature.
459
460
Args:
461
feature (Feature): Feature to assess
462
experiment (MSExperiment): Source experiment
463
"""
464
465
def getParameters(self) -> Param:
466
"""Get quality assessment parameters."""
467
468
class QcBase:
469
def __init__(self) -> None: ...
470
471
def compute(self, feature_map: FeatureMap, experiment: MSExperiment,
472
protein_ids: list, peptide_ids: list) -> None:
473
"""
474
Compute quality control metrics.
475
476
Args:
477
feature_map (FeatureMap): Detected features
478
experiment (MSExperiment): Source experiment
479
protein_ids (list[ProteinIdentification]): Protein IDs
480
peptide_ids (list[PeptideIdentification]): Peptide IDs
481
"""
482
```
483
484
## Usage Examples
485
486
### Complete Feature Detection Workflow
487
488
```python
489
import pyopenms
490
491
# Load raw data
492
exp = pyopenms.MSExperiment()
493
pyopenms.MzMLFile().load("raw_data.mzML", exp)
494
495
# Peak picking
496
picker = pyopenms.PeakPickerHiRes()
497
picked_exp = pyopenms.MSExperiment()
498
picker.pickExperiment(exp, picked_exp)
499
500
# Feature detection
501
features = pyopenms.FeatureMap()
502
finder = pyopenms.FeatureFinderAlgorithmPicked()
503
504
# Configure parameters
505
params = finder.getParameters()
506
params.setValue("mass_trace:mz_tolerance", 0.004)
507
params.setValue("mass_trace:min_spectra", 10)
508
params.setValue("isotopic_pattern:charge_low", 1)
509
params.setValue("isotopic_pattern:charge_high", 4)
510
511
# Run feature detection
512
seeds = pyopenms.FeatureMap() # Empty seed map
513
finder.run(picked_exp, features, params, seeds)
514
515
print(f"Detected {features.size()} features")
516
517
# Sort by intensity and show top features
518
features.sortByIntensity(reverse=True)
519
for i in range(min(10, features.size())):
520
feature = features[i]
521
print(f"Feature {i+1}: RT={feature.getRT():.2f}, "
522
f"m/z={feature.getMZ():.4f}, "
523
f"Intensity={feature.getIntensity():.0f}, "
524
f"Charge={feature.getCharge()}")
525
526
# Save results
527
pyopenms.FeatureXMLFile().store("features.featureXML", features)
528
```
529
530
### Feature-Based Quantification
531
532
```python
533
import pyopenms
534
535
# Load features from multiple runs
536
feature_maps = []
537
file_names = ["run1_features.featureXML", "run2_features.featureXML", "run3_features.featureXML"]
538
539
for filename in file_names:
540
features = pyopenms.FeatureMap()
541
pyopenms.FeatureXMLFile().load(filename, features)
542
feature_maps.append(features)
543
544
# Convert to DataFrames for analysis
545
dfs = []
546
for i, features in enumerate(feature_maps):
547
df = features.get_df()
548
df['run'] = f'run_{i+1}'
549
dfs.append(df)
550
551
# Combine all runs
552
import pandas as pd
553
combined_df = pd.concat(dfs, ignore_index=True)
554
555
# Basic statistics
556
print("Feature count per run:")
557
print(combined_df.groupby('run').size())
558
559
print("\nIntensity statistics:")
560
print(combined_df.groupby('run')['intensity'].describe())
561
```
562
563
### Identification-Guided Feature Extraction
564
565
```python
566
import pyopenms
567
568
# Load raw data and identifications
569
exp = pyopenms.MSExperiment()
570
pyopenms.MzMLFile().load("data.mzML", exp)
571
572
protein_ids = []
573
peptide_ids = []
574
pyopenms.IdXMLFile().load("identifications.idXML", protein_ids, peptide_ids)
575
576
# Use identifications to guide feature detection
577
finder = pyopenms.FeatureFinderIdentification()
578
features = pyopenms.FeatureMap()
579
580
# Configure parameters for ID-based extraction
581
params = finder.getParameters()
582
params.setValue("extract:mz_window", 0.01)
583
params.setValue("extract:rt_window", 60.0)
584
585
# Run ID-guided feature extraction
586
finder.run(exp, peptide_ids, features, params)
587
588
print(f"Extracted {features.size()} features based on {len(peptide_ids)} identifications")
589
590
# Export combined feature and identification data
591
df = features.get_df(export_peptide_identifications=True)
592
print(df[['RT', 'mz', 'intensity', 'peptide_sequence', 'peptide_score']].head())
593
```
594
595
### Advanced Peak Picking Configuration
596
597
```python
598
import pyopenms
599
600
# Configure high-resolution peak picker
601
picker = pyopenms.PeakPickerHiRes()
602
params = picker.getParameters()
603
604
# Fine-tune parameters for high-resolution data
605
params.setValue("signal_to_noise", 1.0)
606
params.setValue("spacing_difference_gap", 4.0)
607
params.setValue("spacing_difference", 1.5)
608
params.setValue("missing", 1)
609
params.setValue("ms_levels", [1, 2]) # Pick both MS1 and MS2
610
611
picker.setParameters(params)
612
613
# Apply to experiment
614
exp = pyopenms.MSExperiment()
615
pyopenms.MzMLFile().load("profile_data.mzML", exp)
616
617
picked_exp = pyopenms.MSExperiment()
618
picker.pickExperiment(exp, picked_exp)
619
620
print(f"Input: {sum(s.size() for s in exp)} total peaks")
621
print(f"Output: {sum(s.size() for s in picked_exp)} picked peaks")
622
623
# Save picked data
624
pyopenms.MzMLFile().store("picked_data.mzML", picked_exp)
625
```