0
# File I/O and Data Formats
1
2
pyOpenMS provides comprehensive support for mass spectrometry file formats with full metadata preservation. It handles the most common formats in proteomics and metabolomics, including vendor-neutral standards and identification results.
3
4
## Capabilities
5
6
### Primary Data Formats
7
8
#### mzML Files
9
10
Standard format for raw mass spectrometry data with complete metadata support.
11
12
```python { .api }
13
class MzMLFile:
    """Reader/writer for mzML files (raw mass spectrometry data)."""

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Load mzML file into MSExperiment.

        Args:
            filename (str): Path to mzML file
            exp (MSExperiment): MSExperiment object to populate
        """

    def store(self, filename: str, exp: MSExperiment) -> None:
        """
        Store MSExperiment to mzML file.

        Args:
            filename (str): Output file path
            exp (MSExperiment): MSExperiment to save
        """

    def loadBuffer(self, buffer: str, exp: MSExperiment) -> None:
        """
        Load mzML from string buffer.

        Args:
            buffer (str): mzML content as string
            exp (MSExperiment): MSExperiment object to populate
        """

    def storeBuffer(self, exp: MSExperiment) -> str:
        """
        Store MSExperiment to string buffer.

        Args:
            exp (MSExperiment): MSExperiment to serialize

        Returns:
            str: mzML content as string
        """

    def loadSize(self, filename: str) -> tuple[int, int]:
        """
        Count spectra and chromatograms without full loading.

        Args:
            filename (str): Path to mzML file

        Returns:
            tuple: (number_of_spectra, number_of_chromatograms)
        """

    def getOptions(self) -> PeakFileOptions:
        """
        Get file loading/storing options.

        Returns:
            PeakFileOptions: Current file options
        """

    def setOptions(self, options: PeakFileOptions) -> None:
        """
        Set file loading/storing options.

        Args:
            options (PeakFileOptions): File options to set
        """

    def isSemanticallyValid(self, filename: str, errors: list, warnings: list) -> bool:
        """
        Validate mzML file semantics.

        Args:
            filename (str): Path to mzML file
            errors (list): List to populate with error messages
            warnings (list): List to populate with warning messages

        Returns:
            bool: True if valid, False otherwise
        """
93
```
94
95
#### mzXML Files
96
97
Legacy format still widely used for mass spectrometry data.
98
99
```python { .api }
100
class MzXMLFile:
    """Reader/writer for mzXML files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Load mzXML file into MSExperiment.

        Args:
            filename (str): Path to mzXML file
            exp (MSExperiment): MSExperiment object to populate
        """

    def store(self, filename: str, exp: MSExperiment) -> None:
        """
        Store MSExperiment to mzXML file.

        Args:
            filename (str): Output file path
            exp (MSExperiment): MSExperiment to save
        """
120
```
121
122
### Feature Data Formats
123
124
#### Feature Detection Results
125
126
```python { .api }
127
class FeatureXMLFile:
    """Reader/writer for featureXML files (feature detection results)."""

    def __init__(self) -> None: ...

    def load(self, filename: str, features: FeatureMap) -> None:
        """
        Load feature detection results from featureXML file.

        Args:
            filename (str): Path to featureXML file
            features (FeatureMap): FeatureMap to populate
        """

    def store(self, filename: str, features: FeatureMap) -> None:
        """
        Store FeatureMap to featureXML file.

        Args:
            filename (str): Output file path
            features (FeatureMap): FeatureMap to save
        """

    def loadSize(self, filename: str) -> int:
        """
        Count features without full loading.

        Args:
            filename (str): Path to featureXML file

        Returns:
            int: Number of features in file
        """

    def getOptions(self) -> FeatureFileOptions:
        """
        Get file loading/storing options.

        Returns:
            FeatureFileOptions: Current file options
        """

    def setOptions(self, options: FeatureFileOptions) -> None:
        """
        Set file loading/storing options.

        Args:
            options (FeatureFileOptions): File options to set
        """
174
```
175
176
#### Consensus Features
177
178
```python { .api }
179
class ConsensusXMLFile:
    """Reader/writer for consensusXML files (consensus features)."""

    def __init__(self) -> None: ...

    def load(self, filename: str, consensus: ConsensusMap) -> None:
        """
        Load consensus features from consensusXML file.

        Args:
            filename (str): Path to consensusXML file
            consensus (ConsensusMap): ConsensusMap to populate
        """

    def store(self, filename: str, consensus: ConsensusMap) -> None:
        """
        Store ConsensusMap to consensusXML file.

        Args:
            filename (str): Output file path
            consensus (ConsensusMap): ConsensusMap to save
        """
199
```
200
201
### Identification Formats
202
203
#### OpenMS Identification Format
204
205
```python { .api }
206
class IdXMLFile:
    """Reader/writer for idXML identification files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:
        """
        Load identification results from idXML file.

        Args:
            filename (str): Path to idXML file
            protein_ids (list[ProteinIdentification]): List to populate with protein IDs
            peptide_ids (list[PeptideIdentification]): List to populate with peptide IDs
        """

    def store(self, filename: str, protein_ids: list, peptide_ids: list) -> None:
        """
        Store identification results to idXML file.

        Args:
            filename (str): Output file path
            protein_ids (list[ProteinIdentification]): Protein identifications
            peptide_ids (list[PeptideIdentification]): Peptide identifications
        """
228
```
229
230
#### Standard Identification Formats
231
232
```python { .api }
233
class MzIdentMLFile:
    """Reader/writer for mzIdentML identification files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:
        """
        Load mzIdentML identification file.

        Args:
            filename (str): Path to mzIdentML file
            protein_ids (list[ProteinIdentification]): List to populate
            peptide_ids (list[PeptideIdentification]): List to populate
        """

    def store(self, filename: str, protein_ids: list, peptide_ids: list) -> None:
        """
        Store to mzIdentML format.

        Args:
            filename (str): Output file path
            protein_ids (list[ProteinIdentification]): Protein identifications
            peptide_ids (list[PeptideIdentification]): Peptide identifications
        """
255
256
class PepXMLFile:
    """Reader for pepXML identification files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:
        """
        Load pepXML identification file.

        Args:
            filename (str): Path to pepXML file
            protein_ids (list[ProteinIdentification]): List to populate
            peptide_ids (list[PeptideIdentification]): List to populate
        """
268
269
class ProtXMLFile:
    """Reader for protXML protein identification files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:
        """
        Load protXML protein identification file.

        Args:
            filename (str): Path to protXML file
            protein_ids (list[ProteinIdentification]): List to populate
            peptide_ids (list[PeptideIdentification]): List to populate
        """
281
```
282
283
### Spectral Data Formats
284
285
#### Simple Spectrum Formats
286
287
```python { .api }
288
class DTAFile:
    """Reader/writer for single-spectrum DTA files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, spectrum: MSSpectrum) -> None:
        """
        Load DTA spectrum file.

        Args:
            filename (str): Path to DTA file
            spectrum (MSSpectrum): Spectrum to populate
        """

    def store(self, filename: str, spectrum: MSSpectrum) -> None:
        """
        Store spectrum to DTA file.

        Args:
            filename (str): Output file path
            spectrum (MSSpectrum): Spectrum to save
        """
308
309
class MGFFile:
    """Reader/writer for Mascot Generic Format (MGF) files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Load Mascot Generic Format file.

        Args:
            filename (str): Path to MGF file
            exp (MSExperiment): Experiment to populate
        """

    def store(self, filename: str, exp: MSExperiment) -> None:
        """
        Store experiment to MGF file.

        Args:
            filename (str): Output file path
            exp (MSExperiment): Experiment to save
        """
329
330
class MSPFile:
    """Reader for MSP spectral library files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Load MSP spectral library file.

        Args:
            filename (str): Path to MSP file
            exp (MSExperiment): Experiment to populate
        """
341
```
342
343
### Sequence Database Formats
344
345
#### FASTA Files
346
347
```python { .api }
348
class FASTAFile:
    """Reader/writer for FASTA protein sequence databases."""

    def __init__(self) -> None: ...

    def load(self, filename: str, data: list) -> None:
        """
        Load FASTA protein database.

        Args:
            filename (str): Path to FASTA file
            data (list[FASTAEntry]): List to populate with entries
        """

    def store(self, filename: str, data: list) -> None:
        """
        Store protein sequences to FASTA file.

        Args:
            filename (str): Output file path
            data (list[FASTAEntry]): FASTA entries to save
        """
368
369
class FASTAEntry:
    """Single FASTA record: identifier, description and sequence."""

    def __init__(self, identifier: str = "", description: str = "", sequence: str = "") -> None: ...

    # Accessors
    def getIdentifier(self) -> str: ...
    def getDescription(self) -> str: ...
    def getSequence(self) -> str: ...

    # Mutators
    def setIdentifier(self, identifier: str) -> None: ...
    def setDescription(self, description: str) -> None: ...
    def setSequence(self, sequence: str) -> None: ...
377
```
378
379
### Quantification Formats
380
381
#### mzTab Format
382
383
```python { .api }
384
class MzTabFile:
    """Reader/writer for mzTab files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, mztab: MzTab) -> None:
        """
        Load mzTab quantification file.

        Args:
            filename (str): Path to mzTab file
            mztab (MzTab): MzTab object to populate
        """

    def store(self, filename: str, mztab: MzTab) -> None:
        """
        Store quantification results to mzTab file.

        Args:
            filename (str): Output file path
            mztab (MzTab): MzTab data to save
        """
404
```
405
406
### Targeted Analysis Formats
407
408
#### Transition Lists
409
410
```python { .api }
411
class TraMLFile:
    """Reader/writer for TraML transition list files."""

    def __init__(self) -> None: ...

    def load(self, filename: str, targeted_exp: TargetedExperiment) -> None:
        """
        Load TraML transition list file.

        Args:
            filename (str): Path to TraML file
            targeted_exp (TargetedExperiment): TargetedExperiment to populate
        """

    def store(self, filename: str, targeted_exp: TargetedExperiment) -> None:
        """
        Store transition list to TraML file.

        Args:
            filename (str): Output file path
            targeted_exp (TargetedExperiment): TargetedExperiment to save
        """
431
```
432
433
### Cached and Indexed Access
434
435
#### Memory-Efficient File Access
436
437
```python { .api }
438
class CachedmzML:
    """Cached mzML file handler giving per-item access to large files."""

    def __init__(self, filename: str) -> None:
        """
        Create cached mzML file handler for large files.

        Args:
            filename (str): Path to mzML file
        """

    def getNrSpectra(self) -> int:
        """Get number of spectra in file."""

    def getSpectrum(self, id: int) -> MSSpectrum:
        """
        Get spectrum by index.

        Args:
            id (int): Spectrum index

        Returns:
            MSSpectrum: The requested spectrum
        """

    def getNrChromatograms(self) -> int:
        """Get number of chromatograms in file."""

    def getChromatogram(self, id: int) -> MSChromatogram:
        """
        Get chromatogram by index.

        Args:
            id (int): Chromatogram index

        Returns:
            MSChromatogram: The requested chromatogram
        """
474
475
class IndexedMzMLHandler:
    """Random-access handler for indexed mzML files."""

    def __init__(self, filename: str) -> None:
        """
        Create indexed mzML handler for random access.

        Args:
            filename (str): Path to indexed mzML file
        """

    def getSpectrumByRT(self, rt: float) -> MSSpectrum:
        """
        Get spectrum closest to retention time.

        Args:
            rt (float): Target retention time

        Returns:
            MSSpectrum: Closest spectrum
        """
494
```
495
496
### File Format Detection
497
498
#### Automatic Format Detection
499
500
```python { .api }
501
class FileHandler:
    """File format detection helpers."""

    # Declared before the methods so the `-> Type` annotations below resolve
    # when the class body is evaluated.
    class Type:
        """Integer codes identifying the supported file formats."""

        UNKNOWN = 0
        MZML = 1
        MZXML = 2
        FEATUREXML = 3
        CONSENSUSXML = 4
        IDXML = 5
        MZIDENTML = 6
        PEPXML = 7
        PROTXML = 8
        FASTA = 9
        DTA = 10
        MGF = 11
        MSP = 12
        TRAML = 13
        MZTAB = 14

    @staticmethod
    def getType(filename: str) -> Type:
        """
        Detect file type from filename or content.

        Args:
            filename (str): Path to file

        Returns:
            Type: Detected file type
        """

    @staticmethod
    def getTypeByContent(filename: str) -> Type:
        """
        Detect file type by examining file content.

        Args:
            filename (str): Path to file

        Returns:
            Type: Detected file type
        """
542
```
543
544
## Usage Examples
545
546
### Basic File Loading
547
548
```python
549
import pyopenms

# Load mzML file
exp = pyopenms.MSExperiment()
pyopenms.MzMLFile().load("data.mzML", exp)
print(f"Loaded {exp.size()} spectra")

# Load features
features = pyopenms.FeatureMap()
pyopenms.FeatureXMLFile().load("features.featureXML", features)
print(f"Loaded {features.size()} features")

# Load identifications (both lists are populated in place by load())
protein_ids = []
peptide_ids = []
pyopenms.IdXMLFile().load("identifications.idXML", protein_ids, peptide_ids)
print(f"Loaded {len(protein_ids)} protein IDs, {len(peptide_ids)} peptide IDs")
566
```
567
568
### Cached File Access for Large Files
569
570
```python
571
import pyopenms

# Use cached access for large mzML files
cached_file = pyopenms.CachedmzML("large_file.mzML")
num_spectra = cached_file.getNrSpectra()

# Process spectra one by one without loading entire file
for i in range(num_spectra):
    spectrum = cached_file.getSpectrum(i)
    rt = spectrum.getRT()
    ms_level = spectrum.getMSLevel()

    if ms_level == 1:  # Process only MS1 spectra
        mz_array, intensity_array = spectrum.get_peaks()
        # Process spectrum data...
586
```
587
588
### File Format Detection
589
590
```python
591
import pyopenms

# Automatically detect file format
file_type = pyopenms.FileHandler.getType("unknown_file.xml")

# Dispatch to the loader that matches the detected format
if file_type == pyopenms.FileHandler.Type.MZML:
    exp = pyopenms.MSExperiment()
    pyopenms.MzMLFile().load("unknown_file.xml", exp)
elif file_type == pyopenms.FileHandler.Type.FEATUREXML:
    features = pyopenms.FeatureMap()
    pyopenms.FeatureXMLFile().load("unknown_file.xml", features)
602
```
603
604
## Types
605
606
### File Options
607
608
```python { .api }
609
class PeakFileOptions:
    """Options for peak file (mzML, mzXML) loading and storing."""

    def __init__(self) -> None: ...

    def setMSLevels(self, levels: list[int]) -> None:
        """Set MS levels to load."""

    def getMSLevels(self) -> list[int]:
        """Get MS levels to load."""

    def setRTRange(self, min_rt: float, max_rt: float) -> None:
        """Set retention time range."""

    def setMZRange(self, min_mz: float, max_mz: float) -> None:
        """Set m/z range."""

    def setIntensityRange(self, min_intensity: float, max_intensity: float) -> None:
        """Set intensity range."""

    def setWriteIndex(self, write_index: bool) -> None:
        """Set whether to write index."""

    def getWriteIndex(self) -> bool:
        """Get whether to write index."""

    def setCompression(self, compression: bool) -> None:
        """Set compression for binary data."""

    def getCompression(self) -> bool:
        """Get compression setting."""
639
640
class FeatureFileOptions:
    """Options for feature file (featureXML) loading and storing."""

    def __init__(self) -> None: ...

    def setLoadConvexHull(self, load: bool) -> None:
        """Set whether to load convex hulls."""

    def getLoadConvexHull(self) -> bool:
        """Get whether to load convex hulls."""

    def setLoadSubordinates(self, load: bool) -> None:
        """Set whether to load subordinate features."""

    def getLoadSubordinates(self) -> bool:
        """Get whether to load subordinate features."""
655
```