or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

alignment.md, chemistry.md, feature-detection.md, file-io.md, index.md, ms-data.md, peptide-protein.md, targeted-analysis.md

docs/file-io.md

0

# File I/O and Data Formats

1

2

Comprehensive support for mass spectrometry file formats with full metadata preservation. pyOpenMS handles the most common formats in proteomics and metabolomics, including vendor-neutral standards for raw data and formats for identification results.

3

4

## Capabilities

5

6

### Primary Data Formats

7

8

#### mzML Files

9

10

Standard format for raw mass spectrometry data with complete metadata support.

11

12

```python { .api }

13

class MzMLFile:

14

def __init__(self) -> None: ...

15

16

def load(self, filename: str, exp: MSExperiment) -> None:

17

"""

18

Load mzML file into MSExperiment.

19

20

Args:

21

filename (str): Path to mzML file

22

exp (MSExperiment): MSExperiment object to populate

23

"""

24

25

def store(self, filename: str, exp: MSExperiment) -> None:

26

"""

27

Store MSExperiment to mzML file.

28

29

Args:

30

filename (str): Output file path

31

exp (MSExperiment): MSExperiment to save

32

"""

33

34

def loadBuffer(self, buffer: str, exp: MSExperiment) -> None:

35

"""

36

Load mzML from string buffer.

37

38

Args:

39

buffer (str): mzML content as string

40

exp (MSExperiment): MSExperiment object to populate

41

"""

42

43

def storeBuffer(self, exp: MSExperiment) -> str:

44

"""

45

Store MSExperiment to string buffer.

46

47

Args:

48

exp (MSExperiment): MSExperiment to serialize

49

50

Returns:

51

str: mzML content as string

52

"""

53

54

def loadSize(self, filename: str) -> tuple[int, int]:

55

"""

56

Count the spectra and chromatograms in a file without loading its full contents.

57

58

Args:

59

filename (str): Path to mzML file

60

61

Returns:

62

tuple: (number_of_spectra, number_of_chromatograms)

63

"""

64

65

def getOptions(self) -> PeakFileOptions:

66

"""

67

Get file loading/storing options.

68

69

Returns:

70

PeakFileOptions: Current file options

71

"""

72

73

def setOptions(self, options: PeakFileOptions) -> None:

74

"""

75

Set file loading/storing options.

76

77

Args:

78

options (PeakFileOptions): File options to set

79

"""

80

81

def isSemanticallyValid(self, filename: str, errors: list, warnings: list) -> bool:

82

"""

83

Validate mzML file semantics.

84

85

Args:

86

filename (str): Path to mzML file

87

errors (list): List to populate with error messages

88

warnings (list): List to populate with warning messages

89

90

Returns:

91

bool: True if valid, False otherwise

92

"""

93

```

94

95

#### mzXML Files

96

97

Legacy format still widely used for mass spectrometry data.

98

99

```python { .api }

100

class MzXMLFile:

101

def __init__(self) -> None: ...

102

103

def load(self, filename: str, exp: MSExperiment) -> None:

104

"""

105

Load mzXML file into MSExperiment.

106

107

Args:

108

filename (str): Path to mzXML file

109

exp (MSExperiment): MSExperiment object to populate

110

"""

111

112

def store(self, filename: str, exp: MSExperiment) -> None:

113

"""

114

Store MSExperiment to mzXML file.

115

116

Args:

117

filename (str): Output file path

118

exp (MSExperiment): MSExperiment to save

119

"""

120

```

121

122

### Feature Data Formats

123

124

#### Feature Detection Results

125

126

```python { .api }

127

class FeatureXMLFile:

128

def __init__(self) -> None: ...

129

130

def load(self, filename: str, features: FeatureMap) -> None:

131

"""

132

Load feature detection results from featureXML file.

133

134

Args:

135

filename (str): Path to featureXML file

136

features (FeatureMap): FeatureMap to populate

137

"""

138

139

def store(self, filename: str, features: FeatureMap) -> None:

140

"""

141

Store FeatureMap to featureXML file.

142

143

Args:

144

filename (str): Output file path

145

features (FeatureMap): FeatureMap to save

146

"""

147

148

def loadSize(self, filename: str) -> int:

149

"""

150

Count the features in a file without loading its full contents.

151

152

Args:

153

filename (str): Path to featureXML file

154

155

Returns:

156

int: Number of features in file

157

"""

158

159

def getOptions(self) -> FeatureFileOptions:

160

"""

161

Get file loading/storing options.

162

163

Returns:

164

FeatureFileOptions: Current file options

165

"""

166

167

def setOptions(self, options: FeatureFileOptions) -> None:

168

"""

169

Set file loading/storing options.

170

171

Args:

172

options (FeatureFileOptions): File options to set

173

"""

174

```

175

176

#### Consensus Features

177

178

```python { .api }

179

class ConsensusXMLFile:

180

def __init__(self) -> None: ...

181

182

def load(self, filename: str, consensus: ConsensusMap) -> None:

183

"""

184

Load consensus features from consensusXML file.

185

186

Args:

187

filename (str): Path to consensusXML file

188

consensus (ConsensusMap): ConsensusMap to populate

189

"""

190

191

def store(self, filename: str, consensus: ConsensusMap) -> None:

192

"""

193

Store ConsensusMap to consensusXML file.

194

195

Args:

196

filename (str): Output file path

197

consensus (ConsensusMap): ConsensusMap to save

198

"""

199

```

200

201

### Identification Formats

202

203

#### OpenMS Identification Format

204

205

```python { .api }

206

class IdXMLFile:

207

def __init__(self) -> None: ...

208

209

def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:

210

"""

211

Load identification results from idXML file.

212

213

Args:

214

filename (str): Path to idXML file

215

protein_ids (list[ProteinIdentification]): List to populate with protein IDs

216

peptide_ids (list[PeptideIdentification]): List to populate with peptide IDs

217

"""

218

219

def store(self, filename: str, protein_ids: list, peptide_ids: list) -> None:

220

"""

221

Store identification results to idXML file.

222

223

Args:

224

filename (str): Output file path

225

protein_ids (list[ProteinIdentification]): Protein identifications

226

peptide_ids (list[PeptideIdentification]): Peptide identifications

227

"""

228

```

229

230

#### Standard Identification Formats

231

232

```python { .api }

233

class MzIdentMLFile:

234

def __init__(self) -> None: ...

235

236

def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:

237

"""

238

Load mzIdentML identification file.

239

240

Args:

241

filename (str): Path to mzIdentML file

242

protein_ids (list[ProteinIdentification]): List to populate

243

peptide_ids (list[PeptideIdentification]): List to populate

244

"""

245

246

def store(self, filename: str, protein_ids: list, peptide_ids: list) -> None:

247

"""

248

Store to mzIdentML format.

249

250

Args:

251

filename (str): Output file path

252

protein_ids (list[ProteinIdentification]): Protein identifications

253

peptide_ids (list[PeptideIdentification]): Peptide identifications

254

"""

255

256

class PepXMLFile:

257

def __init__(self) -> None: ...

258

259

def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:

260

"""

261

Load pepXML identification file.

262

263

Args:

264

filename (str): Path to pepXML file

265

protein_ids (list[ProteinIdentification]): List to populate

266

peptide_ids (list[PeptideIdentification]): List to populate

267

"""

268

269

class ProtXMLFile:

270

def __init__(self) -> None: ...

271

272

def load(self, filename: str, protein_ids: list, peptide_ids: list) -> None:

273

"""

274

Load protXML protein identification file.

275

276

Args:

277

filename (str): Path to protXML file

278

protein_ids (list[ProteinIdentification]): List to populate

279

peptide_ids (list[PeptideIdentification]): List to populate

280

"""

281

```

282

283

### Spectral Data Formats

284

285

#### Simple Spectrum Formats

286

287

```python { .api }

288

class DTAFile:

289

def __init__(self) -> None: ...

290

291

def load(self, filename: str, spectrum: MSSpectrum) -> None:

292

"""

293

Load DTA spectrum file.

294

295

Args:

296

filename (str): Path to DTA file

297

spectrum (MSSpectrum): Spectrum to populate

298

"""

299

300

def store(self, filename: str, spectrum: MSSpectrum) -> None:

301

"""

302

Store spectrum to DTA file.

303

304

Args:

305

filename (str): Output file path

306

spectrum (MSSpectrum): Spectrum to save

307

"""

308

309

class MGFFile:

310

def __init__(self) -> None: ...

311

312

def load(self, filename: str, exp: MSExperiment) -> None:

313

"""

314

Load Mascot Generic Format file.

315

316

Args:

317

filename (str): Path to MGF file

318

exp (MSExperiment): Experiment to populate

319

"""

320

321

def store(self, filename: str, exp: MSExperiment) -> None:

322

"""

323

Store experiment to MGF file.

324

325

Args:

326

filename (str): Output file path

327

exp (MSExperiment): Experiment to save

328

"""

329

330

class MSPFile:

331

def __init__(self) -> None: ...

332

333

def load(self, filename: str, exp: MSExperiment) -> None:

334

"""

335

Load MSP spectral library file.

336

337

Args:

338

filename (str): Path to MSP file

339

exp (MSExperiment): Experiment to populate

340

"""

341

```

342

343

### Sequence Database Formats

344

345

#### FASTA Files

346

347

```python { .api }

348

class FASTAFile:

349

def __init__(self) -> None: ...

350

351

def load(self, filename: str, data: list) -> None:

352

"""

353

Load FASTA protein database.

354

355

Args:

356

filename (str): Path to FASTA file

357

data (list[FASTAEntry]): List to populate with entries

358

"""

359

360

def store(self, filename: str, data: list) -> None:

361

"""

362

Store protein sequences to FASTA file.

363

364

Args:

365

filename (str): Output file path

366

data (list[FASTAEntry]): FASTA entries to save

367

"""

368

369

class FASTAEntry:

370

def __init__(self, identifier: str = "", description: str = "", sequence: str = "") -> None: ...

371

def getIdentifier(self) -> str: ...

372

def getDescription(self) -> str: ...

373

def getSequence(self) -> str: ...

374

def setIdentifier(self, identifier: str) -> None: ...

375

def setDescription(self, description: str) -> None: ...

376

def setSequence(self, sequence: str) -> None: ...

377

```

378

379

### Quantification Formats

380

381

#### mzTab Format

382

383

```python { .api }

384

class MzTabFile:

385

def __init__(self) -> None: ...

386

387

def load(self, filename: str, mztab: MzTab) -> None:

388

"""

389

Load mzTab quantification file.

390

391

Args:

392

filename (str): Path to mzTab file

393

mztab (MzTab): MzTab object to populate

394

"""

395

396

def store(self, filename: str, mztab: MzTab) -> None:

397

"""

398

Store quantification results to mzTab file.

399

400

Args:

401

filename (str): Output file path

402

mztab (MzTab): MzTab data to save

403

"""

404

```

405

406

### Targeted Analysis Formats

407

408

#### Transition Lists

409

410

```python { .api }

411

class TraMLFile:

412

def __init__(self) -> None: ...

413

414

def load(self, filename: str, targeted_exp: TargetedExperiment) -> None:

415

"""

416

Load TraML transition list file.

417

418

Args:

419

filename (str): Path to TraML file

420

targeted_exp (TargetedExperiment): TargetedExperiment to populate

421

"""

422

423

def store(self, filename: str, targeted_exp: TargetedExperiment) -> None:

424

"""

425

Store transition list to TraML file.

426

427

Args:

428

filename (str): Output file path

429

targeted_exp (TargetedExperiment): TargetedExperiment to save

430

"""

431

```

432

433

### Cached and Indexed Access

434

435

#### Memory-Efficient File Access

436

437

```python { .api }

438

class CachedmzML:

439

def __init__(self, filename: str) -> None:

440

"""

441

Create cached mzML file handler for large files.

442

443

Args:

444

filename (str): Path to mzML file

445

"""

446

447

def getNrSpectra(self) -> int:

448

"""Get number of spectra in file."""

449

450

def getSpectrum(self, id: int) -> MSSpectrum:

451

"""

452

Get spectrum by index.

453

454

Args:

455

id (int): Zero-based spectrum index (0 to getNrSpectra() - 1)

456

457

Returns:

458

MSSpectrum: The requested spectrum

459

"""

460

461

def getNrChromatograms(self) -> int:

462

"""Get number of chromatograms in file."""

463

464

def getChromatogram(self, id: int) -> MSChromatogram:

465

"""

466

Get chromatogram by index.

467

468

Args:

469

id (int): Chromatogram index

470

471

Returns:

472

MSChromatogram: The requested chromatogram

473

"""

474

475

class IndexedMzMLHandler:

476

def __init__(self, filename: str) -> None:

477

"""

478

Create indexed mzML handler for random access.

479

480

Args:

481

filename (str): Path to indexed mzML file

482

"""

483

484

def getSpectrumByRT(self, rt: float) -> MSSpectrum:

485

"""

486

Get the spectrum whose retention time is closest to the given value.

487

488

Args:

489

rt (float): Target retention time

490

491

Returns:

492

MSSpectrum: Closest spectrum

493

"""

494

```

495

496

### File Format Detection

497

498

#### Automatic Format Detection

499

500

```python { .api }

501

class FileHandler:

502

@staticmethod

503

def getType(filename: str) -> Type:

504

"""

505

Detect file type from filename or content.

506

507

Args:

508

filename (str): Path to file

509

510

Returns:

511

Type: Detected file type

512

"""

513

514

@staticmethod

515

def getTypeByContent(filename: str) -> Type:

516

"""

517

Detect file type by examining file content.

518

519

Args:

520

filename (str): Path to file

521

522

Returns:

523

Type: Detected file type

524

"""

525

526

class Type:

527

UNKNOWN = 0

528

MZML = 1

529

MZXML = 2

530

FEATUREXML = 3

531

CONSENSUSXML = 4

532

IDXML = 5

533

MZIDENTML = 6

534

PEPXML = 7

535

PROTXML = 8

536

FASTA = 9

537

DTA = 10

538

MGF = 11

539

MSP = 12

540

TRAML = 13

541

MZTAB = 14

542

```

543

544

## Usage Examples

545

546

### Basic File Loading

547

548

```python

549

import pyopenms

550

551

# Load mzML file

552

exp = pyopenms.MSExperiment()

553

pyopenms.MzMLFile().load("data.mzML", exp)

554

print(f"Loaded {exp.size()} spectra")

555

556

# Load features

557

features = pyopenms.FeatureMap()

558

pyopenms.FeatureXMLFile().load("features.featureXML", features)

559

print(f"Loaded {features.size()} features")

560

561

# Load identifications

562

protein_ids = []

563

peptide_ids = []

564

pyopenms.IdXMLFile().load("identifications.idXML", protein_ids, peptide_ids)

565

print(f"Loaded {len(protein_ids)} protein IDs, {len(peptide_ids)} peptide IDs")

566

```

567

568

### Cached File Access for Large Files

569

570

```python

571

import pyopenms

572

573

# Use cached access for large mzML files

574

cached_file = pyopenms.CachedmzML("large_file.mzML")

575

num_spectra = cached_file.getNrSpectra()

576

577

# Process spectra one by one without loading entire file

578

for i in range(num_spectra):

579

spectrum = cached_file.getSpectrum(i)

580

rt = spectrum.getRT()

581

ms_level = spectrum.getMSLevel()

582

583

if ms_level == 1: # Process only MS1 spectra

584

mz_array, intensity_array = spectrum.get_peaks()

585

# Process spectrum data...

586

```

587

588

### File Format Detection

589

590

```python

591

import pyopenms

592

593

# Automatically detect file format

594

file_type = pyopenms.FileHandler.getType("unknown_file.xml")

595

596

if file_type == pyopenms.FileHandler.Type.MZML:

597

exp = pyopenms.MSExperiment()

598

pyopenms.MzMLFile().load("unknown_file.xml", exp)

599

elif file_type == pyopenms.FileHandler.Type.FEATUREXML:

600

features = pyopenms.FeatureMap()

601

pyopenms.FeatureXMLFile().load("unknown_file.xml", features)

602

```

603

604

## Types

605

606

### File Options

607

608

```python { .api }

609

class PeakFileOptions:

610

"""Options for peak file (mzML, mzXML) loading and storing."""

611

def __init__(self) -> None: ...

612

613

def setMSLevels(self, levels: list[int]) -> None:

614

"""Set MS levels to load."""

615

616

def getMSLevels(self) -> list[int]:

617

"""Get MS levels to load."""

618

619

def setRTRange(self, min_rt: float, max_rt: float) -> None:

620

"""Set retention time range."""

621

622

def setMZRange(self, min_mz: float, max_mz: float) -> None:

623

"""Set m/z range."""

624

625

def setIntensityRange(self, min_intensity: float, max_intensity: float) -> None:

626

"""Set intensity range."""

627

628

def setWriteIndex(self, write_index: bool) -> None:

629

"""Set whether to write index."""

630

631

def getWriteIndex(self) -> bool:

632

"""Get whether to write index."""

633

634

def setCompression(self, compression: bool) -> None:

635

"""Set compression for binary data."""

636

637

def getCompression(self) -> bool:

638

"""Get compression setting."""

639

640

class FeatureFileOptions:

641

"""Options for feature file (featureXML) loading and storing."""

642

def __init__(self) -> None: ...

643

644

def setLoadConvexHull(self, load: bool) -> None:

645

"""Set whether to load convex hulls."""

646

647

def getLoadConvexHull(self) -> bool:

648

"""Get whether to load convex hulls."""

649

650

def setLoadSubordinates(self, load: bool) -> None:

651

"""Set whether to load subordinate features."""

652

653

def getLoadSubordinates(self) -> bool:

654

"""Get whether to load subordinate features."""

655

```