CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-pyopenms

Python wrapper for C++ LC-MS library OpenMS for comprehensive mass spectrometry data analysis

Pending
Overview
Eval results
Files

docs/file-io.md

File I/O and Data Formats

Comprehensive support for mass spectrometry file formats with full metadata preservation. pyOpenMS handles the most common formats in proteomics and metabolomics, including vendor-neutral standards and formats for identification results.

Capabilities

Primary Data Formats

mzML Files

Standard format for raw mass spectrometry data with complete metadata support.

class MzMLFile:
    """
    Reader/writer for mzML, the standard format for raw mass spectrometry data.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Read an mzML file into an MSExperiment.

        Args:
            filename (str): Location of the mzML file to read
            exp (MSExperiment): Experiment object that receives the data
        """

    def store(self, filename: str, exp: MSExperiment) -> None:
        """
        Write an MSExperiment out as an mzML file.

        Args:
            filename (str): Destination path of the mzML file
            exp (MSExperiment): Experiment to be written
        """

    def loadBuffer(self, buffer: str, exp: MSExperiment) -> None:
        """
        Read mzML content that is already held in a string.

        Args:
            buffer (str): The mzML document as a string
            exp (MSExperiment): Experiment object that receives the data
        """

    def storeBuffer(self, exp: MSExperiment) -> str:
        """
        Serialize an MSExperiment to an mzML string.

        Args:
            exp (MSExperiment): Experiment to serialize

        Returns:
            str: The mzML document as a string
        """

    def loadSize(self, filename: str) -> tuple[int, int]:
        """
        Report how many spectra and chromatograms a file holds without
        loading it fully.

        Args:
            filename (str): Location of the mzML file to inspect

        Returns:
            tuple: (number_of_spectra, number_of_chromatograms)
        """

    def getOptions(self) -> PeakFileOptions:
        """
        Return the options used when loading and storing.

        Returns:
            PeakFileOptions: The options currently in effect
        """

    def setOptions(self, options: PeakFileOptions) -> None:
        """
        Replace the options used when loading and storing.

        Args:
            options (PeakFileOptions): Options to apply
        """

    def isSemanticallyValid(self, filename: str, errors: list, warnings: list) -> bool:
        """
        Check the semantics of an mzML file.

        Args:
            filename (str): Location of the mzML file to validate
            errors (list): Output list that is filled with error messages
            warnings (list): Output list that is filled with warning messages

        Returns:
            bool: True when the file is valid, False otherwise
        """

mzXML Files

Legacy format still widely used for mass spectrometry data.

class MzXMLFile:
    """
    Reader/writer for mzXML, a legacy mass spectrometry data format.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Read an mzXML file into an MSExperiment.

        Args:
            filename (str): Location of the mzXML file to read
            exp (MSExperiment): Experiment object that receives the data
        """

    def store(self, filename: str, exp: MSExperiment) -> None:
        """
        Write an MSExperiment out as an mzXML file.

        Args:
            filename (str): Destination path of the mzXML file
            exp (MSExperiment): Experiment to be written
        """

Feature Data Formats

Feature Detection Results

class FeatureXMLFile:
    """
    Reader/writer for featureXML files holding feature detection results.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, features: FeatureMap) -> None:
        """
        Read feature detection results from a featureXML file.

        Args:
            filename (str): Location of the featureXML file to read
            features (FeatureMap): Map that receives the loaded features
        """

    def store(self, filename: str, features: FeatureMap) -> None:
        """
        Write a FeatureMap out as a featureXML file.

        Args:
            filename (str): Destination path of the featureXML file
            features (FeatureMap): Map to be written
        """

    def loadSize(self, filename: str) -> int:
        """
        Report how many features a file holds without loading it fully.

        Args:
            filename (str): Location of the featureXML file to inspect

        Returns:
            int: Count of features stored in the file
        """

    def getOptions(self) -> FeatureFileOptions:
        """
        Return the options used when loading and storing.

        Returns:
            FeatureFileOptions: The options currently in effect
        """

    def setOptions(self, options: FeatureFileOptions) -> None:
        """
        Replace the options used when loading and storing.

        Args:
            options (FeatureFileOptions): Options to apply
        """

Consensus Features

class ConsensusXMLFile:
    """
    Reader/writer for consensusXML files holding consensus features.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, consensus: ConsensusMap) -> None:
        """
        Read consensus features from a consensusXML file.

        Args:
            filename (str): Location of the consensusXML file to read
            consensus (ConsensusMap): Map that receives the loaded features
        """

    def store(self, filename: str, consensus: ConsensusMap) -> None:
        """
        Write a ConsensusMap out as a consensusXML file.

        Args:
            filename (str): Destination path of the consensusXML file
            consensus (ConsensusMap): Map to be written
        """

Identification Formats

OpenMS Identification Format

class IdXMLFile:
    """
    Reader/writer for idXML, the OpenMS identification format.
    """

    def __init__(self) -> None: ...

    # The element types are written as quoted forward references so that the
    # annotations agree with the documented list contents without requiring
    # the identification classes to be defined where this stub is declared.
    def load(
        self,
        filename: str,
        protein_ids: list["ProteinIdentification"],
        peptide_ids: list["PeptideIdentification"],
    ) -> None:
        """
        Load identification results from idXML file.

        Args:
            filename (str): Path to idXML file
            protein_ids (list[ProteinIdentification]): List to populate with protein IDs
            peptide_ids (list[PeptideIdentification]): List to populate with peptide IDs
        """

    def store(
        self,
        filename: str,
        protein_ids: list["ProteinIdentification"],
        peptide_ids: list["PeptideIdentification"],
    ) -> None:
        """
        Store identification results to idXML file.

        Args:
            filename (str): Output file path
            protein_ids (list[ProteinIdentification]): Protein identifications
            peptide_ids (list[PeptideIdentification]): Peptide identifications
        """

Standard Identification Formats

class MzIdentMLFile:
    """
    Reader/writer for mzIdentML identification files.
    """

    def __init__(self) -> None: ...

    # The element types are written as quoted forward references so that the
    # annotations agree with the documented list contents without requiring
    # the identification classes to be defined where this stub is declared.
    def load(
        self,
        filename: str,
        protein_ids: list["ProteinIdentification"],
        peptide_ids: list["PeptideIdentification"],
    ) -> None:
        """
        Load mzIdentML identification file.

        Args:
            filename (str): Path to mzIdentML file
            protein_ids (list[ProteinIdentification]): List to populate
            peptide_ids (list[PeptideIdentification]): List to populate
        """

    def store(
        self,
        filename: str,
        protein_ids: list["ProteinIdentification"],
        peptide_ids: list["PeptideIdentification"],
    ) -> None:
        """
        Store to mzIdentML format.

        Args:
            filename (str): Output file path
            protein_ids (list[ProteinIdentification]): Protein identifications
            peptide_ids (list[PeptideIdentification]): Peptide identifications
        """

class PepXMLFile:
    """
    Reader for pepXML identification files.
    """

    def __init__(self) -> None: ...

    # The element types are written as quoted forward references so that the
    # annotations agree with the documented list contents without requiring
    # the identification classes to be defined where this stub is declared.
    def load(
        self,
        filename: str,
        protein_ids: list["ProteinIdentification"],
        peptide_ids: list["PeptideIdentification"],
    ) -> None:
        """
        Load pepXML identification file.

        Args:
            filename (str): Path to pepXML file
            protein_ids (list[ProteinIdentification]): List to populate
            peptide_ids (list[PeptideIdentification]): List to populate
        """

class ProtXMLFile:
    """
    Reader for protXML protein identification files.
    """

    def __init__(self) -> None: ...

    # The element types are written as quoted forward references so that the
    # annotations agree with the documented list contents without requiring
    # the identification classes to be defined where this stub is declared.
    def load(
        self,
        filename: str,
        protein_ids: list["ProteinIdentification"],
        peptide_ids: list["PeptideIdentification"],
    ) -> None:
        """
        Load protXML protein identification file.

        Args:
            filename (str): Path to protXML file
            protein_ids (list[ProteinIdentification]): List to populate
            peptide_ids (list[PeptideIdentification]): List to populate
        """

Spectral Data Formats

Simple Spectrum Formats

class DTAFile:
    """
    Reader/writer for DTA files, each holding a single spectrum.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, spectrum: MSSpectrum) -> None:
        """
        Read a DTA spectrum file.

        Args:
            filename (str): Location of the DTA file to read
            spectrum (MSSpectrum): Spectrum object that receives the data
        """

    def store(self, filename: str, spectrum: MSSpectrum) -> None:
        """
        Write a spectrum out as a DTA file.

        Args:
            filename (str): Destination path of the DTA file
            spectrum (MSSpectrum): Spectrum to be written
        """

class MGFFile:
    """
    Reader/writer for Mascot Generic Format (MGF) files.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Read a Mascot Generic Format file.

        Args:
            filename (str): Location of the MGF file to read
            exp (MSExperiment): Experiment object that receives the data
        """

    def store(self, filename: str, exp: MSExperiment) -> None:
        """
        Write an experiment out as an MGF file.

        Args:
            filename (str): Destination path of the MGF file
            exp (MSExperiment): Experiment to be written
        """

class MSPFile:
    """
    Reader for MSP spectral library files.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, exp: MSExperiment) -> None:
        """
        Read an MSP spectral library file.

        Args:
            filename (str): Location of the MSP file to read
            exp (MSExperiment): Experiment object that receives the data
        """

Sequence Database Formats

FASTA Files

class FASTAFile:
    """
    Reader/writer for FASTA protein sequence databases.
    """

    def __init__(self) -> None: ...

    # The element type is a quoted forward reference: it makes the annotation
    # agree with the documented list contents, and quoting keeps the stub
    # valid even though the entry class is not defined at this point.
    def load(self, filename: str, data: list["FASTAEntry"]) -> None:
        """
        Load FASTA protein database.

        Args:
            filename (str): Path to FASTA file
            data (list[FASTAEntry]): List to populate with entries
        """

    def store(self, filename: str, data: list["FASTAEntry"]) -> None:
        """
        Store protein sequences to FASTA file.

        Args:
            filename (str): Output file path
            data (list[FASTAEntry]): FASTA entries to save
        """

class FASTAEntry:
    """
    A single FASTA record made of an identifier, a description and a sequence.
    """
    # NOTE(review): confirm these accessor names against the installed
    # pyOpenMS release - the binding may expose identifier/description/sequence
    # as plain attributes rather than get/set methods.

    def __init__(
        self,
        identifier: str = "",
        description: str = "",
        sequence: str = "",
    ) -> None:
        """
        Create an entry; each field defaults to the empty string.

        Args:
            identifier (str): Identifier of the entry
            description (str): Description of the entry
            sequence (str): Sequence of the entry
        """

    def getIdentifier(self) -> str:
        """Return the identifier of this entry."""

    def getDescription(self) -> str:
        """Return the description of this entry."""

    def getSequence(self) -> str:
        """Return the sequence of this entry."""

    def setIdentifier(self, identifier: str) -> None:
        """Set the identifier of this entry."""

    def setDescription(self, description: str) -> None:
        """Set the description of this entry."""

    def setSequence(self, sequence: str) -> None:
        """Set the sequence of this entry."""

Quantification Formats

mzTab Format

class MzTabFile:
    """
    Reader/writer for mzTab quantification files.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, mztab: MzTab) -> None:
        """
        Read an mzTab quantification file.

        Args:
            filename (str): Location of the mzTab file to read
            mztab (MzTab): MzTab object that receives the data
        """

    def store(self, filename: str, mztab: MzTab) -> None:
        """
        Write quantification results out as an mzTab file.

        Args:
            filename (str): Destination path of the mzTab file
            mztab (MzTab): MzTab data to be written
        """

Targeted Analysis Formats

Transition Lists

class TraMLFile:
    """
    Reader/writer for TraML transition list files.
    """

    def __init__(self) -> None: ...

    def load(self, filename: str, targeted_exp: TargetedExperiment) -> None:
        """
        Read a TraML transition list file.

        Args:
            filename (str): Location of the TraML file to read
            targeted_exp (TargetedExperiment): Object that receives the transitions
        """

    def store(self, filename: str, targeted_exp: TargetedExperiment) -> None:
        """
        Write a transition list out as a TraML file.

        Args:
            filename (str): Destination path of the TraML file
            targeted_exp (TargetedExperiment): Transitions to be written
        """

Cached and Indexed Access

Memory-Efficient File Access

class CachedmzML:
    """
    Cached mzML file handler intended for large files.
    """

    def __init__(self, filename: str) -> None:
        """
        Set up the cached handler for one mzML file.

        Args:
            filename (str): Location of the mzML file
        """

    def getNrSpectra(self) -> int:
        """Return how many spectra the file contains."""

    # The parameter is called `id` (shadowing the builtin) because that is the
    # documented name; renaming it would break keyword callers.
    def getSpectrum(self, id: int) -> MSSpectrum:
        """
        Fetch one spectrum by its index.

        Args:
            id (int): Index of the spectrum

        Returns:
            MSSpectrum: The spectrum at that index
        """

    def getNrChromatograms(self) -> int:
        """Return how many chromatograms the file contains."""

    # Same naming constraint as getSpectrum: `id` is part of the interface.
    def getChromatogram(self, id: int) -> MSChromatogram:
        """
        Fetch one chromatogram by its index.

        Args:
            id (int): Index of the chromatogram

        Returns:
            MSChromatogram: The chromatogram at that index
        """

class IndexedMzMLHandler:
    """
    Handler giving random access to an indexed mzML file.
    """

    def __init__(self, filename: str) -> None:
        """
        Set up the handler for one indexed mzML file.

        Args:
            filename (str): Location of the indexed mzML file
        """

    def getSpectrumByRT(self, rt: float) -> MSSpectrum:
        """
        Fetch the spectrum whose retention time is closest to the target.

        Args:
            rt (float): Retention time to look for

        Returns:
            MSSpectrum: The closest spectrum
        """

File Format Detection

Automatic Format Detection

class FileHandler:
    """
    File type detection helpers together with the file type constants.
    """

    # Declared before the methods on purpose: the return annotations below are
    # evaluated while the class body runs, so `Type` must already exist or the
    # class definition fails with a NameError.
    class Type:
        """Integer constants identifying the supported file formats."""
        # NOTE(review): names and values are reproduced from this page; confirm
        # them against the enum shipped with the installed pyOpenMS before
        # relying on the numeric values.
        UNKNOWN = 0
        MZML = 1
        MZXML = 2
        FEATUREXML = 3
        CONSENSUSXML = 4
        IDXML = 5
        MZIDENTML = 6
        PEPXML = 7
        PROTXML = 8
        FASTA = 9
        DTA = 10
        MGF = 11
        MSP = 12
        TRAML = 13
        MZTAB = 14

    @staticmethod
    def getType(filename: str) -> Type:
        """
        Detect file type from filename or content.

        Args:
            filename (str): Path to file

        Returns:
            Type: Detected file type
        """

    @staticmethod
    def getTypeByContent(filename: str) -> Type:
        """
        Detect file type by examining file content.

        Args:
            filename (str): Path to file

        Returns:
            Type: Detected file type
        """

Usage Examples

Basic File Loading

import pyopenms

# Raw spectra: read the mzML file into a fresh experiment
exp = pyopenms.MSExperiment()
mzml_reader = pyopenms.MzMLFile()
mzml_reader.load("data.mzML", exp)
print(f"Loaded {exp.size()} spectra")

# Features: read the featureXML file into a fresh feature map
features = pyopenms.FeatureMap()
feature_reader = pyopenms.FeatureXMLFile()
feature_reader.load("features.featureXML", features)
print(f"Loaded {features.size()} features")

# Identifications: both lists are passed in and used as outputs
protein_ids, peptide_ids = [], []
id_reader = pyopenms.IdXMLFile()
id_reader.load("identifications.idXML", protein_ids, peptide_ids)
print(f"Loaded {len(protein_ids)} protein IDs, {len(peptide_ids)} peptide IDs")

Cached File Access for Large Files

import pyopenms

# Open the large mzML file through the cached handler
cached_file = pyopenms.CachedmzML("large_file.mzML")
num_spectra = cached_file.getNrSpectra()

# Fetch spectra one at a time instead of loading the entire file
for i in range(num_spectra):
    spectrum = cached_file.getSpectrum(i)
    rt = spectrum.getRT()
    ms_level = spectrum.getMSLevel()

    # Only MS1 spectra are processed; everything else is skipped
    if ms_level != 1:
        continue

    mz_array, intensity_array = spectrum.get_peaks()
    # Process spectrum data...

File Format Detection

import pyopenms

# The same path is used for detection and for loading, so name it once
path = "unknown_file.xml"

# Let FileHandler work out which format the file is in
file_type = pyopenms.FileHandler.getType(path)

# Dispatch to the loader that matches the detected format
if file_type == pyopenms.FileHandler.Type.MZML:
    exp = pyopenms.MSExperiment()
    pyopenms.MzMLFile().load(path, exp)
elif file_type == pyopenms.FileHandler.Type.FEATUREXML:
    features = pyopenms.FeatureMap()
    pyopenms.FeatureXMLFile().load(path, features)

Types

File Options

class PeakFileOptions:
    """Loading and storing options for peak files (mzML, mzXML)."""

    def __init__(self) -> None: ...

    def setMSLevels(self, levels: list[int]) -> None:
        """
        Choose the MS levels to load.

        Args:
            levels (list[int]): MS levels to load
        """

    def getMSLevels(self) -> list[int]:
        """
        Return the MS levels to load.

        Returns:
            list[int]: MS levels to load
        """

    def setRTRange(self, min_rt: float, max_rt: float) -> None:
        """
        Set the retention time range.

        Args:
            min_rt (float): Lower bound of the retention time range
            max_rt (float): Upper bound of the retention time range
        """

    def setMZRange(self, min_mz: float, max_mz: float) -> None:
        """
        Set the m/z range.

        Args:
            min_mz (float): Lower bound of the m/z range
            max_mz (float): Upper bound of the m/z range
        """

    def setIntensityRange(self, min_intensity: float, max_intensity: float) -> None:
        """
        Set the intensity range.

        Args:
            min_intensity (float): Lower bound of the intensity range
            max_intensity (float): Upper bound of the intensity range
        """

    def setWriteIndex(self, write_index: bool) -> None:
        """
        Choose whether an index is written.

        Args:
            write_index (bool): Whether to write the index
        """

    def getWriteIndex(self) -> bool:
        """
        Report whether an index is written.

        Returns:
            bool: Whether to write the index
        """

    def setCompression(self, compression: bool) -> None:
        """
        Set compression for binary data.

        Args:
            compression (bool): Compression setting to apply
        """

    def getCompression(self) -> bool:
        """
        Return the compression setting.

        Returns:
            bool: Current compression setting
        """

class FeatureFileOptions:
    """Loading and storing options for feature files (featureXML)."""

    def __init__(self) -> None: ...

    def setLoadConvexHull(self, load: bool) -> None:
        """
        Choose whether convex hulls are loaded.

        Args:
            load (bool): Whether to load convex hulls
        """

    def getLoadConvexHull(self) -> bool:
        """
        Report whether convex hulls are loaded.

        Returns:
            bool: Whether to load convex hulls
        """

    def setLoadSubordinates(self, load: bool) -> None:
        """
        Choose whether subordinate features are loaded.

        Args:
            load (bool): Whether to load subordinate features
        """

    def getLoadSubordinates(self) -> bool:
        """
        Report whether subordinate features are loaded.

        Returns:
            bool: Whether to load subordinate features
        """

Install with Tessl CLI

npx tessl i tessl/pypi-pyopenms

docs

alignment.md

chemistry.md

feature-detection.md

file-io.md

index.md

ms-data.md

peptide-protein.md

targeted-analysis.md

tile.json