tessl/pypi-mdanalysis

An object-oriented toolkit to analyze molecular dynamics trajectories.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

File I/O and Format Support

Name: tessl/pypi-mdanalysis
Author: tessl

MDAnalysis provides comprehensive support for reading and writing molecular structure and trajectory data across many file formats commonly used in molecular dynamics simulations.

Overview

The I/O system in MDAnalysis is built around three main components:

Readers: Read trajectory data with support for sequential and random access
Writers: Write coordinate data to various output formats
Parsers: Extract topology information from structure files

All I/O operations share a unified interface: the file format is detected automatically from the file extension and can be overridden explicitly with the `format` argument.

Core I/O Functions

Reader Function

def reader(filename, format=None, **kwargs):
    """
    Return a trajectory reader suited to ``filename``.

    Only the interface is shown in this listing: the body consists of this
    docstring and carries no implementation.

    Parameters
    ----------
    filename : str or file-like
        Path to trajectory file or file-like object.
    format : str, optional
        File format override. If None, format is guessed from file extension.
    **kwargs
        Additional arguments passed to format-specific reader.

    Returns
    -------
    ReaderBase
        Trajectory reader object appropriate for the file format.

    Examples
    --------
    >>> from MDAnalysis.coordinates import reader
    >>> traj = reader("trajectory.xtc")
    >>> for ts in traj:
    ...     print(f"Frame {ts.frame}, Time: {ts.time}")
    """

Writer Function

def Writer(filename, n_atoms=None, format=None, multiframe=None, **kwargs):
    """
    Create a trajectory writer for the specified file format.

    Only the interface is shown in this listing: the body consists of this
    docstring and carries no implementation.

    Parameters
    ----------
    filename : str or file-like
        Output filename or file-like object.
    n_atoms : int, optional
        Number of atoms in the system (required for some formats).
    format : str, optional
        Output format. If None, guessed from filename extension.
    multiframe : bool, optional
        Whether writer supports multiple frames. If None, determined automatically.
    **kwargs
        Additional format-specific arguments. ``bonds`` (str, optional) is
        one of them: it is not a named parameter of this signature and is
        therefore passed by keyword through ``**kwargs``. It selects how
        bond information is handled ('all', 'none', 'conect').

    Returns
    -------
    WriterBase
        Writer object for the specified format.

    Examples
    --------
    The examples assume ``u`` is an already constructed Universe.

    >>> W = Writer("output.xtc", n_atoms=1000)
    >>> for ts in u.trajectory:
    ...     W.write(u.atoms)
    >>> W.close()

    >>> # Context manager usage
    >>> with Writer("output.dcd", n_atoms=u.atoms.n_atoms) as W:
    ...     for ts in u.trajectory:
    ...         W.write(u.atoms)
    """

Supported File Formats

Structure Formats (Topology)

MDAnalysis supports reading topology information from these formats:

CHARMM Formats

# NOTE(review): `mda` is presumably `import MDAnalysis as mda`; the import is
# not part of this listing.

# PSF (CHARMM/NAMD Topology)
# Topology file first, trajectory file second.
u = mda.Universe("system.psf", "trajectory.dcd")

# CRD (CHARMM Coordinate) 
# Here a single file is the only input.
u = mda.Universe("coordinates.crd")

Capabilities:

PSF: Complete topology with bonds, angles, dihedrals, impropers
CRD: Coordinate data, limited topology information

GROMACS Formats

# TPR (GROMACS Binary Topology)
u = mda.Universe("topol.tpr", "trajectory.xtc")

# GRO (GROMACS Structure)
u = mda.Universe("system.gro")

# TOP/ITP (GROMACS Text Topology) - limited support
# The ".top" extension is shared with AMBER topologies and is auto-detected
# as such, so the GROMACS text-topology parser has to be named explicitly.
u = mda.Universe("topol.top", topology_format="ITP")

Capabilities:

TPR: Complete binary topology with all parameters
GRO: Coordinates, atom names, residue information
TOP: Basic connectivity (bonds only)

AMBER Formats

# PRMTOP (AMBER Topology)
u = mda.Universe("system.prmtop", "trajectory.nc")

# INPCRD (AMBER Coordinate)
# INPCRD carries coordinates and box information only, so it cannot define
# the atoms by itself: load it on top of the matching PRMTOP topology.
u = mda.Universe("system.prmtop", "coordinates.inpcrd")

Capabilities:

PRMTOP: Complete topology with force field parameters
INPCRD: Coordinates, box information

Standard Formats

# Each of the formats below is loaded from a single file, with no separate
# trajectory argument.

# PDB (Protein Data Bank)
u = mda.Universe("structure.pdb")

# PQR (PDB with Charges and Radii)  
u = mda.Universe("system.pqr")

# MOL2 (Tripos Molecular Structure)
u = mda.Universe("molecule.mol2")

# PDBQT (AutoDock format)
u = mda.Universe("protein.pdbqt")

Trajectory Formats

Binary Trajectory Formats

# Every binary trajectory below is paired with a topology file, which is
# always the first argument.

# DCD (CHARMM/NAMD/LAMMPS)
u = mda.Universe("topology.psf", "trajectory.dcd")

# XTC (GROMACS Compressed) 
u = mda.Universe("topol.tpr", "trajectory.xtc")

# TRR (GROMACS Full Precision)
u = mda.Universe("topol.tpr", "trajectory.trr")

# TNG (Trajectory Next Generation)
# NOTE(review): TNG reading may depend on an optional extra package - confirm.
u = mda.Universe("topol.tpr", "trajectory.tng")

# NetCDF (AMBER NetCDF)
u = mda.Universe("system.prmtop", "trajectory.nc")

Text Trajectory Formats

# XYZ (Generic Coordinate)
u = mda.Universe("trajectory.xyz")

# LAMMPS Trajectory
# Neither ".lammps" nor ".lammpstrj" is an extension the automatic format
# detection knows, so both the topology and the trajectory format are named.
u = mda.Universe(
    "data.lammps",
    "dump.lammpstrj",
    topology_format="DATA",
    format="LAMMPSDUMP",
)

# AMBER ASCII Trajectory
# The file name "mdcrd" has no extension to guess from; state the format.
u = mda.Universe("system.prmtop", "mdcrd", format="MDCRD")

Reader Base Classes

ReaderBase

class ReaderBase:
    """
    Base class for trajectory readers supporting multiple frames.

    Only the interface is shown in this listing: each member's body is its
    docstring and carries no implementation.
    """

    @property
    def n_frames(self):
        """
        Total number of frames in trajectory.

        Returns
        -------
        int
            Number of trajectory frames.
        """

    @property
    def dt(self):
        """
        Time step between frames.

        Returns
        -------
        float
            Time step in picoseconds.
        """

    @property
    def totaltime(self):
        """
        Total simulation time span.

        Returns
        -------
        float
            Total time covered by trajectory in picoseconds.
        """

    def __iter__(self):
        """
        Iterate through all frames in trajectory.

        Yields
        ------
        Timestep
            Timestep object for each frame.

        Examples
        --------
        >>> for ts in u.trajectory:
        ...     print(f"Time: {ts.time}, Frame: {ts.frame}")
        """

    def __getitem__(self, frame):
        """
        Access specific frame(s) by index.

        Parameters
        ----------
        frame : int or slice
            Frame index or slice object. Negative integers count from the
            end, as in the ``u.trajectory[-1]`` example below.

        Returns
        -------
        Timestep
            Timestep object for an integer index. A slice is meant to be
            iterated frame by frame (``for ts in u.trajectory[::10]``);
            presumably it yields Timestep objects rather than being a single
            Timestep - TODO confirm the exact return type.

        Examples
        --------
        >>> ts = u.trajectory[0]      # First frame
        >>> ts = u.trajectory[-1]     # Last frame  
        >>> u.trajectory[10:20:2]     # Slice with step
        """

    def next(self):
        """
        Advance to next frame.

        Returns
        -------
        Timestep
            Timestep object for next frame.
        """

    def rewind(self):
        """
        Return to first frame of trajectory.

        Examples
        --------
        >>> u.trajectory.rewind()
        >>> assert u.trajectory.frame == 0
        """

    def close(self):
        """
        Close trajectory file and free resources.
        """

SingleFrameReaderBase

class SingleFrameReaderBase:
    """
    Base class for single-frame coordinate readers (e.g., PDB, GRO).

    Only the interface is shown in this listing: the property body is its
    docstring and carries no implementation.
    """

    @property
    def n_frames(self):
        """
        Number of frames, which is fixed for single-frame readers.

        Returns
        -------
        int
            Always 1.
        """

Writer Base Classes

WriterBase

class WriterBase:
    """
    Base class for coordinate writers.

    Only the interface is shown in this listing: each member's body is its
    docstring and carries no implementation. The class defines both
    ``__enter__`` and ``__exit__``, so instances can be used in a ``with``
    statement.
    """

    def __init__(self, filename, n_atoms, **kwargs):
        """
        Initialize coordinate writer.

        Parameters
        ----------
        filename : str
            Output filename.
        n_atoms : int
            Number of atoms to write.
        **kwargs
            Format-specific arguments.
        """

    def write(self, selection, ts=None):
        """
        Write coordinates for selected atoms.

        Parameters
        ----------
        selection : AtomGroup
            Atoms to write to file.
        ts : Timestep, optional
            Timestep object with coordinate data. If None, uses
            current coordinates from selection.

        Examples
        --------
        The example assumes ``u`` is a Universe and ``protein`` an AtomGroup
        selected from it.

        >>> with Writer("output.pdb", n_atoms=protein.n_atoms) as W:
        ...     for ts in u.trajectory:
        ...         W.write(protein)
        """

    def close(self):
        """
        Close output file and finalize writing.
        """

    def __enter__(self):
        """
        Context manager entry.

        Returns
        -------
        WriterBase
            Self for context manager usage.
        """

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Context manager exit, automatically closes file.

        Parameters
        ----------
        exc_type, exc_val, exc_tb
            Exception details supplied by the ``with`` statement.
        """

Timestep Class

class Timestep:
    """
    Container for coordinate data from a single trajectory frame.

    Only the interface is shown in this listing: each member's body is its
    docstring and carries no implementation.
    """

    def __init__(self, n_atoms, **kwargs):
        """
        Create timestep for specified number of atoms.

        Parameters
        ----------
        n_atoms : int
            Number of atoms in the system.
        **kwargs
            Allocation flags. They are not named parameters of this
            signature and are therefore passed by keyword:

            positions : bool, optional
                Whether to allocate position array (default True).
            velocities : bool, optional
                Whether to allocate velocity array (default False).
            forces : bool, optional
                Whether to allocate force array (default False).
        """

    @property
    def positions(self):
        """
        Atomic coordinates for current frame.

        Returns
        -------
        numpy.ndarray
            Array of shape (n_atoms, 3) with atomic coordinates.
        """

    @property
    def velocities(self):
        """
        Atomic velocities for current frame.

        Returns
        -------
        numpy.ndarray or None
            Array of shape (n_atoms, 3) with velocities if available.
        """

    @property
    def forces(self):
        """
        Atomic forces for current frame.

        Returns
        -------
        numpy.ndarray or None
            Array of shape (n_atoms, 3) with forces if available.
        """

    @property
    def dimensions(self):
        """
        Unit cell dimensions.

        Returns
        -------
        numpy.ndarray or None
            Array [a, b, c, alpha, beta, gamma] with box parameters.
        """

    @property
    def volume(self):
        """
        Unit cell volume.

        Returns
        -------
        float or None
            Volume in cubic Angstroms, None if no box information.
        """

    @property
    def time(self):
        """
        Simulation time for this frame.

        Returns
        -------
        float
            Time in picoseconds.
        """

    @property
    def frame(self):
        """
        Frame number in trajectory.

        Returns
        -------
        int
            Zero-based frame index.
        """

    def copy(self):
        """
        Create independent copy of timestep.

        Returns
        -------
        Timestep
            Deep copy of timestep with independent arrays.
        """

Format-Specific Features

GROMACS XTC/TRR

# XTC compressed trajectories
u = mda.Universe("topol.tpr", "trajectory.xtc")

# Access precision information
print(f"XTC precision: {u.trajectory.precision}")

# TRR full precision with velocities/forces
u = mda.Universe("topol.tpr", "trajectory.trr")
# `velocities` is a property the Timestep class always declares, so hasattr()
# says nothing about whether the frame actually carries velocity data.
# `has_velocities` is the explicit per-frame check.
if u.trajectory.ts.has_velocities:
    velocities = u.trajectory.ts.velocities

CHARMM/NAMD DCD

u = mda.Universe("system.psf", "trajectory.dcd")

# DCD supports fixed atoms
# The hasattr guard means `fixed` is only read when this reader exposes it.
if hasattr(u.trajectory, 'fixed'):
    fixed_atoms = u.trajectory.fixed

# Periodic boundary information
# Box of the current frame as [a, b, c, alpha, beta, gamma].
dimensions = u.trajectory.ts.dimensions

AMBER NetCDF

u = mda.Universe("system.prmtop", "trajectory.nc")

# NetCDF trajectories support metadata
# NOTE(review): the attribute names `Conventions` and `application` are used
# as given here - confirm the reader exposes them under these exact names.
print(f"NetCDF conventions: {u.trajectory.Conventions}")
print(f"Application: {u.trajectory.application}")

I/O Usage Patterns

Reading Multiple Trajectories

# Concatenate multiple trajectory files
u = mda.Universe("topology.psf", "part1.dcd", "part2.dcd", "part3.dcd")

# All files treated as continuous trajectory
print(f"Total frames: {u.trajectory.n_frames}")

# Or attach the parts to an existing universe. load_new() replaces the
# current trajectory instead of appending to it, so calling it once per file
# would leave only the last part loaded. Pass every part in a single call.
u = mda.Universe("topology.psf", "part1.dcd")
u.load_new(["part1.dcd", "part2.dcd", "part3.dcd"])

Writing Trajectories

# Write subset of atoms
protein = u.select_atoms("protein")

# n_atoms must describe the group being written, not the whole system.
with mda.Writer("protein_only.xtc", n_atoms=protein.n_atoms) as W:
    for ts in u.trajectory:
        # Each iteration moves to the next frame; the group is written once per frame.
        W.write(protein)

# Write specific frames
with mda.Writer("every_10th.dcd", n_atoms=u.atoms.n_atoms) as W:
    for ts in u.trajectory[::10]:  # Every 10th frame
        W.write(u.atoms)

# Single frame output
# NOTE(review): despite the file name, this writes whichever frame is current
# at this point, which is not necessarily the final one.
u.atoms.write("final_frame.pdb")  # Current frame
u.trajectory[-1]  # Go to last frame
u.atoms.write("last_frame.gro")

Memory-Efficient Processing

# Process large trajectories in chunks
def process_in_chunks(universe, chunk_size=1000, callback=None):
    """
    Visit every frame of a trajectory in consecutive fixed-size chunks.

    Frames are streamed from the reader by slicing it. The trajectory is
    deliberately not moved into memory with
    ``universe.transfer_to_memory(start=..., stop=...)``: that call replaces
    ``universe.trajectory`` with a reader holding only the requested frames,
    so every chunk after the first would be addressed against a shortened
    trajectory and frames would be skipped.

    Parameters
    ----------
    universe : Universe
        Object exposing ``trajectory.n_frames`` and slice access on
        ``trajectory``.
    chunk_size : int, optional
        Maximum number of frames per chunk (default 1000). Must be >= 1.
    callback : callable, optional
        Called as ``callback(ts)`` for every frame; this is where the
        per-frame analysis goes. If None, frames are iterated without
        further work.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

    # Bind the reader once so all chunks index the same, full trajectory.
    trajectory = universe.trajectory
    n_frames = trajectory.n_frames

    for start in range(0, n_frames, chunk_size):
        stop = min(start + chunk_size, n_frames)

        for ts in trajectory[start:stop]:
            if callback is not None:
                callback(ts)

Format Conversion

def convert_trajectory(input_files, output_file, selection="all"):
    """
    Re-write a trajectory, or a subset of its atoms, to another file.

    A Universe is built from ``input_files``, the atoms matching
    ``selection`` are picked, and that group is written once for every
    frame of the trajectory.

    Parameters
    ----------
    input_files : tuple
        (topology, trajectory) file paths, unpacked into ``mda.Universe``.
    output_file : str
        Path of the trajectory file to create.
    selection : str, optional
        Selection string choosing the atoms to write (default "all").
    """
    universe = mda.Universe(*input_files)
    group = universe.select_atoms(selection)
    atom_count = group.n_atoms

    with mda.Writer(output_file, n_atoms=atom_count) as sink:
        # Iterating advances the trajectory; the group reflects the current frame.
        for _frame in universe.trajectory:
            sink.write(group)

# Example: Convert AMBER to GROMACS
# No selection is given, so the default "all" is written.
convert_trajectory(("system.prmtop", "trajectory.nc"), "output.xtc")

# Example: Extract protein only
# The third argument is the atom selection string.
convert_trajectory(("system.psf", "trajectory.dcd"), "protein.xtc", "protein")

Handling File Streams

import gzip
import bz2
import shutil
from io import StringIO

# Binary trajectories (XTC, TRR, DCD, ...) are read by file name and cannot
# be read from a compressed file or an open stream: decompress them first.
with gzip.open("trajectory.xtc.gz", 'rb') as src, open("trajectory.xtc", 'wb') as dst:
    shutil.copyfileobj(src, dst)
u = mda.Universe("topology.tpr", "trajectory.xtc")

# Multiple compressed trajectories: decompress each part, then chain them
with bz2.open("traj1.xtc.bz2", 'rb') as src, open("traj1.xtc", 'wb') as dst:
    shutil.copyfileobj(src, dst)
with gzip.open("traj2.xtc.gz", 'rb') as src, open("traj2.xtc", 'wb') as dst:
    shutil.copyfileobj(src, dst)
u = mda.Universe("topology.tpr", "traj1.xtc", "traj2.xtc")

# Compressed text formats (e.g. PDB, GRO, XYZ) are opened transparently
u = mda.Universe("structure.pdb.gz")

# In-memory data works for text formats. A stream has no file name to guess
# the format from, so it is wrapped in NamedStream and given one.
# `pdb_text` stands for a str holding the contents of a PDB file.
data = StringIO(pdb_text)
u = mda.Universe(mda.lib.util.NamedStream(data, "structure.pdb"))

Error Handling

from MDAnalysis.exceptions import NoDataError

try:
    u = mda.Universe("topology.psf", "trajectory.dcd")
except FileNotFoundError:
    print("Trajectory file not found")
except NoDataError as e:
    print(f"Missing required data: {e}")
else:
    # Everything below needs `u`. Running it only in the `else` branch
    # avoids a NameError when loading failed and `u` was never bound.

    # Check for optional data
    if u.trajectory.ts.has_velocities:
        velocities = u.atoms.velocities
    else:
        print("No velocity data available")

    # Validate trajectory compatibility
    if u.atoms.n_atoms != u.trajectory.n_atoms:
        raise ValueError("Atom count mismatch between topology and trajectory")

Performance Considerations

Memory Usage

# Load trajectory into memory for repeated access
u.transfer_to_memory()  # Load all frames

# Partial loading for large trajectories
# start/stop/step pick the frames to load, in the manner of a slice.
u.transfer_to_memory(start=0, stop=1000, step=10)  # Every 10th frame

# Memory-efficient single pass
# Nothing is accumulated here: each frame is handled as it is read.
for ts in u.trajectory:  # Streaming access
    # Process frame immediately
    pass

Random Access Performance

# Efficient for formats with index support (XTC, TRR, NetCDF)
u.trajectory[1000]  # Direct access to frame 1000

# Less efficient for sequential formats (DCD, ASCII)
# NOTE(review): DCD stores fixed-size frames, so seeking may be cheap there
# as well - confirm before relying on this grouping.
# Consider loading into memory for random access
# The 10000-frame threshold is an illustrative cut-off; size it to available memory.
if u.trajectory.n_frames < 10000:  # Small enough for memory
    u.transfer_to_memory()
    
# Then random access is fast
u.trajectory[1000]

Install with Tessl CLI