An object-oriented toolkit to analyze molecular dynamics trajectories.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
MDAnalysis provides comprehensive support for reading and writing molecular structure and trajectory data across many file formats commonly used in molecular dynamics simulations.
The I/O system in MDAnalysis is built around three main components: topology parsers, coordinate/trajectory readers, and writers.
All I/O operations use a unified interface with automatic format detection based on file extensions.
def reader(filename, format=None, **kwargs):
"""
Get a trajectory reader for the specified file.
Parameters
----------
filename : str or file-like
Path to trajectory file or file-like object.
format : str, optional
File format override. If None, format is guessed from file extension.
**kwargs
Additional arguments passed to format-specific reader.
Returns
-------
ReaderBase
Trajectory reader object appropriate for the file format.
Examples
--------
>>> from MDAnalysis.coordinates import reader
>>> traj = reader("trajectory.xtc")
>>> for ts in traj:
... print(f"Frame {ts.frame}, Time: {ts.time}")
"""
def Writer(filename, n_atoms=None, format=None, multiframe=None, **kwargs):
"""
Create a trajectory writer for the specified file format.
Parameters
----------
filename : str or file-like
Output filename or file-like object.
n_atoms : int, optional
Number of atoms in the system (required for some formats).
format : str, optional
Output format. If None, guessed from filename extension.
multiframe : bool, optional
Whether writer supports multiple frames. If None, determined automatically.
bonds : str, optional
How to handle bond information ('all', 'none', 'conect').
**kwargs
Additional format-specific arguments.
Returns
-------
WriterBase
Writer object for the specified format.
Examples
--------
>>> W = Writer("output.xtc", n_atoms=1000)
>>> for ts in u.trajectory:
... W.write(u.atoms)
>>> W.close()
>>> # Context manager usage
>>> with Writer("output.dcd", n_atoms=u.atoms.n_atoms) as W:
... for ts in u.trajectory:
... W.write(u.atoms)
"""
MDAnalysis supports reading topology information from these formats:
# PSF (CHARMM/NAMD Topology)
u = mda.Universe("system.psf", "trajectory.dcd")
# CRD (CHARMM Coordinate)
u = mda.Universe("coordinates.crd")
Capabilities:
# TPR (GROMACS Binary Topology)
u = mda.Universe("topol.tpr", "trajectory.xtc")
# GRO (GROMACS Structure)
u = mda.Universe("system.gro")
# TOP/ITP (GROMACS Text Topology) - limited support
u = mda.Universe("topol.top")
Capabilities:
# PRMTOP (AMBER Topology)
u = mda.Universe("system.prmtop", "trajectory.nc")
# INPCRD (AMBER Coordinate)
u = mda.Universe("coordinates.inpcrd")
Capabilities:
# PDB (Protein Data Bank)
u = mda.Universe("structure.pdb")
# PQR (PDB with Charges and Radii)
u = mda.Universe("system.pqr")
# MOL2 (Tripos Molecular Structure)
u = mda.Universe("molecule.mol2")
# PDBQT (AutoDock format)
u = mda.Universe("protein.pdbqt")
# DCD (CHARMM/NAMD/LAMMPS)
u = mda.Universe("topology.psf", "trajectory.dcd")
# XTC (GROMACS Compressed)
u = mda.Universe("topol.tpr", "trajectory.xtc")
# TRR (GROMACS Full Precision)
u = mda.Universe("topol.tpr", "trajectory.trr")
# TNG (Trajectory Next Generation)
u = mda.Universe("topol.tpr", "trajectory.tng")
# NetCDF (AMBER NetCDF)
u = mda.Universe("system.prmtop", "trajectory.nc")
# XYZ (Generic Coordinate)
u = mda.Universe("trajectory.xyz")
# LAMMPS Trajectory
u = mda.Universe("data.lammps", "dump.lammpstrj")
# AMBER ASCII Trajectory
u = mda.Universe("system.prmtop", "mdcrd")
class ReaderBase:
"""
Base class for trajectory readers supporting multiple frames.
"""
@property
def n_frames(self):
"""
Total number of frames in trajectory.
Returns
-------
int
Number of trajectory frames.
"""
@property
def dt(self):
"""
Time step between frames.
Returns
-------
float
Time step in picoseconds.
"""
@property
def totaltime(self):
"""
Total simulation time span.
Returns
-------
float
Total time covered by trajectory in picoseconds.
"""
def __iter__(self):
"""
Iterate through all frames in trajectory.
Yields
------
Timestep
Timestep object for each frame.
Examples
--------
>>> for ts in u.trajectory:
... print(f"Time: {ts.time}, Frame: {ts.frame}")
"""
def __getitem__(self, frame):
"""
Access specific frame(s) by index.
Parameters
----------
frame : int or slice
Frame index or slice object.
Returns
-------
Timestep
Timestep object for requested frame(s).
Examples
--------
>>> ts = u.trajectory[0] # First frame
>>> ts = u.trajectory[-1] # Last frame
>>> u.trajectory[10:20:2] # Slice with step
"""
def next(self):
"""
Advance to next frame.
Returns
-------
Timestep
Timestep object for next frame.
"""
def rewind(self):
"""
Return to first frame of trajectory.
Examples
--------
>>> u.trajectory.rewind()
>>> assert u.trajectory.frame == 0
"""
def close(self):
"""
Close trajectory file and free resources.
"""
class SingleFrameReaderBase:
"""
Base class for single-frame coordinate readers (e.g., PDB, GRO).
"""
@property
def n_frames(self):
"""
Always returns 1 for single-frame readers.
Returns
-------
int
Always 1.
"""
class WriterBase:
"""
Base class for coordinate writers.
"""
def __init__(self, filename, n_atoms, **kwargs):
"""
Initialize coordinate writer.
Parameters
----------
filename : str
Output filename.
n_atoms : int
Number of atoms to write.
**kwargs
Format-specific arguments.
"""
def write(self, selection, ts=None):
"""
Write coordinates for selected atoms.
Parameters
----------
selection : AtomGroup
Atoms to write to file.
ts : Timestep, optional
Timestep object with coordinate data. If None, uses
current coordinates from selection.
Examples
--------
>>> with Writer("output.pdb", n_atoms=protein.n_atoms) as W:
... for ts in u.trajectory:
... W.write(protein)
"""
def close(self):
"""
Close output file and finalize writing.
"""
def __enter__(self):
"""
Context manager entry.
Returns
-------
WriterBase
Self for context manager usage.
"""
def __exit__(self, exc_type, exc_val, exc_tb):
"""
Context manager exit, automatically closes file.
"""
class Timestep:
"""
Container for coordinate data from a single trajectory frame.
"""
def __init__(self, n_atoms, **kwargs):
"""
Create timestep for specified number of atoms.
Parameters
----------
n_atoms : int
Number of atoms in the system.
positions : bool, optional
Whether to allocate position array (default True).
velocities : bool, optional
Whether to allocate velocity array (default False).
forces : bool, optional
Whether to allocate force array (default False).
"""
@property
def positions(self):
"""
Atomic coordinates for current frame.
Returns
-------
numpy.ndarray
Array of shape (n_atoms, 3) with atomic coordinates.
"""
@property
def velocities(self):
"""
Atomic velocities for current frame.
Returns
-------
numpy.ndarray or None
Array of shape (n_atoms, 3) with velocities if available.
"""
@property
def forces(self):
"""
Atomic forces for current frame.
Returns
-------
numpy.ndarray or None
Array of shape (n_atoms, 3) with forces if available.
"""
@property
def dimensions(self):
"""
Unit cell dimensions.
Returns
-------
numpy.ndarray or None
Array [a, b, c, alpha, beta, gamma] with box parameters.
"""
@property
def volume(self):
"""
Unit cell volume.
Returns
-------
float or None
Volume in cubic Angstroms, None if no box information.
"""
@property
def time(self):
"""
Simulation time for this frame.
Returns
-------
float
Time in picoseconds.
"""
@property
def frame(self):
"""
Frame number in trajectory.
Returns
-------
int
Zero-based frame index.
"""
def copy(self):
"""
Create independent copy of timestep.
Returns
-------
Timestep
Deep copy of timestep with independent arrays.
"""
# XTC compressed trajectories
u = mda.Universe("topol.tpr", "trajectory.xtc")
# Access precision information
print(f"XTC precision: {u.trajectory.precision}")
# TRR full precision with velocities/forces
u = mda.Universe("topol.tpr", "trajectory.trr")
if hasattr(u.trajectory.ts, 'velocities'):
velocities = u.trajectory.ts.velocities
u = mda.Universe("system.psf", "trajectory.dcd")
# DCD supports fixed atoms
if hasattr(u.trajectory, 'fixed'):
fixed_atoms = u.trajectory.fixed
# Periodic boundary information
dimensions = u.trajectory.ts.dimensions
u = mda.Universe("system.prmtop", "trajectory.nc")
# NetCDF trajectories support metadata
print(f"NetCDF conventions: {u.trajectory.Conventions}")
print(f"Application: {u.trajectory.application}")
# Concatenate multiple trajectory files
u = mda.Universe("topology.psf", "part1.dcd", "part2.dcd", "part3.dcd")
# All files treated as continuous trajectory
print(f"Total frames: {u.trajectory.n_frames}")
# Or load files one at a time (note: load_new replaces the current trajectory; it does not append)
u = mda.Universe("topology.psf", "part1.dcd")
for additional in ["part2.dcd", "part3.dcd"]:
u.load_new(additional)
# Write subset of atoms
protein = u.select_atoms("protein")
with mda.Writer("protein_only.xtc", n_atoms=protein.n_atoms) as W:
for ts in u.trajectory:
W.write(protein)
# Write specific frames
with mda.Writer("every_10th.dcd", n_atoms=u.atoms.n_atoms) as W:
for ts in u.trajectory[::10]: # Every 10th frame
W.write(u.atoms)
# Single frame output
u.atoms.write("final_frame.pdb") # Current frame
u.trajectory[-1] # Go to last frame
u.atoms.write("last_frame.gro")
# Process large trajectories in chunks
def process_in_chunks(universe, chunk_size=1000, process_frame=None):
    """
    Walk a trajectory in consecutive windows of at most ``chunk_size`` frames.

    Parameters
    ----------
    universe : Universe
        Object whose ``trajectory`` exposes ``n_frames`` and supports slicing.
    chunk_size : int, optional
        Maximum number of frames per window (default 1000).
    process_frame : callable, optional
        Called as ``process_frame(ts)`` for every frame, in trajectory order.
        If None, the frames are iterated without further processing.

    Raises
    ------
    ValueError
        If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    n_frames = universe.trajectory.n_frames
    for start in range(0, n_frames, chunk_size):
        end = min(start + chunk_size, n_frames)
        # Slice the reader directly. Universe.transfer_to_memory() must not
        # be called once per chunk: it swaps universe.trajectory for an
        # in-memory reader that holds only the requested frames, so the
        # start/end indices of later chunks would no longer refer to the
        # original trajectory.
        for ts in universe.trajectory[start:end]:
            if process_frame is not None:
                process_frame(ts)


def convert_trajectory(input_files, output_file, selection="all"):
    """
    Convert trajectory between formats.

    Parameters
    ----------
    input_files : tuple
        (topology, trajectory) file paths; unpacked as the positional
        arguments of ``mda.Universe``.
    output_file : str
        Output trajectory file.
    selection : str, optional
        Atom selection to write (default "all").
    """
    u = mda.Universe(*input_files)
    atoms = u.select_atoms(selection)
    # The writer is sized for the selected atoms only, not the whole system.
    with mda.Writer(output_file, n_atoms=atoms.n_atoms) as W:
        # Iterating advances the trajectory; `atoms` then reflects each frame.
        for ts in u.trajectory:
            W.write(atoms)
# Example: Convert AMBER to GROMACS
convert_trajectory(("system.prmtop", "trajectory.nc"), "output.xtc")
# Example: Extract protein only
convert_trajectory(("system.psf", "trajectory.dcd"), "protein.xtc", "protein")
import gzip
import bz2
# Compressed files (automatic detection)
with gzip.open("trajectory.xtc.gz", 'rb') as f:
u = mda.Universe("topology.tpr", f)
# Multiple compressed trajectories
u = mda.Universe("topology.tpr", "traj1.xtc.bz2", "traj2.xtc.gz")
# In-memory trajectories
from io import BytesIO
data = BytesIO(compressed_trajectory_data)
u = mda.Universe("topology.tpr", data, format="XTC")
from MDAnalysis.exceptions import NoDataError
try:
u = mda.Universe("topology.psf", "trajectory.dcd")
except FileNotFoundError:
print("Trajectory file not found")
except NoDataError as e:
print(f"Missing required data: {e}")
# Check for optional data
if u.trajectory.ts.has_velocities:
velocities = u.atoms.velocities
else:
print("No velocity data available")
# Validate trajectory compatibility
if u.atoms.n_atoms != u.trajectory.n_atoms:
raise ValueError("Atom count mismatch between topology and trajectory")
# Load trajectory into memory for repeated access
u.transfer_to_memory() # Load all frames
# Partial loading for large trajectories
u.transfer_to_memory(start=0, stop=1000, step=10) # Every 10th frame
# Memory-efficient single pass
for ts in u.trajectory: # Streaming access
# Process frame immediately
pass
# Efficient for formats with index support (XTC, TRR, NetCDF)
u.trajectory[1000] # Direct access to frame 1000
# Less efficient for sequential formats (DCD, ASCII)
# Consider loading into memory for random access
if u.trajectory.n_frames < 10000: # Small enough for memory
u.transfer_to_memory()
# Then random access is fast
u.trajectory[1000]
Install with Tessl CLI
npx tessl i tessl/pypi-mdanalysis