An object-oriented toolkit to analyze molecular dynamics trajectories.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
MDAnalysis provides a comprehensive suite of analysis tools for studying molecular dynamics trajectories. These tools follow a consistent interface based on the AnalysisBase class and provide standardized results storage.
All analysis classes inherit from AnalysisBase, providing a consistent workflow:
from MDAnalysis.analysis.base import AnalysisBase
class AnalysisBase:
"""
Base class for trajectory analysis with standardized workflow.
Provides common functionality for frame iteration, progress reporting,
and results storage.
"""
def __init__(self, trajectory, verbose=False, **kwargs):
"""
Initialize analysis base.
Parameters
----------
trajectory : Universe or AtomGroup
MDAnalysis Universe or AtomGroup with trajectory information.
verbose : bool, optional
Whether to show progress bar during analysis (default False).
**kwargs
Additional arguments for specific analysis.
"""
def run(self, start=None, stop=None, step=None, frames=None, verbose=None):
"""
Execute the analysis over specified trajectory frames.
Parameters
----------
start : int, optional
First frame to analyze (default None for beginning).
stop : int, optional
Frame index at which to stop; exclusive, i.e. this frame is not analyzed (default None for end).
step : int, optional
Step size for frame iteration (default None for 1).
frames : array-like, optional
Specific frame indices to analyze.
verbose : bool, optional
Override verbose setting for this run.
Returns
-------
self
Returns self to allow method chaining.
Examples
--------
>>> analysis = SomeAnalysis(u, selection1, selection2)
>>> analysis.run(start=100, stop=500, step=10)
>>> results = analysis.results
"""
def _prepare(self):
"""
Prepare analysis before trajectory iteration.
Override in subclasses to initialize data structures
and validate input parameters.
"""
def _single_frame(self):
"""
Analyze a single trajectory frame.
Override in subclasses to implement per-frame calculations.
This method is called for each frame during run().
"""
def _conclude(self):
"""
Finalize analysis after trajectory iteration.
Override in subclasses to perform post-processing
and populate final results.
"""
# Standard usage pattern
analysis = SomeAnalysis(universe, parameters...)
analysis.run()
results = analysis.results
from MDAnalysis.analysis.results import Results
class Results:
"""
Container for analysis results with attribute access.
Provides dictionary-like access to results with additional
functionality for data management.
"""
def __init__(self, *args, **kwargs):
"""
Create results container.
Parameters can be provided as dictionaries or keyword arguments.
"""
def __getattr__(self, name):
"""
Access results as attributes.
Examples
--------
>>> results.rmsd # Access RMSD array
>>> results.times # Access time points
"""
def __setattr__(self, name, value):
"""
Set result values as attributes.
"""
def __contains__(self, name):
"""
Check if result exists.
"""
from MDAnalysis.analysis import align, rms
# Structural alignment
def alignto(mobile, reference, select="all", weights=None, subsetA=None,
subsetB=None, **kwargs):
"""
Align mobile AtomGroup to reference AtomGroup.
Parameters
----------
mobile : Universe or AtomGroup
Structure to be aligned (modified in place).
reference : Universe or AtomGroup
Reference structure for alignment.
select : str, optional
Selection string for atoms to use in alignment (default "all").
weights : str or array-like, optional
Weights for alignment fitting. Can be "mass" or array of weights.
subsetA : AtomGroup, optional
Specific atoms from mobile to use for fitting.
subsetB : AtomGroup, optional
Specific atoms from reference to use for fitting.
**kwargs
Additional arguments for fitting algorithm.
Returns
-------
tuple of float
(old_rmsd, new_rmsd): RMSD before and after the spatial alignment.
Examples
--------
>>> # Align protein backbone
>>> align.alignto(mobile_u, reference_u, select="backbone")
>>> # Mass-weighted alignment
>>> align.alignto(mobile_u, reference_u, select="name CA", weights="mass")
"""
class AlignTraj:
"""
Align trajectory to reference structure.
Performs structural alignment of each trajectory frame to a reference,
optionally writing aligned trajectory to file.
"""
def __init__(self, mobile, reference, select="all", filename=None,
weights=None, **kwargs):
"""
Initialize trajectory alignment.
Parameters
----------
mobile : Universe
Universe with trajectory to align.
reference : Universe or AtomGroup
Reference structure for alignment.
select : str, optional
Selection for alignment atoms (default "all").
filename : str, optional
Output file for aligned trajectory.
weights : str or array-like, optional
Weights for alignment ("mass" or custom weights).
**kwargs
Additional parameters.
Examples
--------
>>> aligner = align.AlignTraj(u, reference, select="backbone",
... filename="aligned.xtc")
>>> aligner.run()
"""
class RMSD:
"""
Calculate RMSD between trajectory and reference structure.
"""
def __init__(self, atomgroup, reference=None, select="all",
groupselections=None, **kwargs):
"""
Initialize RMSD calculation.
Parameters
----------
atomgroup : AtomGroup or Universe
Atoms for RMSD calculation.
reference : AtomGroup or Universe, optional
Reference structure (default first frame).
select : str, optional
Selection string for RMSD atoms (default "all").
groupselections : list, optional
List of selection strings for multiple group RMSD.
**kwargs
Additional parameters including mass weighting options.
Examples
--------
>>> R = rms.RMSD(u, select="backbone")
>>> R.run()
>>> rmsd_data = R.results.rmsd # Shape: (n_frames, 3) [frame, time, RMSD]
>>> # Multiple group RMSD
>>> R = rms.RMSD(u, groupselections=["backbone", "sidechain"])
>>> R.run()
>>> backbone_rmsd = R.results.rmsd[:, 3] # Backbone (first group) RMSD; column 2 is the overall RMSD
"""
@property
def results(self):
"""
RMSD results after running analysis.
Returns
-------
Results
Results object with rmsd attribute containing array of
shape (n_frames, 3 + n_groups) with columns:
[frame, time, rmsd_total, rmsd_group1, rmsd_group2, ...]
"""
class RMSF:
"""
Calculate root mean square fluctuation (RMSF) of atoms.
RMSF measures atomic positional fluctuations around average positions.
"""
def __init__(self, atomgroup, **kwargs):
"""
Initialize RMSF calculation.
Parameters
----------
atomgroup : AtomGroup
Atoms for RMSF calculation.
**kwargs
Additional parameters for analysis setup.
Examples
--------
>>> ca_atoms = u.select_atoms("name CA")
>>> rmsf_analysis = rms.RMSF(ca_atoms)
>>> rmsf_analysis.run()
>>> rmsf_values = rmsf_analysis.results.rmsf
"""
@property
def results(self):
"""
RMSF results after running analysis.
Returns
-------
Results
Results object with rmsf attribute containing per-atom
RMSF values in Angstrom units.
"""
from MDAnalysis.analysis import distances
def distance_array(reference, configuration, box=None, result=None, backend="serial"):
"""
Calculate distance array between two coordinate sets.
Parameters
----------
reference : array-like
Reference coordinates of shape (n, 3).
configuration : array-like
Configuration coordinates of shape (m, 3).
box : array-like, optional
Unit cell dimensions for periodic boundary conditions.
result : numpy.ndarray, optional
Pre-allocated result array of shape (n, m).
backend : str, optional
Computation backend ("serial" or "OpenMP").
Returns
-------
numpy.ndarray
Distance array of shape (n, m) containing all pairwise distances.
Examples
--------
>>> protein_pos = protein.positions
>>> water_pos = waters.positions
>>> dist_array = distances.distance_array(protein_pos, water_pos,
... box=u.dimensions)
"""
def self_distance_array(reference, box=None, result=None, backend="serial"):
"""
Calculate self-distance array (all pairwise distances within one set).
Parameters
----------
reference : array-like
Coordinates of shape (n, 3).
box : array-like, optional
Unit cell dimensions for periodic boundary conditions.
result : numpy.ndarray, optional
Pre-allocated result array of shape (n*(n-1)/2,).
backend : str, optional
Computation backend ("serial" or "OpenMP").
Returns
-------
numpy.ndarray
Flattened upper triangle of the distance matrix, shape (n*(n-1)/2,).
Examples
--------
>>> ca_positions = ca_atoms.positions
>>> ca_distances = distances.self_distance_array(ca_positions,
... box=u.dimensions)
"""
def contact_matrix(coordinates, cutoff=8.0, returntype="numpy", box=None):
"""
Calculate contact matrix based on distance cutoff.
Parameters
----------
coordinates : array-like
Atomic coordinates of shape (n, 3).
cutoff : float, optional
Distance cutoff for contacts in Angstrom (default 8.0).
returntype : str, optional
Return type ("numpy" for dense array, "sparse" for sparse matrix).
box : array-like, optional
Unit cell dimensions.
Returns
-------
numpy.ndarray or sparse matrix
Contact matrix where 1 indicates contact, 0 indicates no contact.
Examples
--------
>>> contacts = distances.contact_matrix(protein.positions, cutoff=6.0)
>>> contact_count = np.sum(contacts)
"""
class InterRDF:
"""
Calculate intermolecular radial distribution function (RDF).
Computes g(r) between two atom groups, measuring probability of
finding atoms at distance r.
"""
def __init__(self, g1, g2, nbins=75, range=(0.0, 15.0), norm="rdf",
exclusion_block=None, **kwargs):
"""
Initialize RDF calculation.
Parameters
----------
g1 : AtomGroup
First atom group.
g2 : AtomGroup
Second atom group.
nbins : int, optional
Number of histogram bins (default 75).
range : tuple, optional
Distance range for RDF as (min, max) in Angstrom (default (0, 15)).
norm : str, optional
Normalization method ("rdf" or "density").
exclusion_block : tuple, optional
Exclude distances between atoms (n, m) apart in topology.
**kwargs
Additional parameters.
Examples
--------
>>> water_O = u.select_atoms("name OH2")
>>> protein = u.select_atoms("protein")
>>> rdf = distances.InterRDF(water_O, protein, range=(0, 10))
>>> rdf.run()
>>> r = rdf.results.bins # Distance bins
>>> gr = rdf.results.rdf # RDF values
"""
@property
def results(self):
"""
RDF results after running analysis.
Returns
-------
Results
Results object with:
- bins: distance bin centers
- rdf: radial distribution function g(r)
- count: raw histogram counts
"""
from MDAnalysis.analysis import contacts
class Contacts:
"""
Calculate contacts between two atom groups over trajectory.
Monitors formation and breaking of contacts based on distance cutoff.
"""
def __init__(self, selection_one, selection_two, radius=4.5,
method="hard_cut", **kwargs):
"""
Initialize contact analysis.
Parameters
----------
selection_one : AtomGroup
First group of atoms.
selection_two : AtomGroup
Second group of atoms.
radius : float, optional
Contact distance cutoff in Angstrom (default 4.5).
method : str, optional
Contact detection method ("hard_cut" or "soft_cut").
**kwargs
Additional parameters for contact detection.
Examples
--------
>>> protein = u.select_atoms("protein")
>>> ligand = u.select_atoms("resname LIG")
>>> contacts_analysis = contacts.Contacts(protein, ligand, radius=3.5)
>>> contacts_analysis.run()
>>> contact_matrix = contacts_analysis.results.contact_matrix
"""
@property
def results(self):
"""
Contact analysis results.
Returns
-------
Results
Results object with:
- contact_matrix: binary matrix indicating contacts per frame
- timeseries: contact time series data
"""
from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis
class HydrogenBondAnalysis:
"""
Analyze hydrogen bonds in molecular dynamics trajectories.
Identifies hydrogen bonds based on geometric criteria and tracks
their formation/breaking over time.
"""
def __init__(self, universe, donors_sel=None, hydrogens_sel=None,
acceptors_sel=None, d_h_cutoff=1.2, d_a_cutoff=3.0,
d_h_a_angle_cutoff=150, **kwargs):
"""
Initialize hydrogen bond analysis.
Parameters
----------
universe : Universe
MDAnalysis Universe object.
donors_sel : str, optional
Selection string for hydrogen bond donor atoms.
hydrogens_sel : str, optional
Selection string for hydrogen atoms.
acceptors_sel : str, optional
Selection string for hydrogen bond acceptor atoms.
d_h_cutoff : float, optional
Maximum donor-hydrogen distance in Angstrom (default 1.2).
d_a_cutoff : float, optional
Maximum donor-acceptor distance in Angstrom (default 3.0).
d_h_a_angle_cutoff : float, optional
Minimum donor-hydrogen-acceptor angle in degrees (default 150).
**kwargs
Additional parameters.
Examples
--------
>>> # Analyze protein-water hydrogen bonds
>>> hbond_analysis = HydrogenBondAnalysis(
... universe=u,
... donors_sel="protein",
... acceptors_sel="resname SOL"
... )
>>> hbond_analysis.run()
>>> hbond_data = hbond_analysis.results.hbonds
"""
def guess_donors(self, selection="protein", max_missing=1):
"""
Automatically identify hydrogen bond donors.
Parameters
----------
selection : str, optional
Selection string for donor search (default "protein").
max_missing : int, optional
Maximum missing hydrogen atoms per donor (default 1).
Returns
-------
str
Selection string for identified donors.
"""
def guess_acceptors(self, selection="protein"):
"""
Automatically identify hydrogen bond acceptors.
Parameters
----------
selection : str, optional
Selection string for acceptor search (default "protein").
Returns
-------
str
Selection string for identified acceptors.
"""
@property
def results(self):
"""
Hydrogen bond analysis results.
Returns
-------
Results
Results object with:
- hbonds: array of hydrogen bond data per frame
- pairs: unique donor-acceptor pairs
- times: time points for each frame
"""
class WaterBridgeAnalysis:
"""
Analyze water-mediated hydrogen bonds between two selections.
Identifies hydrogen bonds where water molecules bridge interactions
between two molecular groups.
"""
def __init__(self, universe, selection1, selection2, water_selection="resname SOL",
order=1, **kwargs):
"""
Initialize water bridge analysis.
Parameters
----------
universe : Universe
MDAnalysis Universe object.
selection1 : str
First selection (e.g., "protein").
selection2 : str
Second selection (e.g., "resname LIG").
water_selection : str, optional
Selection for water molecules (default "resname SOL").
order : int, optional
Maximum number of water molecules in bridge (default 1).
**kwargs
Additional hydrogen bond criteria.
Examples
--------
>>> wb_analysis = WaterBridgeAnalysis(
... universe=u,
... selection1="protein",
... selection2="resname LIG",
... order=2 # Up to 2 water molecules
... )
>>> wb_analysis.run()
>>> bridges = wb_analysis.results.water_bridges
"""
from MDAnalysis.analysis import pca
class PCA:
"""
Principal Component Analysis of atomic coordinate fluctuations.
Performs PCA on coordinate data to identify principal modes of motion.
"""
def __init__(self, universe, select="all", align=False, mean=None,
n_components=None, **kwargs):
"""
Initialize PCA calculation.
Parameters
----------
universe : Universe or AtomGroup
System for PCA analysis.
select : str, optional
Selection string for atoms (default "all").
align : bool, optional
Whether to align structures before PCA (default False).
mean : array-like, optional
Pre-computed mean structure for centering.
n_components : int, optional
Number of principal components to compute.
**kwargs
Additional parameters.
Examples
--------
>>> # PCA of backbone motion
>>> pca_analysis = pca.PCA(u, select="backbone", align=True)
>>> pca_analysis.run()
>>> eigenvalues = pca_analysis.results.variance
>>> eigenvectors = pca_analysis.results.p_components
"""
def transform(self, atomgroup, n_components=None):
"""
Project coordinates onto principal components.
Parameters
----------
atomgroup : AtomGroup
Atoms to project (must match PCA selection).
n_components : int, optional
Number of components for projection.
Returns
-------
numpy.ndarray
Projected coordinates in PC space.
"""
@property
def results(self):
"""
PCA analysis results.
Returns
-------
Results
Results object with:
- variance: eigenvalues (variance explained by each PC)
- p_components: principal component vectors
- variance_ratio: fraction of variance explained
"""
from MDAnalysis.analysis import leaflet
class LeafletFinder:
"""
Identify membrane leaflets in lipid bilayer systems.
Uses phosphate positions and connectivity to determine upper
and lower leaflets of lipid bilayers.
"""
def __init__(self, universe, select="name P*", cutoff=15.0, pbc=True):
"""
Initialize leaflet identification.
Parameters
----------
universe : Universe
Universe containing membrane system.
select : str, optional
Selection for phosphate atoms (default "name P*").
cutoff : float, optional
Distance cutoff for leaflet assignment (default 15.0).
pbc : bool, optional
Whether to use periodic boundary conditions (default True).
Examples
--------
>>> leaflets = leaflet.LeafletFinder(u, select="name P", cutoff=12.0)
>>> upper_leaflet = leaflets.groups(0)
>>> lower_leaflet = leaflets.groups(1)
"""
def groups(self, leaflet_id=None):
"""
Get atoms belonging to specified leaflet.
Parameters
----------
leaflet_id : int, optional
Leaflet identifier. If None, returns list of all leaflets.
Returns
-------
AtomGroup or list
Atoms in specified leaflet or list of leaflet AtomGroups.
"""
from MDAnalysis.analysis import polymer
class PersistenceLength:
"""
Calculate persistence length of polymer chains.
Measures the characteristic length scale over which chain
correlations decay exponentially.
"""
def __init__(self, atomgroups, **kwargs):
"""
Initialize persistence length calculation.
Parameters
----------
atomgroups : list of AtomGroup
List of AtomGroups representing polymer backbone atoms.
**kwargs
Additional parameters for calculation.
Examples
--------
>>> # DNA backbone analysis
>>> backbone_atoms = [u.select_atoms(f"segid {seg} and name P")
... for seg in ["DNA1", "DNA2"]]
>>> lp_analysis = polymer.PersistenceLength(backbone_atoms)
>>> lp_analysis.run()
>>> persistence_length = lp_analysis.results.lp
"""
from MDAnalysis.analysis import msd
class MeanSquaredDisplacement:
"""
Calculate mean squared displacement for diffusion analysis.
Computes MSD as a function of lag time to analyze diffusive motion.
"""
def __init__(self, universe, select="all", msd_type="xyz", fft=True, **kwargs):
"""
Initialize MSD calculation.
Parameters
----------
universe : Universe
Universe with trajectory for MSD analysis.
select : str, optional
Selection string for atoms (default "all").
msd_type : str, optional
Type of MSD calculation ("xyz", "xy", "yz", "xz", "x", "y", "z").
fft : bool, optional
Whether to use FFT-based algorithm for efficiency (default True).
**kwargs
Additional parameters.
Examples
--------
>>> # Water diffusion analysis
>>> water_O = u.select_atoms("name OH2")
>>> msd_analysis = msd.MeanSquaredDisplacement(u, select="name OH2")
>>> msd_analysis.run()
>>> msd_data = msd_analysis.results.msd
>>> lag_times = msd_analysis.results.lagtimes
"""
@property
def results(self):
"""
MSD analysis results.
Returns
-------
Results
Results object with:
- msd: mean squared displacement values
- lagtimes: lag time values
"""
# Standard analysis workflow
def analyze_trajectory(universe, output_prefix="analysis"):
    """
    Run RMSD, RMSF and hydrogen-bond analyses on one trajectory.

    Parameters
    ----------
    universe : Universe
        Universe to analyze; its "protein" selection is used for the
        RMSD and RMSF calculations.
    output_prefix : str, optional
        Prefix of the text files written by this function,
        ``<output_prefix>_rmsd.dat`` and ``<output_prefix>_rmsf.dat``
        (default "analysis").

    Returns
    -------
    dict
        The ``results`` attribute of each analysis under the keys
        'rmsd', 'rmsf' and 'hbonds'. Hydrogen-bond results are only
        returned, not written to disk.
    """
    # Imported locally so the example runs when copied on its own;
    # the original snippet used ``np`` without importing it.
    import numpy as np
    from MDAnalysis.analysis import rms
    from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis

    # Structural analysis
    protein = universe.select_atoms("protein")

    # RMSD analysis
    rmsd_analysis = rms.RMSD(protein, select="backbone")
    rmsd_analysis.run()

    # RMSF analysis
    ca_atoms = protein.select_atoms("name CA")
    rmsf_analysis = rms.RMSF(ca_atoms)
    rmsf_analysis.run()

    # Hydrogen bond analysis
    hb_analysis = HydrogenBondAnalysis(universe)
    hb_analysis.run()

    # Save results
    np.savetxt(f"{output_prefix}_rmsd.dat", rmsd_analysis.results.rmsd)
    np.savetxt(f"{output_prefix}_rmsf.dat", rmsf_analysis.results.rmsf)

    return {
        'rmsd': rmsd_analysis.results,
        'rmsf': rmsf_analysis.results,
        'hbonds': hb_analysis.results
    }
# Run analysis
results = analyze_trajectory(u, "protein_analysis")
# Parallel analysis using multiprocessing
from MDAnalysis.analysis.base import AnalysisFromFunction
import multiprocessing as mp
def frame_analysis(ag):
    """
    Per-frame analysis function.

    Parameters
    ----------
    ag : AtomGroup
        Group providing ``radius_of_gyration()`` and ``center_of_mass()``.

    Returns
    -------
    dict
        ``{'rgyr': ..., 'com': ...}`` holding the values returned by those
        two methods for the current frame.
    """
    return {
        'rgyr': ag.radius_of_gyration(),
        'com': ag.center_of_mass()
    }
# Run in parallel
protein = u.select_atoms("protein")
parallel_analysis = AnalysisFromFunction(frame_analysis, protein)
# AnalysisBase.run() takes no ``n_jobs`` argument; parallel execution is
# requested with ``backend`` and ``n_workers`` (assumes MDAnalysis >= 2.8).
parallel_analysis.run(backend="multiprocessing", n_workers=mp.cpu_count())
results = parallel_analysis.results
Install with Tessl CLI
npx tessl i tessl/pypi-mdanalysis