CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-mdanalysis

An object-oriented toolkit to analyze molecular dynamics trajectories.

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/analysis-tools.md

Analysis Tools

MDAnalysis provides a comprehensive suite of analysis tools for studying molecular dynamics trajectories. These tools follow a consistent interface based on the AnalysisBase class and provide standardized results storage.

Analysis Framework

AnalysisBase Class

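Trajectory analysis classes inherit from AnalysisBase, which gives them a common workflow: construct the analysis, call run(), then read the results attribute. (Stand-alone functions such as alignto or distance_array are called directly instead.)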

from MDAnalysis.analysis.base import AnalysisBase

class AnalysisBase:
    """
    Base class for multi-frame trajectory analyses.

    Subclasses implement the ``_prepare``, ``_single_frame`` and
    ``_conclude`` hooks; ``run`` drives them over the selected frames and
    takes care of frame iteration and progress reporting. Computed values
    are exposed through the ``results`` attribute.
    """

    def __init__(self, trajectory, verbose=False, **kwargs):
        """
        Initialize analysis base.

        Parameters
        ----------
        trajectory : trajectory Reader
            Reader to iterate over, typically ``universe.trajectory``
            (upstream type: ``MDAnalysis.coordinates.base.ReaderBase``).
            Concrete analyses usually accept a Universe or AtomGroup and
            forward its trajectory to this constructor.
        verbose : bool, optional
            Whether to show a progress bar during analysis (default False).
        **kwargs
            Additional arguments for specific analysis.
        """

    def run(self, start=None, stop=None, step=None, frames=None, verbose=None):
        """
        Execute the analysis over the selected trajectory frames.

        Frames are selected with slice semantics, i.e. the analysis visits
        ``trajectory[start:stop:step]``.

        Parameters
        ----------
        start : int, optional
            Index of the first frame to analyze (default None: first frame).
        stop : int, optional
            Index at which to stop; exclusive, so frame ``stop`` itself is
            not analyzed (default None: through the last frame).
        step : int, optional
            Stride between analyzed frames (default None: every frame).
        frames : array-like, optional
            Explicit frame indices (or a boolean mask) to analyze. Use this
            instead of, not together with, ``start``/``stop``/``step``.
        verbose : bool, optional
            Override the verbose setting for this run.

        Returns
        -------
        self
            The analysis object itself, so calls can be chained.

        Notes
        -----
        NOTE(review): recent MDAnalysis releases (2.8.0+) additionally accept
        ``backend``, ``n_workers`` and ``n_parts`` for parallel execution;
        confirm against the installed version before relying on them.

        Examples
        --------
        >>> analysis = SomeAnalysis(u, selection1, selection2)
        >>> analysis.run(start=100, stop=500, step=10)  # frames 100, 110, ..., 490
        >>> results = analysis.results
        >>> # run() returns self, so construction, run and access can be chained:
        >>> results = SomeAnalysis(u, selection1, selection2).run(step=10).results
        """

    def _prepare(self):
        """
        Set up the analysis before trajectory iteration starts.

        Override in subclasses to validate input parameters and allocate
        the data structures that ``_single_frame`` fills in.
        """

    def _single_frame(self):
        """
        Analyze the current trajectory frame.

        Override in subclasses to implement the per-frame calculation.
        ``run`` calls this hook once for every selected frame.
        """

    def _conclude(self):
        """
        Finalize the analysis after trajectory iteration.

        Override in subclasses to post-process the accumulated data
        (averaging, normalization, ...) and populate the final results.
        """

# Standard usage pattern: construct the analysis, run it, read the results.
# ``SomeAnalysis`` stands for any AnalysisBase subclass; ``args``/``kwargs``
# stand for its analysis-specific parameters.
analysis = SomeAnalysis(universe, *args, **kwargs)
analysis.run()
results = analysis.results

Results Storage

from MDAnalysis.analysis.results import Results

class Results:
    """
    Attribute-style container that holds the output of an analysis.

    Behaves like a dictionary whose entries can also be read and written
    as attributes, with some extra helpers for managing the stored data.
    """

    def __init__(self, *args, **kwargs):
        """
        Build a results container.

        Initial entries may be passed as dictionaries (positional) and/or
        as keyword arguments.
        """

    def __getattr__(self, name):
        """
        Look up a stored result by attribute name.

        Examples
        --------
        >>> results.rmsd  # Access RMSD array
        >>> results.times  # Access time points
        """

    def __setattr__(self, name, value):
        """
        Store ``value`` as the result called ``name``.
        """

    def __contains__(self, name):
        """
        Report whether a result called ``name`` is stored.
        """

Structural Analysis

Alignment and RMSD

from MDAnalysis.analysis import align, rms

# Structural alignment
def alignto(mobile, reference, select="all", weights=None, subsetA=None, 
            subsetB=None, **kwargs):
    """
    Superimpose ``mobile`` onto ``reference`` by an RMSD-minimizing fit.

    The rotation/translation is determined from the atoms matched by
    ``select`` and then applied to the coordinates of ``mobile`` for the
    current frame only (use ``AlignTraj`` to fit a whole trajectory).

    Parameters
    ----------
    mobile : Universe or AtomGroup
        Structure to be aligned (coordinates are modified in place).
    reference : Universe or AtomGroup
        Reference structure for alignment.
    select : str, optional
        Selection string for atoms to use in the fit (default "all").
        Both structures must yield the same number of atoms.
    weights : str or array-like, optional
        Weights for the fit. Can be "mass" or an array of per-atom weights;
        None (default) weights all atoms equally.
    subsetA : AtomGroup, optional
        Specific atoms from mobile to use for fitting.
    subsetB : AtomGroup, optional
        Specific atoms from reference to use for fitting.
    **kwargs
        Additional arguments for fitting algorithm.

    Returns
    -------
    old_rmsd : float
        RMSD between the fitting atoms before the superposition.
    new_rmsd : float
        RMSD between the fitting atoms after the superposition.

    Notes
    -----
    NOTE(review): upstream ``MDAnalysis.analysis.align.alignto`` has no
    ``subsetA``/``subsetB`` parameters; it takes ``subselection`` (which
    atoms to move), ``tol_mass``, ``strict`` and ``match_atoms`` instead.
    Confirm the signature against the installed MDAnalysis version.

    Examples
    --------
    >>> # Align protein backbone and inspect the fit quality
    >>> old_rmsd, new_rmsd = align.alignto(mobile_u, reference_u,
    ...                                    select="backbone")

    >>> # Mass-weighted alignment
    >>> align.alignto(mobile_u, reference_u, select="name CA", weights="mass")
    """

class AlignTraj:
    """
    Fit every frame of a trajectory onto a reference structure.

    Each frame of the mobile trajectory is superimposed on the reference;
    the fitted trajectory can optionally be written to a file.
    """

    def __init__(self, mobile, reference, select="all", filename=None, 
                 weights=None, **kwargs):
        """
        Set up the trajectory alignment.

        Parameters
        ----------
        mobile : Universe
            Universe holding the trajectory that will be aligned.
        reference : Universe or AtomGroup
            Structure every frame is fitted to.
        select : str, optional
            Selection of the atoms used for the fit (default "all").
        filename : str, optional
            File the aligned trajectory is written to.
        weights : str or array-like, optional
            Fit weights: "mass" or an array of custom weights.
        **kwargs
            Further parameters.

        Examples
        --------
        >>> aligner = align.AlignTraj(u, reference, select="backbone",
        ...                          filename="aligned.xtc")
        >>> aligner.run()
        """

class RMSD:
    """
    Calculate RMSD between trajectory and reference structure.

    Each frame is superimposed on the reference using the ``select`` atoms
    and the RMSD of that selection is recorded; optional
    ``groupselections`` are evaluated after the same superposition.
    """

    def __init__(self, atomgroup, reference=None, select="all", 
                 groupselections=None, **kwargs):
        """
        Initialize RMSD calculation.

        Parameters
        ----------
        atomgroup : AtomGroup or Universe
            Atoms for RMSD calculation.
        reference : AtomGroup or Universe, optional
            Reference structure (default: ``atomgroup`` at its first frame).
        select : str, optional
            Selection string for the atoms that are fitted and whose RMSD
            is reported in column 2 of the result (default "all").
        groupselections : list, optional
            Selection strings for additional groups. Their RMSDs are
            computed after the fit on ``select`` and appended as extra
            result columns, in the order given.
        **kwargs
            Additional parameters including mass weighting options.

        Examples
        --------
        >>> R = rms.RMSD(u, select="backbone")
        >>> R.run()
        >>> rmsd_data = R.results.rmsd  # Shape: (n_frames, 3) [frame, time, RMSD]
        >>> backbone_rmsd = rmsd_data[:, 2]

        >>> # Multiple group RMSD: fit on the backbone, also report side chains
        >>> R = rms.RMSD(u, select="backbone",
        ...              groupselections=["backbone", "not backbone and protein"])
        >>> R.run()
        >>> fit_rmsd = R.results.rmsd[:, 2]        # RMSD of the ``select`` atoms
        >>> backbone_rmsd = R.results.rmsd[:, 3]   # first groupselection
        >>> sidechain_rmsd = R.results.rmsd[:, 4]  # second groupselection
        """

    @property
    def results(self):
        """
        RMSD results after running analysis.

        Returns
        -------
        Results
            Results object with rmsd attribute containing array of
            shape (n_frames, 3 + n_groups) with columns:
            [frame, time, rmsd_total, rmsd_group1, rmsd_group2, ...]
            where rmsd_total is the RMSD of the ``select`` atoms and the
            group columns follow the order of ``groupselections``.
        """

Root Mean Square Fluctuation

class RMSF:
    """
    Per-atom root mean square fluctuation (RMSF).

    The RMSF quantifies how strongly each atom's position fluctuates
    around its average position.
    """

    def __init__(self, atomgroup, **kwargs):
        """
        Set up the RMSF calculation.

        Parameters
        ----------
        atomgroup : AtomGroup
            Atoms whose fluctuations are computed.
        **kwargs
            Further parameters for the analysis setup.

        Examples
        --------
        >>> ca_atoms = u.select_atoms("name CA")
        >>> rmsf_analysis = rms.RMSF(ca_atoms)
        >>> rmsf_analysis.run()
        >>> rmsf_values = rmsf_analysis.results.rmsf
        """

    @property
    def results(self):
        """
        Output of the RMSF analysis, available after ``run()``.

        Returns
        -------
        Results
            Results object whose rmsf attribute holds one RMSF value per
            atom, in Angstrom.
        """

Distance Analysis

Distance Calculations

from MDAnalysis.analysis import distances

def distance_array(reference, configuration, box=None, result=None, backend="serial"):
    """
    Compute every pairwise distance between two sets of coordinates.

    Parameters
    ----------
    reference : array-like
        First coordinate set, shape (n, 3).
    configuration : array-like
        Second coordinate set, shape (m, 3).
    box : array-like, optional
        Unit cell dimensions; when given, periodic boundary conditions
        are applied.
    result : numpy.ndarray, optional
        Pre-allocated output array of shape (n, m).
    backend : str, optional
        Which implementation performs the computation ("serial" or "OpenMP").

    Returns
    -------
    numpy.ndarray
        Array of shape (n, m) holding the distance for every
        reference/configuration pair.

    Examples
    --------
    >>> protein_pos = protein.positions
    >>> water_pos = waters.positions
    >>> dist_array = distances.distance_array(protein_pos, water_pos,
    ...                                       box=u.dimensions)
    """

def self_distance_array(reference, box=None, result=None, backend="serial"):
    """
    Calculate all unique pairwise distances within one coordinate set.

    Only the upper triangle of the distance matrix is computed (each pair
    i < j once, no self-distances), and it is returned flattened.

    Parameters
    ----------
    reference : array-like
        Coordinates of shape (n, 3).
    box : array-like, optional
        Unit cell dimensions for periodic boundary conditions.
    result : numpy.ndarray, optional
        Pre-allocated result array of shape (n*(n-1)/2,).
    backend : str, optional
        Computation backend ("serial" or "OpenMP").

    Returns
    -------
    numpy.ndarray
        1-D array of length n*(n-1)/2 with the distances of all pairs
        i < j in row-major order: (0,1), (0,2), ..., (0,n-1), (1,2), ...
        This is NOT an (n, n) matrix.

    Examples
    --------
    >>> ca_positions = ca_atoms.positions
    >>> ca_distances = distances.self_distance_array(ca_positions,
    ...                                            box=u.dimensions)
    >>> # Expand to a full symmetric (n, n) matrix if needed
    >>> n = len(ca_positions)
    >>> full = np.zeros((n, n))
    >>> full[np.triu_indices(n, k=1)] = ca_distances
    >>> full += full.T
    """

def contact_matrix(coordinates, cutoff=8.0, returntype="numpy", box=None):
    """
    Calculate a boolean contact matrix based on a distance cutoff.

    Parameters
    ----------
    coordinates : array-like
        Atomic coordinates of shape (n, 3).
    cutoff : float, optional
        Distance cutoff for contacts in Angstrom (default 8.0).
    returntype : str, optional
        Return type ("numpy" for dense array, "sparse" for sparse matrix).
    box : array-like, optional
        Unit cell dimensions; when given, periodic boundary conditions are
        taken into account.

    Returns
    -------
    numpy.ndarray or sparse matrix
        Symmetric (n, n) boolean matrix: entry [i, j] is True when atoms
        i and j are within ``cutoff`` of each other, False otherwise.

    Notes
    -----
    NOTE(review): upstream ``MDAnalysis.analysis.distances.contact_matrix``
    names its first parameter ``coord`` and defaults ``cutoff`` to 15.0;
    pass the cutoff explicitly and confirm the signature against the
    installed MDAnalysis version.

    Examples
    --------
    >>> contacts = distances.contact_matrix(protein.positions, cutoff=6.0)
    >>> # The matrix is symmetric, so count each pair once (i < j) and
    >>> # ignore the diagonal:
    >>> contact_count = np.count_nonzero(np.triu(contacts, k=1))
    """

class InterRDF:
    """
    Calculate intermolecular radial distribution function (RDF).

    Computes g(r) between two atom groups: the density of ``g2`` atoms at
    distance r from ``g1`` atoms relative to the bulk density. The class
    lives in ``MDAnalysis.analysis.rdf`` (not in ``analysis.distances``).
    """

    def __init__(self, g1, g2, nbins=75, range=(0.0, 15.0), norm="rdf", 
                 exclusion_block=None, **kwargs):
        """
        Initialize RDF calculation.

        Parameters
        ----------
        g1 : AtomGroup
            First atom group.
        g2 : AtomGroup
            Second atom group.
        nbins : int, optional
            Number of histogram bins (default 75).
        range : tuple, optional
            Distance range for RDF as (min, max) in Angstrom (default (0, 15)).
        norm : str, optional
            Normalization: "rdf" (default) for g(r), "density" for the
            single-particle density, or "none" for raw shell counts.
        exclusion_block : tuple, optional
            Size (n, m) of the tile to mask out along the diagonal of the
            g1 x g2 distance array, used to drop intra-molecular pairs when
            both groups list the same molecules in the same order; e.g.
            (3, 3) for three-site water against itself.
        **kwargs
            Additional parameters.

        Examples
        --------
        >>> from MDAnalysis.analysis.rdf import InterRDF
        >>> water_O = u.select_atoms("name OH2")
        >>> protein = u.select_atoms("protein")
        >>> irdf = InterRDF(water_O, protein, range=(0, 10))
        >>> irdf.run()
        >>> r = irdf.results.bins  # Distance bins
        >>> gr = irdf.results.rdf  # RDF values
        """

    @property
    def results(self):
        """
        RDF results after running analysis.

        Returns
        -------
        Results
            Results object with:
            - bins: distance bin centers
            - edges: distance bin edges (nbins + 1 values)
            - rdf: radial distribution function g(r)
            - count: raw histogram counts
        """

Contact Analysis

from MDAnalysis.analysis import contacts

class Contacts:
    """
    Calculate contacts between two atom groups over trajectory.

    For every frame, reports the fraction of reference contacts between
    the two groups that is present, according to the chosen method.
    """

    def __init__(self, selection_one, selection_two, radius=4.5, 
                 method="hard_cut", **kwargs):
        """
        Initialize contact analysis.

        Parameters
        ----------
        selection_one : AtomGroup
            First group of atoms.
        selection_two : AtomGroup
            Second group of atoms.
        radius : float, optional
            Contact distance cutoff in Angstrom (default 4.5).
        method : str, optional
            Contact detection method ("hard_cut", "soft_cut" or
            "radius_cut").
        **kwargs
            Additional parameters for contact detection.

        Notes
        -----
        NOTE(review): the upstream constructor is
        ``Contacts(u, select, refgroup, method="hard_cut", radius=4.5, ...)``
        where ``select`` is a pair of selection strings and ``refgroup`` a
        pair of AtomGroups defining the reference contacts. The signature
        shown here does not match it; confirm against the installed
        MDAnalysis version before copying the example.

        Examples
        --------
        >>> protein = u.select_atoms("protein")
        >>> ligand = u.select_atoms("resname LIG")
        >>> contacts_analysis = contacts.Contacts(protein, ligand, radius=3.5)
        >>> contacts_analysis.run()
        >>> q_per_frame = contacts_analysis.results.timeseries
        """

    @property  
    def results(self):
        """
        Contact analysis results.

        Returns
        -------
        Results
            Results object with:
            - timeseries: 2-D array with one row per analyzed frame; the
              first column is the frame index and the remaining column(s)
              the fraction of contacts present in that frame.
        """

Hydrogen Bond Analysis

Modern Hydrogen Bond Analysis

from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis

class HydrogenBondAnalysis:
    """
    Analyze hydrogen bonds in molecular dynamics trajectories.

    Identifies hydrogen bonds in every analyzed frame from geometric
    criteria (donor-acceptor distance and donor-hydrogen-acceptor angle).
    """

    def __init__(self, universe, donors_sel=None, hydrogens_sel=None, 
                 acceptors_sel=None, d_h_cutoff=1.2, d_a_cutoff=3.0, 
                 d_h_a_angle_cutoff=150, **kwargs):
        """
        Initialize hydrogen bond analysis.

        Parameters
        ----------
        universe : Universe
            MDAnalysis Universe object.
        donors_sel : str, optional
            Selection string for hydrogen bond donor atoms.
        hydrogens_sel : str, optional
            Selection string for hydrogen atoms.
        acceptors_sel : str, optional
            Selection string for hydrogen bond acceptor atoms.
        d_h_cutoff : float, optional
            Maximum donor-hydrogen distance in Angstrom (default 1.2); used
            to pair donors with their hydrogens.
        d_a_cutoff : float, optional
            Maximum donor-acceptor distance in Angstrom (default 3.0).
        d_h_a_angle_cutoff : float, optional
            Minimum donor-hydrogen-acceptor angle in degrees (default 150).
        **kwargs
            Additional parameters.

        Examples
        --------
        >>> # Analyze protein-water hydrogen bonds
        >>> hbond_analysis = HydrogenBondAnalysis(
        ...     universe=u,
        ...     donors_sel="protein",
        ...     acceptors_sel="resname SOL"
        ... )
        >>> hbond_analysis.run()
        >>> hbond_data = hbond_analysis.results.hbonds
        >>> frames = hbond_data[:, 0].astype(int)  # frame of each hydrogen bond
        """

    def guess_donors(self, selection="protein", max_missing=1):
        """
        Automatically identify hydrogen bond donors.

        Parameters
        ----------
        selection : str, optional
            Selection string for donor search (default "protein").
        max_missing : int, optional
            Maximum missing hydrogen atoms per donor (default 1).

        Returns
        -------
        str
            Selection string for identified donors.

        Notes
        -----
        NOTE(review): the upstream signature is
        ``guess_donors(select="all", max_charge=-0.5)``; confirm parameter
        names against the installed MDAnalysis version.
        """

    def guess_acceptors(self, selection="protein"):
        """
        Automatically identify hydrogen bond acceptors.

        Parameters
        ----------
        selection : str, optional
            Selection string for acceptor search (default "protein").

        Returns
        -------
        str
            Selection string for identified acceptors.

        Notes
        -----
        NOTE(review): the upstream signature is
        ``guess_acceptors(select="all", max_charge=-0.5)``; confirm
        parameter names against the installed MDAnalysis version.
        """

    @property
    def results(self):
        """
        Hydrogen bond analysis results.

        Returns
        -------
        Results
            Results object with:
            - hbonds: 2-D array with one row per hydrogen bond found, with
              columns [frame, donor_index, hydrogen_index, acceptor_index,
              donor-acceptor distance, donor-hydrogen-acceptor angle]
        """

class WaterBridgeAnalysis:
    """
    Analyze water-mediated hydrogen bonds between two selections.

    Identifies hydrogen bond networks in which one or more water
    molecules bridge the two molecular groups.
    """

    def __init__(self, universe, selection1, selection2, water_selection="resname SOL",
                 order=1, **kwargs):
        """
        Initialize water bridge analysis.

        Parameters
        ----------
        universe : Universe
            MDAnalysis Universe object.
        selection1 : str
            First selection (e.g., "protein").
        selection2 : str
            Second selection (e.g., "resname LIG").
        water_selection : str, optional
            Selection for water molecules (default "resname SOL").
        order : int, optional
            Maximum number of water molecules in bridge (default 1).
        **kwargs
            Additional hydrogen bond criteria.

        Examples
        --------
        >>> wb_analysis = WaterBridgeAnalysis(
        ...     universe=u,
        ...     selection1="protein",
        ...     selection2="resname LIG",
        ...     order=2  # Up to 2 water molecules
        ... )
        >>> wb_analysis.run()
        >>> # Per-frame bridge networks (nested dicts of hydrogen bonds)
        >>> networks = wb_analysis.results.network
        >>> # Per-frame flat list of the hydrogen bonds forming the bridges
        >>> bridges = wb_analysis.results.timeseries
        """

Principal Component Analysis

from MDAnalysis.analysis import pca

class PCA:
    """
    Principal Component Analysis of atomic coordinate fluctuations.

    Diagonalizes the covariance matrix of the selected atoms' coordinates
    to identify the principal modes of motion.
    """

    def __init__(self, universe, select="all", align=False, mean=None, 
                 n_components=None, **kwargs):
        """
        Initialize PCA calculation.

        Parameters
        ----------
        universe : Universe or AtomGroup
            System for PCA analysis.
        select : str, optional
            Selection string for atoms (default "all").
        align : bool, optional
            Whether to align each frame to the mean structure before PCA
            (default False). Without alignment, overall rotation and
            translation dominate the leading components.
        mean : array-like, optional
            Pre-computed mean structure for centering.
        n_components : int, optional
            Number of principal components to keep (default None: all).
        **kwargs
            Additional parameters.

        Examples
        --------
        >>> # PCA of backbone motion
        >>> pca_analysis = pca.PCA(u, select="backbone", align=True)
        >>> pca_analysis.run()
        >>> eigenvalues = pca_analysis.results.variance
        >>> eigenvectors = pca_analysis.results.p_components
        >>> # Number of components needed to capture 95% of the variance
        >>> n_pcs = np.where(pca_analysis.results.cumulated_variance > 0.95)[0][0] + 1
        """

    def transform(self, atomgroup, n_components=None):
        """
        Project coordinates onto principal components.

        Parameters
        ----------
        atomgroup : AtomGroup
            Atoms to project (must match PCA selection).
        n_components : int, optional
            Number of components for projection.

        Returns
        -------
        numpy.ndarray
            Projected coordinates in PC space, one row per frame and one
            column per component.
        """

    @property
    def results(self):
        """
        PCA analysis results.

        Returns
        -------
        Results
            Results object with:
            - variance: eigenvalues (variance explained by each PC)
            - p_components: principal component vectors, one per column
            - cumulated_variance: cumulative fraction of the total
              variance explained by the first k components
            - mean: mean structure used for centering
        """

Specialized Analysis Tools

Membrane Analysis

from MDAnalysis.analysis import leaflet

class LeafletFinder:
    """
    Identify membrane leaflets in lipid bilayer systems.

    Builds a connectivity graph of the selected head-group atoms (atoms
    closer than ``cutoff`` are connected) and reports the connected
    components as leaflets.
    """

    def __init__(self, universe, select="name P*", cutoff=15.0, pbc=True):
        """
        Initialize leaflet identification.

        Parameters
        ----------
        universe : Universe
            Universe containing membrane system.
        select : str, optional
            Selection for phosphate atoms (default "name P*").
        cutoff : float, optional
            Distance cutoff in Angstrom below which two selected atoms are
            considered part of the same leaflet (default 15.0).
        pbc : bool, optional
            Whether to use periodic boundary conditions (default True).

        Notes
        -----
        NOTE(review): upstream ``LeafletFinder(universe, select,
        cutoff=15.0, pbc=False, sparse=None)`` has no default for
        ``select`` and defaults ``pbc`` to False; pass both explicitly and
        confirm against the installed MDAnalysis version.

        Examples
        --------
        >>> leaflets = leaflet.LeafletFinder(u, select="name P", cutoff=12.0)
        >>> leaflet0 = leaflets.groups(0)
        >>> leaflet1 = leaflets.groups(1)
        >>> # The index does not say which side of the bilayer a group is
        >>> # on; compare positions along the membrane normal to decide:
        >>> upper, lower = sorted(
        ...     (leaflet0, leaflet1),
        ...     key=lambda ag: ag.center_of_geometry()[2],
        ...     reverse=True,
        ... )
        """

    def groups(self, leaflet_id=None):
        """
        Get atoms belonging to specified leaflet.

        Parameters
        ----------
        leaflet_id : int, optional
            Leaflet identifier. If None, returns list of all leaflets.

        Returns
        -------
        AtomGroup or list
            Atoms in specified leaflet or list of leaflet AtomGroups.
        """

Polymer Analysis

from MDAnalysis.analysis import polymer

class PersistenceLength:
    """
    Persistence length of polymer chains.

    The persistence length is the characteristic length scale over which
    correlations along the chain decay exponentially.
    """

    def __init__(self, atomgroups, **kwargs):
        """
        Set up the persistence length calculation.

        Parameters
        ----------
        atomgroups : list of AtomGroup
            One AtomGroup per polymer chain, holding its backbone atoms.
        **kwargs
            Further parameters for the calculation.

        Examples
        --------
        >>> # DNA backbone analysis
        >>> backbone_atoms = [u.select_atoms(f"segid {seg} and name P")
        ...                  for seg in ["DNA1", "DNA2"]]
        >>> lp_analysis = polymer.PersistenceLength(backbone_atoms)
        >>> lp_analysis.run()
        >>> persistence_length = lp_analysis.results.lp
        """

Mean Squared Displacement

from MDAnalysis.analysis import msd

class EinsteinMSD:
    """
    Calculate mean squared displacement for diffusion analysis.

    Computes the MSD as a function of lag time via the Einstein relation.
    This is the name of the class in ``MDAnalysis.analysis.msd``. The
    input trajectory must be unwrapped (no jumps across periodic
    boundaries), otherwise the MSD is meaningless.
    """

    def __init__(self, universe, select="all", msd_type="xyz", fft=True, **kwargs):
        """
        Initialize MSD calculation.

        Parameters
        ----------
        universe : Universe
            Universe with trajectory for MSD analysis.
        select : str, optional
            Selection string for atoms (default "all").
        msd_type : str, optional
            Dimensions included in the MSD
            ("xyz", "xy", "yz", "xz", "x", "y", "z").
        fft : bool, optional
            Whether to use FFT-based algorithm for efficiency (default True).
            NOTE(review): upstream needs the optional ``tidynamics`` package
            for this; confirm it is installed or pass ``fft=False``.
        **kwargs
            Additional parameters.

        Examples
        --------
        >>> # Water diffusion analysis
        >>> msd_analysis = msd.EinsteinMSD(u, select="name OH2")
        >>> msd_analysis.run()
        >>> msd_data = msd_analysis.results.timeseries
        >>> # Lag times are not stored; build them from the frame spacing
        >>> lagtimes = np.arange(msd_analysis.n_frames) * u.trajectory.dt
        """

    @property
    def results(self):
        """
        MSD analysis results.

        Returns
        -------
        Results
            Results object with:
            - timeseries: MSD averaged over all selected particles, one
              value per lag (in frames)
            - msds_by_particle: MSD of each individual particle, shape
              (n_lags, n_particles)
        """


# Backward-compatible alias for the name used by earlier revisions of this
# page; it is not a name exported by MDAnalysis itself.
MeanSquaredDisplacement = EinsteinMSD

Usage Patterns

Analysis Workflow

# Standard analysis workflow
def analyze_trajectory(universe, output_prefix="analysis"):
    """
    Comprehensive trajectory analysis example.

    Runs backbone RMSD, C-alpha RMSF and hydrogen bond analyses on the
    protein in ``universe`` and writes the RMSD and RMSF arrays to text
    files.

    Parameters
    ----------
    universe : Universe
        Universe containing a protein and a trajectory.
    output_prefix : str, optional
        Prefix of the output files ``<prefix>_rmsd.dat`` and
        ``<prefix>_rmsf.dat`` (default "analysis").

    Returns
    -------
    dict
        The Results objects of the three analyses under the keys
        'rmsd', 'rmsf' and 'hbonds'.
    """
    # Imported locally so the example is self-contained: numpy is not
    # imported anywhere else on this page, and the page's documentation
    # stubs would otherwise shadow the real analysis classes.
    import numpy as np
    from MDAnalysis.analysis import rms
    from MDAnalysis.analysis.hydrogenbonds import HydrogenBondAnalysis

    # Structural analysis
    protein = universe.select_atoms("protein")

    # RMSD analysis (each frame is fitted on the backbone internally)
    rmsd_analysis = rms.RMSD(protein, select="backbone")
    rmsd_analysis.run()

    # RMSF analysis. RMSF does not superimpose frames itself, so the
    # values are only meaningful if the trajectory was aligned beforehand
    # (e.g. with align.AlignTraj).
    ca_atoms = protein.select_atoms("name CA")
    rmsf_analysis = rms.RMSF(ca_atoms)
    rmsf_analysis.run()

    # Hydrogen bond analysis; with no selections given, donors, hydrogens
    # and acceptors are left to the analysis to determine.
    hb_analysis = HydrogenBondAnalysis(universe)
    hb_analysis.run()

    # Save results
    np.savetxt(f"{output_prefix}_rmsd.dat", rmsd_analysis.results.rmsd)
    np.savetxt(f"{output_prefix}_rmsf.dat", rmsf_analysis.results.rmsf)

    return {
        'rmsd': rmsd_analysis.results,
        'rmsf': rmsf_analysis.results,
        'hbonds': hb_analysis.results,
    }

# Analyze universe ``u``; output files are prefixed with "protein_analysis"
results = analyze_trajectory(u, output_prefix="protein_analysis")

Parallel Analysis

# Parallel analysis using multiprocessing
from MDAnalysis.analysis.base import AnalysisFromFunction
import multiprocessing as mp

def frame_analysis(ag):
    """Compute the per-frame observables of atom group ``ag``.

    Returns a dict holding the radius of gyration under ``'rgyr'`` and the
    center of mass under ``'com'``.
    """
    gyration_radius = ag.radius_of_gyration()
    mass_center = ag.center_of_mass()
    return dict(rgyr=gyration_radius, com=mass_center)

# Run in parallel. run() has no ``n_jobs`` argument; parallel execution is
# requested with ``backend`` and ``n_workers`` (needs an MDAnalysis release
# with the AnalysisBase parallelization API, 2.8.0 or newer).
# frame_analysis must remain a module-level function so that worker
# processes can pickle it.
protein = u.select_atoms("protein")
parallel_analysis = AnalysisFromFunction(frame_analysis, protein)
parallel_analysis.run(backend="multiprocessing", n_workers=mp.cpu_count())

# The per-frame return values are collected in results.timeseries
results = parallel_analysis.results

Install with Tessl CLI

npx tessl i tessl/pypi-mdanalysis

docs

analysis-tools.md

auxiliary-data.md

converters.md

coordinate-transformations.md

core-functionality.md

index.md

io-formats.md

selection-language.md

topology-handling.md

units-utilities.md

tile.json