tessl/pypi-mdanalysis

An object-oriented toolkit to analyze molecular dynamics trajectories.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Units and Utilities

Name: tessl/pypi-mdanalysis
Author: tessl

MDAnalysis provides comprehensive utilities for unit conversions, mathematical operations, and helper functions essential for molecular dynamics analysis. The units system ensures consistent handling of physical quantities across different simulation packages and analysis workflows.

Units System

Base Units in MDAnalysis

MDAnalysis uses a consistent set of base units for all internal calculations:

import MDAnalysis.units as units

# Base units dictionary
MDANALYSIS_BASE_UNITS = {
    'length': 'Angstrom',      # Å  
    'time': 'ps',              # picosecond
    'energy': 'kJ/mol',        # kilojoule per mole
    'charge': 'e',             # elementary charge
    'mass': 'u',               # atomic mass unit (amu)
    'force': 'kJ/(mol*Angstrom)', # kJ/(mol·Å)
    'speed': 'Angstrom/ps',    # Å/ps
    'density': 'u/Angstrom**3' # amu/Å³
}

# Access base units
print(units.MDANALYSIS_BASE_UNITS['length'])  # 'Angstrom'
print(units.MDANALYSIS_BASE_UNITS['energy'])  # 'kJ/mol'

Unit Conversion Functions

def get_conversion_factor(unit_type, u1, u2):
    """
    Get conversion factor between two units of the same type.
    
    Parameters
    ----------
    unit_type : str
        Type of physical quantity ('length', 'time', 'energy', etc.).
    u1 : str
        Source unit.
    u2 : str
        Target unit.
        
    Returns
    -------
    float
        Conversion factor such that value_u2 = value_u1 * factor.
        
    Examples
    --------
    >>> # Length conversions
    >>> factor = units.get_conversion_factor('length', 'Angstrom', 'nm')
    >>> print(factor)  # 0.1 (1 Å = 0.1 nm)
    
    >>> # Energy conversions  
    >>> factor = units.get_conversion_factor('energy', 'kcal/mol', 'kJ/mol')
    >>> print(factor)  # 4.184 (1 kcal/mol = 4.184 kJ/mol)
    
    >>> # Time conversions
    >>> factor = units.get_conversion_factor('time', 'ps', 'ns')
    >>> print(factor)  # 0.001 (1 ps = 0.001 ns)
    """

def convert(values, u1, u2):
    """
    Convert values between units of the same physical quantity.
    
    Parameters
    ----------
    values : float or array-like
        Value(s) to convert.
    u1 : str  
        Source unit.
    u2 : str
        Target unit.
        
    Returns
    -------
    float or numpy.ndarray
        Converted value(s) in target units.
        
    Examples
    --------
    >>> # Convert distances
    >>> dist_nm = units.convert(10.0, 'Angstrom', 'nm')  
    >>> print(dist_nm)  # 1.0 nm
    
    >>> # Convert array of energies
    >>> energies_kcal = [1.0, 2.5, 3.8]
    >>> energies_kj = units.convert(energies_kcal, 'kcal/mol', 'kJ/mol')
    >>> print(energies_kj)  # [4.184, 10.46, 15.8992] kJ/mol
    
    >>> # Convert temperature to energy (thermal energy)
    >>> temp_K = 300.0
    >>> energy_kj = units.convert(temp_K, 'K', 'kJ/mol', 'energy')
    >>> print(energy_kj)  # ~2.494 kJ/mol (kT at 300K)
    """

Supported Unit Categories

Length Units

# Length unit conversions
length_units = {
    'Angstrom': 1.0,        # Base unit  
    'nm': 0.1,              # nanometer
    'pm': 100.0,            # picometer
    'fm': 1e5,              # femtometer
    'meter': 1e-10,         # meter
    'cm': 1e-8,             # centimeter
    'mm': 1e-7,             # millimeter
    'inch': 3.937e-9,       # inch
    'ft': 3.281e-10,        # foot
    'Bohr': 1.889726125,    # Bohr radius
}

# Examples
distances_angstrom = [1.0, 5.0, 10.0, 15.0]
distances_nm = units.convert(distances_angstrom, 'Angstrom', 'nm')
distances_pm = units.convert(distances_angstrom, 'Angstrom', 'pm')

print(f"Distances in Å: {distances_angstrom}")
print(f"Distances in nm: {distances_nm}")  
print(f"Distances in pm: {distances_pm}")

Time Units

# Time unit conversions
time_units = {
    'ps': 1.0,              # Base unit - picosecond
    'fs': 1000.0,           # femtosecond  
    'ns': 0.001,            # nanosecond
    'us': 1e-6,             # microsecond
    'ms': 1e-9,             # millisecond
    's': 1e-12,             # second
    'AKMA': 4.888821e-2,    # CHARMM internal time unit
}

# Examples  
times_ps = [0.1, 1.0, 10.0, 100.0]
times_ns = units.convert(times_ps, 'ps', 'ns')
times_fs = units.convert(times_ps, 'ps', 'fs')

print(f"Times in ps: {times_ps}")
print(f"Times in ns: {times_ns}")
print(f"Times in fs: {times_fs}")

Energy Units

# Energy unit conversions
energy_units = {
    'kJ/mol': 1.0,          # Base unit
    'kcal/mol': 1/4.184,    # kilocalorie per mole
    'eV': 96.485332,        # electron volt  
    'Hartree': 2625.5,      # Hartree (atomic unit)
    'Ry': 1312.75,          # Rydberg
    'J/mol': 0.001,         # joule per mole
    'K': 1/0.008314462618,  # Kelvin (thermal energy kT)
    'AKMA': 4.184,          # CHARMM internal energy
}

# Examples
energies_kj = [1.0, 10.0, 100.0]  
energies_kcal = units.convert(energies_kj, 'kJ/mol', 'kcal/mol')
energies_eV = units.convert(energies_kj, 'kJ/mol', 'eV')

print(f"Energies in kJ/mol: {energies_kj}")
print(f"Energies in kcal/mol: {energies_kcal}")
print(f"Energies in eV: {energies_eV}")

# Temperature to thermal energy conversion
temperature = 300.0  # K
thermal_energy = units.convert(temperature, 'K', 'kJ/mol')
print(f"Thermal energy at {temperature}K: {thermal_energy:.3f} kJ/mol")

Mass and Density Units

# Mass units
mass_units = {
    'u': 1.0,               # Base unit - atomic mass unit (amu)
    'amu': 1.0,             # alias for u
    'Da': 1.0,              # Dalton  
    'kDa': 0.001,           # kiloDalton
    'g/mol': 1.0,           # gram per mole (numerically same as amu)
    'kg': 1.66054e-27,      # kilogram
    'g': 1.66054e-24,       # gram
}

# Density units  
density_units = {
    'u/Angstrom**3': 1.0,         # Base unit
    'g/cm**3': 1.66054,           # gram per cubic centimeter
    'kg/m**3': 1660.54,           # kilogram per cubic meter
    'g/ml': 1.66054,              # gram per milliliter
}

# Examples
masses_amu = [12.01, 14.007, 15.999]  # C, N, O
masses_g = units.convert(masses_amu, 'u', 'g')
masses_kg = units.convert(masses_amu, 'u', 'kg')

print(f"Masses in amu: {masses_amu}")
print(f"Masses in g: {masses_g}")
print(f"Masses in kg: {masses_kg}")

Mathematical Utilities

Distance Calculations

from MDAnalysis.lib import distances

def distance_array(reference, configuration, box=None, result=None, backend="serial"):
    """
    Calculate distance matrix between two coordinate sets.
    
    Parameters
    ----------
    reference : array-like
        Reference coordinates of shape (n, 3).
    configuration : array-like
        Configuration coordinates of shape (m, 3).  
    box : array-like, optional
        Unit cell dimensions [a, b, c, alpha, beta, gamma] for
        periodic boundary condition handling.
    result : numpy.ndarray, optional
        Pre-allocated result array of shape (n, m).
    backend : {'serial', 'OpenMP'}, optional
        Computational backend (default 'serial').
        
    Returns
    -------
    numpy.ndarray
        Distance array of shape (n, m) where element [i,j] is the
        distance between reference[i] and configuration[j].
        
    Examples
    --------
    >>> import numpy as np
    >>> coords1 = np.random.random((100, 3)) * 10
    >>> coords2 = np.random.random((50, 3)) * 10
    >>> 
    >>> # Calculate all pairwise distances
    >>> dist_matrix = distances.distance_array(coords1, coords2)
    >>> print(dist_matrix.shape)  # (100, 50)
    >>> 
    >>> # With periodic boundaries
    >>> box = [20.0, 20.0, 20.0, 90.0, 90.0, 90.0]
    >>> dist_matrix_pbc = distances.distance_array(coords1, coords2, box=box)
    """

def self_distance_array(reference, box=None, result=None, backend="serial"):
    """
    Calculate self-distance matrix (all pairwise distances within one set).
    
    Parameters
    ----------
    reference : array-like
        Coordinates of shape (n, 3).
    box : array-like, optional
        Unit cell dimensions for PBC.
    result : numpy.ndarray, optional
        Pre-allocated result array of shape (n, n).
    backend : {'serial', 'OpenMP'}, optional
        Computational backend.
        
    Returns
    -------
    numpy.ndarray
        Symmetric distance matrix of shape (n, n).
        
    Examples
    --------
    >>> coords = protein.select_atoms("name CA").positions
    >>> ca_distances = distances.self_distance_array(coords)
    >>> 
    >>> # Distance between CA atoms 0 and 10
    >>> dist_0_10 = ca_distances[0, 10]
    >>> 
    >>> # Find closest CA atom to atom 0 (excluding itself)
    >>> distances_from_0 = ca_distances[0].copy()
    >>> distances_from_0[0] = np.inf  # Exclude self
    >>> closest_idx = np.argmin(distances_from_0)
    >>> closest_distance = distances_from_0[closest_idx]
    """

def calc_bonds(coordinates1, coordinates2, box=None, result=None, backend="serial"):
    """
    Calculate distances between pairs of coordinates (bonds).
    
    Parameters
    ----------
    coordinates1 : array-like
        First set of coordinates, shape (n, 3).
    coordinates2 : array-like  
        Second set of coordinates, shape (n, 3).
    box : array-like, optional
        Unit cell dimensions.
    result : numpy.ndarray, optional
        Pre-allocated result array of shape (n,).
    backend : {'serial', 'OpenMP'}, optional
        Computational backend.
        
    Returns
    -------
    numpy.ndarray
        Array of distances between coordinate pairs.
        
    Examples
    --------
    >>> # Calculate bond lengths
    >>> bonds = u.atoms.bonds
    >>> coord1 = u.atoms[bonds.indices[:, 0]].positions
    >>> coord2 = u.atoms[bonds.indices[:, 1]].positions
    >>> bond_lengths = distances.calc_bonds(coord1, coord2, box=u.dimensions)
    >>> 
    >>> print(f"Average bond length: {np.mean(bond_lengths):.3f} Å")
    >>> print(f"Bond length range: {np.min(bond_lengths):.3f} - {np.max(bond_lengths):.3f} Å")
    """

def calc_angles(coordinates1, coordinates2, coordinates3, box=None, result=None):
    """
    Calculate angles between three coordinate sets.
    
    Parameters
    ----------
    coordinates1 : array-like
        First coordinates (angle vertex), shape (n, 3).
    coordinates2 : array-like
        Second coordinates (angle center), shape (n, 3).  
    coordinates3 : array-like
        Third coordinates (angle vertex), shape (n, 3).
    box : array-like, optional
        Unit cell dimensions.
    result : numpy.ndarray, optional
        Pre-allocated result array.
        
    Returns
    -------
    numpy.ndarray
        Array of angles in radians.
        
    Examples
    --------
    >>> # Calculate backbone angles
    >>> angles = u.atoms.angles
    >>> coord1 = u.atoms[angles.indices[:, 0]].positions  
    >>> coord2 = u.atoms[angles.indices[:, 1]].positions
    >>> coord3 = u.atoms[angles.indices[:, 2]].positions
    >>> angle_values = distances.calc_angles(coord1, coord2, coord3)
    >>> angle_degrees = np.degrees(angle_values)
    >>> 
    >>> print(f"Average angle: {np.mean(angle_degrees):.1f}°")
    """

def calc_dihedrals(coord1, coord2, coord3, coord4, box=None, result=None):
    """
    Calculate dihedral angles between four coordinate sets.
    
    Parameters
    ----------
    coord1, coord2, coord3, coord4 : array-like
        Coordinates defining dihedral angles, each shape (n, 3).
    box : array-like, optional
        Unit cell dimensions.
    result : numpy.ndarray, optional
        Pre-allocated result array.
        
    Returns
    -------
    numpy.ndarray
        Array of dihedral angles in radians (-π to π).
        
    Examples
    --------
    >>> # Calculate backbone phi angles
    >>> dihedrals = u.atoms.dihedrals  
    >>> c1 = u.atoms[dihedrals.indices[:, 0]].positions
    >>> c2 = u.atoms[dihedrals.indices[:, 1]].positions
    >>> c3 = u.atoms[dihedrals.indices[:, 2]].positions  
    >>> c4 = u.atoms[dihedrals.indices[:, 3]].positions
    >>> dihedral_values = distances.calc_dihedrals(c1, c2, c3, c4)
    >>> dihedral_degrees = np.degrees(dihedral_values)
    """

Neighbor Searching

from MDAnalysis.lib import NeighborSearch

class NeighborSearch:
    """
    Fast neighbor searching using spatial data structures.
    
    Uses KDTree-based algorithms for efficient distance-based queries.
    """
    
    def __init__(self, atom_list, box=None, bucket_size=10):
        """
        Initialize neighbor search structure.
        
        Parameters
        ----------
        atom_list : AtomGroup or array-like
            Atoms to include in search tree or coordinate array.
        box : array-like, optional
            Unit cell dimensions for periodic boundary conditions.
        bucket_size : int, optional
            Bucket size for tree construction (default 10).
            
        Examples
        --------
        >>> # Search in protein atoms
        >>> protein = u.select_atoms("protein")
        >>> ns = NeighborSearch(protein, box=u.dimensions)
        
        >>> # Search in coordinate array
        >>> coords = protein.positions
        >>> ns = NeighborSearch(coords, box=u.dimensions)
        """
    
    def search(self, atoms, radius, level='A'):
        """
        Find neighbors within radius.
        
        Parameters
        ----------
        atoms : AtomGroup or array-like
            Query atoms or coordinates.
        radius : float
            Search radius in Angstrom.
        level : {'A', 'R', 'S'}, optional
            Return level: 'A' for atoms, 'R' for residues, 'S' for segments.
            
        Returns
        -------
        AtomGroup
            Atoms within radius of query atoms.
            
        Examples
        --------
        >>> # Find atoms within 5Å of ligand
        >>> ligand = u.select_atoms("resname LIG")
        >>> nearby = ns.search(ligand, 5.0)
        >>> 
        >>> # Find residues within 8Å  
        >>> nearby_residues = ns.search(ligand, 8.0, level='R')
        """

class AtomNeighborSearch(NeighborSearch):
    """Specialized neighbor search for AtomGroups with additional features."""
    
    def search(self, atoms, radius, level='A'):
        """Enhanced search with AtomGroup-specific features."""
        
    def search_pairs(self, radius):
        """
        Find all atom pairs within radius.
        
        Parameters
        ----------
        radius : float
            Distance cutoff for pair identification.
            
        Returns
        -------
        numpy.ndarray
            Array of shape (n_pairs, 2) containing atom index pairs.
            
        Examples
        --------
        >>> # Find all close contacts
        >>> pairs = ns.search_pairs(3.0)
        >>> print(f"Found {len(pairs)} contacts within 3.0 Å")
        """

Mathematical Functions

from MDAnalysis.lib import mdamath

def norm(v):
    """
    Calculate vector norm (magnitude).
    
    Parameters
    ----------
    v : array-like
        Input vector or array of vectors.
        
    Returns
    -------
    float or numpy.ndarray
        Vector norm(s).
        
    Examples
    --------
    >>> vector = [3.0, 4.0, 0.0]
    >>> magnitude = mdamath.norm(vector)  # 5.0
    >>> 
    >>> # Multiple vectors
    >>> vectors = np.random.random((100, 3))
    >>> magnitudes = mdamath.norm(vectors)
    """

def angle(v1, v2):
    """
    Calculate angle between two vectors.
    
    Parameters
    ----------
    v1, v2 : array-like
        Input vectors.
        
    Returns
    -------
    float
        Angle in radians.
        
    Examples
    --------
    >>> v1 = [1, 0, 0]  
    >>> v2 = [0, 1, 0]
    >>> ang = mdamath.angle(v1, v2)  # π/2 radians (90°)
    >>> ang_degrees = np.degrees(ang)  # 90.0°
    """

def dihedral(v1, v2, v3, v4):
    """
    Calculate dihedral angle between four vectors.
    
    Parameters  
    ----------
    v1, v2, v3, v4 : array-like
        Four vectors defining the dihedral angle.
        
    Returns
    -------
    float
        Dihedral angle in radians (-π to π).
        
    Examples
    --------
    >>> # Calculate a backbone dihedral
    >>> phi = mdamath.dihedral(c_prev, n, ca, c)
    >>> phi_degrees = np.degrees(phi)
    """

def normal(v):
    """
    Calculate a vector normal to the input vector.
    
    Parameters
    ----------
    v : array-like
        Input vector.
        
    Returns
    -------
    numpy.ndarray
        Normal vector of same magnitude.
        
    Examples
    --------
    >>> v = [1, 1, 0]
    >>> n = mdamath.normal(v)  # Perpendicular to v
    >>> print(np.dot(v, n))    # Should be ~0
    """

def find_fragments(bonds, natoms):
    """
    Find molecular fragments from bond connectivity.
    
    Parameters
    ----------
    bonds : array-like
        Bond connectivity as array of shape (n_bonds, 2).
    natoms : int
        Total number of atoms.
        
    Returns
    -------
    list
        List of arrays containing atom indices for each fragment.
        
    Examples
    --------
    >>> # Find disconnected molecules
    >>> bond_indices = u.atoms.bonds.indices
    >>> fragments = mdamath.find_fragments(bond_indices, u.atoms.n_atoms)
    >>> print(f"System has {len(fragments)} fragments")
    >>> 
    >>> # Largest fragment
    >>> largest_frag = max(fragments, key=len)
    >>> print(f"Largest fragment: {len(largest_frag)} atoms")
    """

Utility Functions

File and Format Utilities

from MDAnalysis.lib import util

def openany(datasource, mode='rb', reset=True):
    """
    Open compressed or uncompressed files transparently.
    
    Parameters
    ----------
    datasource : str or file-like
        File path or file-like object. Handles .gz, .bz2, .xz compression.
    mode : str, optional  
        File opening mode (default 'rb').
    reset : bool, optional
        Whether to reset file position to beginning (default True).
        
    Returns
    -------
    file-like
        Open file object.
        
    Examples
    --------
    >>> # Automatically handles compressed files
    >>> with util.openany('trajectory.xtc.gz') as f:
    ...     data = f.read()
    >>> 
    >>> # Works with regular files too
    >>> with util.openany('structure.pdb') as f:
    ...     lines = f.readlines()
    """

def anyopen(datasource, mode='r', reset=True):
    """
    Context manager version of openany.
    
    Parameters
    ----------
    datasource : str or file-like
        File path or file-like object.
    mode : str, optional
        File opening mode (default 'r').
    reset : bool, optional
        Whether to reset file position.
        
    Examples
    --------
    >>> # Use as context manager
    >>> with util.anyopen('data.txt.bz2', 'r') as f:
    ...     content = f.read()
    """

def filename(path, ext=None, keep=False):
    """
    Extract filename components.
    
    Parameters
    ----------
    path : str
        File path.
    ext : str, optional
        Force specific extension.
    keep : bool, optional
        Whether to keep extension (default False).
        
    Returns
    -------
    str
        Processed filename.
        
    Examples
    --------
    >>> util.filename('/path/to/file.pdb')           # 'file'
    >>> util.filename('/path/to/file.pdb', keep=True)  # 'file.pdb'  
    >>> util.filename('/path/to/file.pdb', ext='xyz')  # 'file.xyz'
    """

def format_from_filename(filename):
    """
    Guess file format from filename extension.
    
    Parameters
    ----------
    filename : str
        File path.
        
    Returns
    -------
    str or None
        Guessed format string, None if unknown.
        
    Examples
    --------
    >>> util.format_from_filename('traj.xtc')     # 'XTC'
    >>> util.format_from_filename('struct.pdb')   # 'PDB'
    >>> util.format_from_filename('data.unknown') # None
    """

Caching Utilities

from functools import lru_cache

@util.cached
def expensive_calculation(atomgroup, parameter):
    """
    Example of cached function decorator.
    
    The @cached decorator stores results based on function arguments
    to avoid recomputation with same inputs.
    """
    # Expensive calculation here
    result = atomgroup.some_expensive_operation(parameter)
    return result

class Store:
    """
    Simple key-value store for caching analysis results.
    """
    
    def __init__(self):
        self._data = {}
    
    def __setitem__(self, key, value):
        """Store value with key."""
        self._data[key] = value
    
    def __getitem__(self, key):
        """Retrieve value by key.""" 
        return self._data[key]
    
    def __contains__(self, key):
        """Check if key exists."""
        return key in self._data
    
    def get(self, key, default=None):
        """Get value with default fallback."""
        return self._data.get(key, default)
    
    def clear(self):
        """Clear all stored data."""
        self._data.clear()

# Usage example
results_cache = Store()
results_cache['rmsd_protein'] = rmsd_values
results_cache['distances_ca'] = distance_matrix

Progress Tracking

from MDAnalysis.lib.log import ProgressBar

class ProgressBar:
    """
    Progress bar for long-running calculations.
    """
    
    def __init__(self, total, **kwargs):
        """
        Initialize progress bar.
        
        Parameters
        ----------
        total : int
            Total number of iterations.
        **kwargs
            Additional formatting options.
        """
    
    def update(self, n=1):
        """
        Update progress by n steps.
        
        Parameters
        ----------
        n : int, optional
            Number of steps to advance (default 1).
        """
    
    def close(self):
        """Close progress bar."""

# Usage in analysis loops
def analyze_with_progress(universe):
    """Example analysis with progress tracking."""
    n_frames = len(universe.trajectory)
    results = []
    
    with ProgressBar(n_frames) as pbar:
        for ts in universe.trajectory:
            # Perform analysis
            result = calculate_something(universe)
            results.append(result)
            
            # Update progress
            pbar.update()
    
    return results

Advanced Utilities

Coordinate Utilities

def center_of_mass(coordinates, masses):
    """
    Calculate center of mass for coordinates.
    
    Parameters
    ----------
    coordinates : array-like
        Coordinate array of shape (n, 3).
    masses : array-like
        Mass array of shape (n,).
        
    Returns
    -------
    numpy.ndarray
        Center of mass as 3-element array.
        
    Examples
    --------
    >>> coords = protein.positions
    >>> masses = protein.masses
    >>> com = util.center_of_mass(coords, masses)
    """

def radius_of_gyration(coordinates, masses=None):
    """
    Calculate radius of gyration.
    
    Parameters
    ----------
    coordinates : array-like
        Coordinate array of shape (n, 3).
    masses : array-like, optional
        Mass array. If None, uses equal weights.
        
    Returns
    -------
    float
        Radius of gyration.
        
    Examples
    --------
    >>> rgyr = util.radius_of_gyration(protein.positions, protein.masses)
    >>> print(f"Radius of gyration: {rgyr:.2f} Å")
    """

def principal_axes(coordinates, masses=None):
    """
    Calculate principal axes of coordinate distribution.
    
    Parameters
    ----------
    coordinates : array-like
        Coordinate array of shape (n, 3).
    masses : array-like, optional
        Mass array for weighting.
        
    Returns
    -------
    numpy.ndarray
        Principal axes as 3x3 array (rows are axes).
        
    Examples
    --------
    >>> axes = util.principal_axes(protein.positions, protein.masses)
    >>> longest_axis = axes[0]  # Axis with largest eigenvalue
    >>> print(f"Longest axis: {longest_axis}")
    """

Data Processing Utilities

def smooth_data(data, window_size=5, method='moving_average'):
    """
    Smooth time series data.
    
    Parameters
    ----------
    data : array-like
        Input data to smooth.
    window_size : int, optional
        Size of smoothing window (default 5).
    method : str, optional
        Smoothing method ('moving_average', 'gaussian', 'savgol').
        
    Returns
    -------
    numpy.ndarray
        Smoothed data array.
        
    Examples
    --------
    >>> # Smooth RMSD time series
    >>> rmsd_smooth = util.smooth_data(rmsd_values, window_size=10)
    >>> 
    >>> # Gaussian smoothing
    >>> smooth_gaussian = util.smooth_data(data, method='gaussian')
    """

def block_average(data, block_size):
    """
    Calculate block averages for error analysis.
    
    Parameters
    ----------
    data : array-like
        Input time series data.
    block_size : int
        Size of blocks for averaging.
        
    Returns
    -------
    numpy.ndarray
        Array of block averages.
        
    Examples
    --------
    >>> # Block average for statistical analysis
    >>> blocks = util.block_average(energy_data, block_size=100)
    >>> block_error = np.std(blocks) / np.sqrt(len(blocks))
    >>> print(f"Block average error: {block_error:.3f}")
    """

def autocorrelation(data, maxlags=None):
    """
    Calculate autocorrelation function.
    
    Parameters
    ----------
    data : array-like
        Input time series.
    maxlags : int, optional
        Maximum lag time to calculate.
        
    Returns
    -------
    numpy.ndarray
        Autocorrelation function.
        
    Examples
    --------
    >>> # Autocorrelation of coordinate fluctuations
    >>> coord_fluct = coords - np.mean(coords, axis=0)
    >>> autocorr = util.autocorrelation(coord_fluct[:, 0])  # x-component
    >>> 
    >>> # Find correlation time
    >>> corr_time = np.where(autocorr < np.exp(-1))[0][0]
    """

Usage Examples

Unit Conversion Workflow

def convert_simulation_units(universe, output_units=None):
    """
    Convert simulation data to desired units.
    
    Parameters
    ----------
    universe : Universe
        MDAnalysis Universe object.
    output_units : dict, optional
        Desired output units for each quantity type.
        
    Returns
    -------
    dict
        Converted data in specified units.
    """
    if output_units is None:
        output_units = {
            'length': 'nm',
            'time': 'ns', 
            'energy': 'kcal/mol',
            'mass': 'g/mol'
        }
    
    # Get current trajectory properties
    positions = universe.atoms.positions  # Å
    if hasattr(universe.trajectory.ts, 'time'):
        time = universe.trajectory.ts.time  # ps
    else:
        time = 0.0
    
    masses = universe.atoms.masses  # amu
    
    # Convert units
    converted = {}
    
    # Convert positions
    if output_units['length'] != 'Angstrom':
        converted['positions'] = units.convert(
            positions, 'Angstrom', output_units['length']
        )
    else:
        converted['positions'] = positions
    
    # Convert time
    if output_units['time'] != 'ps':
        converted['time'] = units.convert(
            time, 'ps', output_units['time']  
        )
    else:
        converted['time'] = time
    
    # Convert masses
    if output_units['mass'] != 'u':
        converted['masses'] = units.convert(
            masses, 'u', output_units['mass']
        )
    else:
        converted['masses'] = masses
    
    # Convert box dimensions
    if universe.dimensions is not None:
        box_lengths = universe.dimensions[:3]  # Å
        if output_units['length'] != 'Angstrom':
            converted['box_lengths'] = units.convert(
                box_lengths, 'Angstrom', output_units['length']
            )
        else:
            converted['box_lengths'] = box_lengths
    
    # Add unit information
    converted['units'] = output_units.copy()
    
    return converted

# Example usage
converted_data = convert_simulation_units(u, {
    'length': 'nm',
    'time': 'ns',
    'mass': 'kDa'
})

print(f"Positions in {converted_data['units']['length']}")
print(f"Time: {converted_data['time']} {converted_data['units']['time']}")

Performance Optimization Example

def optimize_distance_calculations(coords1, coords2, cutoff=10.0):
    """
    Optimize distance calculations using various strategies.
    """
    import time
    
    n1, n2 = len(coords1), len(coords2)
    print(f"Calculating distances between {n1} and {n2} points")
    
    # Method 1: Full distance matrix (memory intensive)
    if n1 * n2 < 1e6:  # Only for smaller systems
        start = time.time()
        full_distances = distances.distance_array(coords1, coords2)
        close_pairs_full = np.where(full_distances < cutoff)
        time_full = time.time() - start
        print(f"Full matrix method: {time_full:.3f}s")
    
    # Method 2: Neighbor search (memory efficient)
    start = time.time()
    ns = NeighborSearch(coords1)
    close_atoms = []
    for i, coord in enumerate(coords2):
        neighbors = ns.search(coord.reshape(1, 3), cutoff)
        close_atoms.extend([(n, i) for n in neighbors])
    time_ns = time.time() - start
    print(f"Neighbor search method: {time_ns:.3f}s")
    
    # Method 3: Chunked processing (memory controlled)
    start = time.time()
    chunk_size = min(1000, n2)
    close_pairs_chunked = []
    
    for start_idx in range(0, n2, chunk_size):
        end_idx = min(start_idx + chunk_size, n2)
        chunk_coords = coords2[start_idx:end_idx]
        
        chunk_distances = distances.distance_array(coords1, chunk_coords)
        chunk_pairs = np.where(chunk_distances < cutoff)
        
        # Adjust indices for chunking
        adjusted_pairs = (chunk_pairs[0], chunk_pairs[1] + start_idx)
        close_pairs_chunked.append(adjusted_pairs)
    
    time_chunked = time.time() - start
    print(f"Chunked method: {time_chunked:.3f}s")
    
    return {
        'neighbor_search_time': time_ns,
        'chunked_time': time_chunked
    }

# Example usage
protein_coords = u.select_atoms("protein").positions
water_coords = u.select_atoms("resname SOL").positions[:1000]  # Limit for demo

timing_results = optimize_distance_calculations(protein_coords, water_coords)

Custom Unit System

class CustomUnitSystem:
    """
    Custom unit system for specialized applications.
    """
    
    def __init__(self, base_units=None):
        """
        Initialize custom unit system.
        
        Parameters
        ----------
        base_units : dict, optional
            Custom base units. If None, uses MDAnalysis defaults.
        """
        if base_units is None:
            self.base_units = units.MDANALYSIS_BASE_UNITS.copy()
        else:
            self.base_units = base_units.copy()
        
        # Custom conversion factors
        self.custom_conversions = {
            'length': {
                'lattice_units': 5.431,  # Silicon lattice parameter in Å
                'chain_length': 100.0,   # Polymer chain length unit
            },
            'energy': {
                'binding_energy': 25.0,  # Typical binding energy in kJ/mol
                'thermal_300K': 2.494,   # kT at 300K in kJ/mol
            }
        }
    
    def convert_custom(self, value, from_unit, to_unit, quantity_type):
        """
        Convert between custom units.
        
        Parameters
        ----------
        value : float or array-like
            Value to convert.
        from_unit : str
            Source unit.
        to_unit : str
            Target unit.
        quantity_type : str
            Type of physical quantity.
            
        Returns
        -------
        float or array-like
            Converted value.
        """
        # Check if custom conversion is needed
        custom_units = self.custom_conversions.get(quantity_type, {})
        
        if from_unit in custom_units and to_unit in custom_units:
            # Both custom units
            from_factor = custom_units[from_unit]
            to_factor = custom_units[to_unit] 
            return value * (from_factor / to_factor)
        
        elif from_unit in custom_units:
            # From custom to standard
            base_unit = self.base_units[quantity_type]
            from_factor = custom_units[from_unit]
            value_in_base = value * from_factor
            return units.convert(value_in_base, base_unit, to_unit)
        
        elif to_unit in custom_units:
            # From standard to custom
            base_unit = self.base_units[quantity_type]
            value_in_base = units.convert(value, from_unit, base_unit)
            to_factor = custom_units[to_unit]
            return value_in_base / to_factor
        
        else:
            # Standard MDAnalysis conversion
            return units.convert(value, from_unit, to_unit)

# Example usage
custom_units = CustomUnitSystem()

# Convert distance to lattice units
distance_angstrom = 10.8625  # 2 * silicon lattice parameter
distance_lattice = custom_units.convert_custom(
    distance_angstrom, 'Angstrom', 'lattice_units', 'length'
)
print(f"{distance_angstrom} Å = {distance_lattice} lattice units")

# Convert energy to thermal units
energy_kj = 7.482  # 3 * kT at 300K
energy_thermal = custom_units.convert_custom(
    energy_kj, 'kJ/mol', 'thermal_300K', 'energy'
)
print(f"{energy_kj} kJ/mol = {energy_thermal:.1f} thermal units")

Install with Tessl CLI