An object-oriented toolkit to analyze molecular dynamics trajectories.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
MDAnalysis provides a powerful and flexible selection language for identifying atoms, residues, and molecular components. The selection syntax is similar to that used in VMD and PyMOL, making it familiar to molecular visualization users while extending functionality for analysis workflows.
The selection language allows you to:
def select_atoms(universe_or_atomgroup, *args, **kwargs):
"""
Select atoms using the MDAnalysis selection language.
Parameters
----------
universe_or_atomgroup : Universe or AtomGroup
Source for atom selection.
*args : str
Selection string(s) in MDAnalysis selection language.
updating : bool, optional
Whether selection should update when topology changes (default False).
periodic : bool, optional
Whether to account for periodic boundary conditions in
geometric selections (default True).
rtol : float, optional
Relative tolerance for floating point comparisons (default 1e-5).
atol : float, optional
Absolute tolerance for floating point comparisons (default 1e-8).
Returns
-------
AtomGroup
Selected atoms matching the criteria.
Examples
--------
>>> # Basic property selections
>>> ca_atoms = u.select_atoms("name CA")
>>> protein = u.select_atoms("protein")
>>> waters = u.select_atoms("resname SOL TIP3 WAT")
>>> # Geometric selections
>>> near_protein = u.select_atoms("around 5.0 protein")
>>> binding_site = u.select_atoms("resid 23-45 and around 8.0 resname LIG")
>>> # Complex Boolean logic
>>> flexible = u.select_atoms("protein and not backbone and not resname PRO")
"""# Atom name selections
u.select_atoms("name CA") # Alpha carbons
u.select_atoms("name CA CB CG") # Multiple atom names
u.select_atoms("name C*") # Wildcard: all carbon atoms
u.select_atoms("name *A") # Names ending with A
# Atom type selections
u.select_atoms("type CT NH1 OH") # Specific atom types
u.select_atoms("type CT*") # Types starting with CT
# Mass selections
u.select_atoms("mass 12.01") # Specific mass (carbon)
u.select_atoms("mass > 14.0") # Heavier than nitrogen
u.select_atoms("mass 12.0:16.0") # Mass range
# Charge selections
u.select_atoms("charge 0.0") # Neutral atoms
u.select_atoms("charge < -0.5") # Highly negative
u.select_atoms("charge -0.1:0.1") # Nearly neutral range
# Element selections (if available)
u.select_atoms("element C") # Carbon atoms
u.select_atoms("element N O") # Nitrogen or oxygen# Residue name selections
u.select_atoms("resname ALA") # Alanine residues
u.select_atoms("resname ALA GLY PRO") # Multiple residue types
u.select_atoms("resname A*") # Names starting with A
# Residue ID selections
u.select_atoms("resid 1") # Residue 1
u.select_atoms("resid 1 5 10") # Specific residues
u.select_atoms("resid 1-10") # Range of residues
u.select_atoms("resid 1:10:2") # Range with step (1,3,5,7,9)
# Residue number selections (different from resid)
u.select_atoms("resnum 1-100") # Sequential numbering
# Insertion codes (for PDB files)
u.select_atoms("icode A") # Specific insertion code
u.select_atoms("icode ''") # Empty insertion code# Segment ID selections
u.select_atoms("segid PROA") # Specific segment
u.select_atoms("segid PROA PROB") # Multiple segments
u.select_atoms("segid PRO*") # Wildcard matching
# Combining segment and residue selections
u.select_atoms("segid PROA and resid 1-100")
u.select_atoms("segid PROB and resname ALA")# Protein components
u.select_atoms("protein") # All protein atoms
u.select_atoms("backbone") # Protein backbone (N, CA, C, O)
u.select_atoms("sidechain") # Protein sidechains
u.select_atoms("nucleic") # Nucleic acid atoms
u.select_atoms("nucleicacid") # Alias for nucleic
# Water and ions
u.select_atoms("water") # Water molecules
u.select_atoms("ion") # Ion atoms
# Secondary structure (if available)
u.select_atoms("alpha") # Alpha helix
u.select_atoms("beta") # Beta sheet
u.select_atoms("coil") # Random coil
# Atom categories
u.select_atoms("hydrogen") # Hydrogen atoms
u.select_atoms("heavy") # Heavy (non-hydrogen) atoms
u.select_atoms("donor") # Hydrogen bond donors
u.select_atoms("acceptor") # Hydrogen bond acceptors# Amino acid categories
u.select_atoms("acidic") # Acidic residues (ASP, GLU)
u.select_atoms("basic") # Basic residues (LYS, ARG, HIS)
u.select_atoms("polar") # Polar residues
u.select_atoms("nonpolar") # Nonpolar residues
u.select_atoms("charged") # Charged residues (acidic + basic)
u.select_atoms("aromatic") # Aromatic residues (PHE, TYR, TRP)
u.select_atoms("hydrophobic") # Hydrophobic residues
u.select_atoms("small") # Small residues (ALA, GLY, SER)
u.select_atoms("medium") # Medium-sized residues
u.select_atoms("large") # Large residues
# Specific amino acid types
u.select_atoms("aliphatic") # Aliphatic residues
u.select_atoms("sulfur") # Sulfur-containing (CYS, MET)
u.select_atoms("alcoholic") # Alcohol groups (SER, THR, TYR)
u.select_atoms("amide") # Amide groups (ASN, GLN)# Around selections - atoms within distance
u.select_atoms("around 5.0 protein") # Within 5Å of protein
u.select_atoms("around 3.5 resname LIG") # Near ligand
u.select_atoms("around 8.0 (resid 23 and name CA)") # Near specific atom
# Point selections - around coordinates
u.select_atoms("point 10.0 20.0 30.0 5.0") # Within 5Å of point
u.select_atoms("point 0 0 0 10.0") # Near origin
# Spherical layer selections
u.select_atoms("sphlayer 2.0 5.0 protein") # Shell around protein
u.select_atoms("spherical_layer 1.5 4.0 resname LIG") # Full name
# Cylindrical selections
u.select_atoms("cylayer 2.0 5.0 10.0 -5.0 protein") # Cylindrical shell
u.select_atoms("cylinder 5.0 10.0 -5.0 protein") # Inside cylinder
# Isolation selections - isolated atoms/residues
u.select_atoms("isolated") # Atoms with no neighbors
u.select_atoms("isolated 5.0") # No neighbors within 5Å# Slab selections - between parallel planes
u.select_atoms("slab 10.0 20.0 z") # Z-coordinate slab
u.select_atoms("slab -5.0 5.0 x") # X-coordinate slab
# Box selections - rectangular regions
u.select_atoms("prop x > 10.0") # X coordinate > 10
u.select_atoms("prop y < -5.0") # Y coordinate < -5
u.select_atoms("prop z 0.0:10.0") # Z range
# Complex geometric combinations
u.select_atoms("around 8.0 protein and prop z > 0") # Upper half near protein
u.select_atoms("sphlayer 3.0 6.0 protein and not water") # Protein shell, no water# AND operations
u.select_atoms("protein and name CA") # Protein alpha carbons
u.select_atoms("resname ALA and name CB") # Alanine beta carbons
u.select_atoms("resid 1-10 and backbone") # N-terminal backbone
# OR operations
u.select_atoms("resname ALA or resname GLY") # Alanine or glycine
u.select_atoms("name CA or name CB") # Alpha or beta carbons
u.select_atoms("segid PROA or segid PROB") # Multiple segments
# NOT operations
u.select_atoms("protein and not backbone") # Protein sidechains
u.select_atoms("not water and not ion") # Non-solvent atoms
u.select_atoms("around 5.0 protein and not protein") # Solvent around protein
# Complex combinations with parentheses
u.select_atoms("(resname ALA or resname GLY) and name CA")
u.select_atoms("protein and not (backbone or resname PRO)")
u.select_atoms("(around 5.0 resname LIG) and (protein or nucleic)")# Using AtomGroup operations for complex logic
protein = u.select_atoms("protein")
ligand = u.select_atoms("resname LIG")
waters = u.select_atoms("resname SOL")
# Intersection
binding_site_protein = protein & u.select_atoms("around 5.0 resname LIG")
# Union
protein_and_ligand = protein | ligand
# Difference
sidechain = protein - u.select_atoms("backbone")
# Symmetric difference
mobile_region = (protein | ligand) ^ u.select_atoms("around 3.0 (protein or resname LIG)")# Static selections (default) - fixed atom set
static_near = u.select_atoms("around 5.0 resname LIG")
# Dynamic selections - update each frame
dynamic_near = u.select_atoms("around 5.0 resname LIG", updating=True)
# Example of difference
ligand = u.select_atoms("resname LIG")
for ts in u.trajectory:
# Static selection - same atoms throughout
static_atoms = static_near.positions
# Dynamic selection - atoms change as ligand moves
dynamic_atoms = dynamic_near.positions
print(f"Frame {ts.frame}: Static={len(static_near)}, Dynamic={len(dynamic_near)}")# Selections accounting for periodic boundaries
u.select_atoms("around 5.0 protein", periodic=True) # Default behavior
u.select_atoms("around 5.0 protein", periodic=False) # Ignore PBC
# Important for systems where molecules cross box boundaries
wrapped_selection = u.select_atoms("sphlayer 2.0 5.0 protein", periodic=True)# Numerical comparisons
u.select_atoms("mass > 15.0") # Heavy atoms
u.select_atoms("charge < -0.5") # Negative atoms
u.select_atoms("charge >= -0.1 and charge <= 0.1") # Nearly neutral
# Range specifications
u.select_atoms("resid 1:100") # Residue range
u.select_atoms("mass 12.0:16.0") # Mass range
u.select_atoms("prop x -10.0:10.0") # Coordinate range
# Multiple ranges
u.select_atoms("resid 1-50 100-150 200-250") # Multiple residue ranges
u.select_atoms("name CA CB CG CD CE") # Multiple names# Wildcard patterns
u.select_atoms("name C*") # Names starting with C
u.select_atoms("name *A") # Names ending with A
u.select_atoms("name H*") # All hydrogens
u.select_atoms("resname A*") # Residues starting with A
u.select_atoms("segid *A") # Segments ending with A
# Regular expressions (when supported)
u.select_atoms("name ~/^C[0-9]+$/") # C followed by numbers
u.select_atoms("resname ~/^(ALA|GLY|PRO)$/") # Specific residues# Atom index selections
u.select_atoms("index 0") # First atom
u.select_atoms("index 0 10 20 30") # Specific indices
u.select_atoms("index 0-99") # Index range
u.select_atoms("index 0:100:10") # Every 10th atom
# Combining indices with other criteria
u.select_atoms("index 0-999 and protein") # First 1000 protein atoms
u.select_atoms("protein and index 0:1000:2") # Every other protein atom# MDAnalysis allows custom selection keyword definitions
from MDAnalysis.core.selection import SelectionParser
def hydrophobic_residues(group):
"""Custom selection for hydrophobic residues."""
hydrophobic = ['ALA', 'VAL', 'LEU', 'ILE', 'PHE', 'PRO', 'MET', 'TRP']
return group.residues[np.isin(group.residues.resnames, hydrophobic)].atoms
# Register custom selection
SelectionParser.parse_tokens.append(('hydrophobic_residues', hydrophobic_residues))
# Now can use in selections
u.select_atoms("hydrophobic_residues and name CA")def create_selection_library(universe):
"""
Create library of commonly used selections for a system.
"""
selections = {}
# Basic components
selections['protein'] = universe.select_atoms("protein")
selections['backbone'] = universe.select_atoms("backbone")
selections['sidechain'] = universe.select_atoms("protein and not backbone")
selections['water'] = universe.select_atoms("resname SOL WAT TIP*")
selections['ions'] = universe.select_atoms("ion")
# Secondary structure regions (if available)
try:
selections['helix'] = universe.select_atoms("alpha")
selections['sheet'] = universe.select_atoms("beta")
selections['loop'] = universe.select_atoms("coil")
except:
pass
# Functional regions
selections['ca_atoms'] = universe.select_atoms("name CA")
selections['heavy_atoms'] = universe.select_atoms("not name H*")
# Binding site (example - customize for specific system)
if 'LIG' in universe.residues.resnames:
selections['ligand'] = universe.select_atoms("resname LIG")
selections['binding_site'] = universe.select_atoms("around 5.0 resname LIG and protein")
selections['first_shell'] = universe.select_atoms("sphlayer 0.0 5.0 resname LIG")
selections['second_shell'] = universe.select_atoms("sphlayer 5.0 10.0 resname LIG")
return selections
# Create and use selection library
sel_lib = create_selection_library(u)
protein_ca = sel_lib['ca_atoms']
binding_site = sel_lib.get('binding_site', u.select_atoms("name CA")) # Fallbackclass SelectionCache:
"""
Cache frequently used selections for performance.
"""
def __init__(self, universe):
self.universe = universe
self._cache = {}
def get_selection(self, selection_string, **kwargs):
"""
Get selection with caching.
Parameters
----------
selection_string : str
Selection string.
**kwargs
Additional selection arguments.
Returns
-------
AtomGroup
Cached or newly created selection.
"""
# Create cache key from selection string and kwargs
cache_key = (selection_string, tuple(sorted(kwargs.items())))
if cache_key not in self._cache:
self._cache[cache_key] = self.universe.select_atoms(
selection_string, **kwargs
)
return self._cache[cache_key]
def clear_cache(self):
"""Clear selection cache."""
self._cache.clear()
# Usage
cache = SelectionCache(u)
# These will be cached after first call
protein = cache.get_selection("protein")
ca_atoms = cache.get_selection("name CA")
backbone = cache.get_selection("backbone")
# Fast subsequent access
for ts in u.trajectory:
# Fast access to cached selections
protein_com = protein.center_of_mass()
ca_positions = ca_atoms.positionsdef optimize_selection_workflow():
"""
Demonstrate efficient selection strategies.
"""
# Strategy 1: Use broad selections first, then narrow
# Less efficient:
# all_ca = u.select_atoms("name CA")
# protein_ca = all_ca.select_atoms("protein") # Re-selection overhead
# More efficient:
protein_ca = u.select_atoms("protein and name CA") # Single selection
# Strategy 2: Reuse selections when possible
protein = u.select_atoms("protein")
backbone = protein.select_atoms("backbone") # Subset of existing selection
sidechain = protein.select_atoms("not backbone") # Complement
# Strategy 3: Use static selections for unchanging criteria
static_protein = u.select_atoms("protein") # Won't change
# Strategy 4: Use updating selections only when needed
dynamic_contacts = u.select_atoms("around 5.0 resname LIG", updating=True)
# Strategy 5: Batch similar selections
selections = {
'ca': u.select_atoms("name CA"),
'cb': u.select_atoms("name CB"),
'backbone': u.select_atoms("backbone"),
'sidechain': u.select_atoms("protein and not backbone")
}
return selections
# Apply optimization strategies
optimized_sels = optimize_selection_workflow()def analyze_binding_pocket(universe, ligand_resname="LIG"):
"""
Comprehensive binding pocket analysis using selections.
"""
# Define key selections
ligand = universe.select_atoms(f"resname {ligand_resname}")
if len(ligand) == 0:
raise ValueError(f"No ligand found with resname {ligand_resname}")
# Binding pocket definition with multiple criteria
pocket_atoms = universe.select_atoms(
f"(around 5.0 resname {ligand_resname}) and protein and (not backbone)"
)
# Different shells around ligand
first_shell = universe.select_atoms(f"sphlayer 0.0 4.0 resname {ligand_resname}")
second_shell = universe.select_atoms(f"sphlayer 4.0 8.0 resname {ligand_resname}")
# Specific interaction partners
hbond_partners = universe.select_atoms(
f"(around 3.5 resname {ligand_resname}) and (donor or acceptor)"
)
# Hydrophobic contacts
hydrophobic_contacts = universe.select_atoms(
f"(around 4.5 resname {ligand_resname}) and "
f"(resname ALA VAL LEU ILE PHE PRO MET TRP) and "
f"(not backbone)"
)
# Salt bridge partners
salt_bridge_partners = universe.select_atoms(
f"(around 6.0 resname {ligand_resname}) and "
f"(resname LYS ARG ASP GLU) and "
f"(name NZ NH1 NH2 OD1 OD2 OE1 OE2)"
)
# Analysis results
results = {
'ligand_atoms': len(ligand),
'pocket_atoms': len(pocket_atoms),
'first_shell_atoms': len(first_shell),
'second_shell_atoms': len(second_shell),
'hbond_partners': len(hbond_partners),
'hydrophobic_contacts': len(hydrophobic_contacts),
'salt_bridge_partners': len(salt_bridge_partners)
}
# Residue-level analysis
pocket_residues = pocket_atoms.residues
results['pocket_residues'] = len(pocket_residues)
results['pocket_resnames'] = list(set(pocket_residues.resnames))
return results, {
'ligand': ligand,
'pocket': pocket_atoms,
'hbond_partners': hbond_partners,
'hydrophobic': hydrophobic_contacts,
'salt_bridges': salt_bridge_partners
}
# Run binding pocket analysis
results, selections = analyze_binding_pocket(u, "LIG")
print("Binding Pocket Analysis:")
for key, value in results.items():
print(f" {key}: {value}")def track_selection_changes(universe, selection_string, updating=True):
"""
Track how selection membership changes over trajectory.
"""
selection = universe.select_atoms(selection_string, updating=updating)
results = {
'frame': [],
'n_atoms': [],
'atom_indices': [],
'unique_atoms': set()
}
for ts in universe.trajectory:
frame = ts.frame
n_atoms = len(selection)
atom_indices = selection.indices.copy()
results['frame'].append(frame)
results['n_atoms'].append(n_atoms)
results['atom_indices'].append(atom_indices)
results['unique_atoms'].update(atom_indices)
# Summary statistics
results['mean_atoms'] = np.mean(results['n_atoms'])
results['std_atoms'] = np.std(results['n_atoms'])
results['min_atoms'] = np.min(results['n_atoms'])
results['max_atoms'] = np.max(results['n_atoms'])
results['total_unique'] = len(results['unique_atoms'])
return results
# Example: Track water molecules near protein
water_dynamics = track_selection_changes(
u, "resname SOL and around 5.0 protein", updating=True
)
print(f"Water dynamics around protein:")
print(f" Average water molecules: {water_dynamics['mean_atoms']:.1f}")
print(f" Range: {water_dynamics['min_atoms']}-{water_dynamics['max_atoms']}")
print(f" Total unique waters: {water_dynamics['total_unique']}")def validate_selections(universe, selection_dict):
"""
Validate multiple selections and check for issues.
"""
validation_results = {}
for name, selection_string in selection_dict.items():
try:
# Test selection
atoms = universe.select_atoms(selection_string)
# Basic validation
results = {
'valid': True,
'n_atoms': len(atoms),
'n_residues': len(atoms.residues) if len(atoms) > 0 else 0,
'n_segments': len(atoms.segments) if len(atoms) > 0 else 0,
'warnings': []
}
# Check for empty selection
if len(atoms) == 0:
results['warnings'].append("Selection returned no atoms")
# Check for very large selections (might be inefficient)
if len(atoms) > len(universe.atoms) * 0.8:
results['warnings'].append("Selection includes >80% of system")
# Check for geometric selections without proper context
if any(keyword in selection_string.lower()
for keyword in ['around', 'within', 'sphlayer']):
if 'updating' not in selection_string:
results['warnings'].append(
"Geometric selection might benefit from updating=True"
)
validation_results[name] = results
except Exception as e:
validation_results[name] = {
'valid': False,
'error': str(e),
'n_atoms': 0
}
return validation_results
# Example selection validation
test_selections = {
'protein': "protein",
'ca_atoms': "name CA",
'binding_site': "around 5.0 resname LIG and protein",
'invalid': "name INVALID_ATOM",
'water_shell': "sphlayer 5.0 10.0 protein and resname SOL"
}
validation = validate_selections(u, test_selections)
for name, result in validation.items():
if result['valid']:
print(f"{name}: {result['n_atoms']} atoms")
for warning in result.get('warnings', []):
print(f" Warning: {warning}")
else:
print(f"{name}: ERROR - {result['error']}")Install with Tessl CLI
npx tessl i tessl/pypi-mdanalysis