0
# Units and Utilities
1
2
MDAnalysis provides comprehensive utilities for unit conversions, mathematical operations, and helper functions essential for molecular dynamics analysis. The units system ensures consistent handling of physical quantities across different simulation packages and analysis workflows.
3
4
## Units System
5
6
### Base Units in MDAnalysis
7
8
MDAnalysis uses a consistent set of base units for all internal calculations:
9
10
```python { .api }
11
import MDAnalysis.units as units
12
13
# Base units dictionary
14
MDANALYSIS_BASE_UNITS = {
15
'length': 'Angstrom', # Å
16
'time': 'ps', # picosecond
17
'energy': 'kJ/mol', # kilojoule per mole
18
'charge': 'e', # elementary charge
19
'mass': 'u', # atomic mass unit (amu)
20
'force': 'kJ/(mol*Angstrom)', # kJ/(mol·Å)
21
'speed': 'Angstrom/ps', # Å/ps
22
'density': 'u/Angstrom**3' # amu/ų
23
}
24
25
# Access base units
26
print(units.MDANALYSIS_BASE_UNITS['length']) # 'Angstrom'
27
print(units.MDANALYSIS_BASE_UNITS['energy']) # 'kJ/mol'
28
```
29
30
### Unit Conversion Functions
31
32
```python { .api }
33
def get_conversion_factor(unit_type, u1, u2):
34
"""
35
Get conversion factor between two units of the same type.
36
37
Parameters
38
----------
39
unit_type : str
40
Type of physical quantity ('length', 'time', 'energy', etc.).
41
u1 : str
42
Source unit.
43
u2 : str
44
Target unit.
45
46
Returns
47
-------
48
float
49
Conversion factor such that value_u2 = value_u1 * factor.
50
51
Examples
52
--------
53
>>> # Length conversions
54
>>> factor = units.get_conversion_factor('length', 'Angstrom', 'nm')
55
>>> print(factor) # 0.1 (1 Å = 0.1 nm)
56
57
>>> # Energy conversions
58
>>> factor = units.get_conversion_factor('energy', 'kcal/mol', 'kJ/mol')
59
>>> print(factor) # 4.184 (1 kcal/mol = 4.184 kJ/mol)
60
61
>>> # Time conversions
62
>>> factor = units.get_conversion_factor('time', 'ps', 'ns')
63
>>> print(factor) # 0.001 (1 ps = 0.001 ns)
64
"""
65
66
def convert(values, u1, u2):
67
"""
68
Convert values between units of the same physical quantity.
69
70
Parameters
71
----------
72
values : float or array-like
73
Value(s) to convert.
74
u1 : str
75
Source unit.
76
u2 : str
77
Target unit.
78
79
Returns
80
-------
81
float or numpy.ndarray
82
Converted value(s) in target units.
83
84
Examples
85
--------
86
>>> # Convert distances
87
>>> dist_nm = units.convert(10.0, 'Angstrom', 'nm')
88
>>> print(dist_nm) # 1.0 nm
89
90
>>> # Convert array of energies
91
>>> energies_kcal = [1.0, 2.5, 3.8]
92
>>> energies_kj = units.convert(energies_kcal, 'kcal/mol', 'kJ/mol')
93
>>> print(energies_kj) # [4.184, 10.46, 15.8992] kJ/mol
94
95
>>> # Convert temperature to energy (thermal energy)
96
>>> temp_K = 300.0
97
>>> energy_kj = units.convert(temp_K, 'K', 'kJ/mol', 'energy')
98
>>> print(energy_kj) # ~2.494 kJ/mol (kT at 300K)
99
"""
100
```
101
102
### Supported Unit Categories
103
104
#### Length Units
105
106
```python { .api }
107
# Length unit conversions
108
length_units = {
109
'Angstrom': 1.0, # Base unit
110
'nm': 0.1, # nanometer
111
'pm': 100.0, # picometer
112
'fm': 1e5, # femtometer
113
'meter': 1e-10, # meter
114
'cm': 1e-8, # centimeter
115
'mm': 1e-7, # millimeter
116
'inch': 3.937e-9, # inch
117
'ft': 3.281e-10, # foot
118
'Bohr': 1.889726125, # Bohr radius
119
}
120
121
# Examples
122
distances_angstrom = [1.0, 5.0, 10.0, 15.0]
123
distances_nm = units.convert(distances_angstrom, 'Angstrom', 'nm')
124
distances_pm = units.convert(distances_angstrom, 'Angstrom', 'pm')
125
126
print(f"Distances in Å: {distances_angstrom}")
127
print(f"Distances in nm: {distances_nm}")
128
print(f"Distances in pm: {distances_pm}")
129
```
130
131
#### Time Units
132
133
```python { .api }
134
# Time unit conversions
135
time_units = {
136
'ps': 1.0, # Base unit - picosecond
137
'fs': 1000.0, # femtosecond
138
'ns': 0.001, # nanosecond
139
'us': 1e-6, # microsecond
140
'ms': 1e-9, # millisecond
141
's': 1e-12, # second
142
'AKMA': 4.888821e-2, # CHARMM internal time unit
143
}
144
145
# Examples
146
times_ps = [0.1, 1.0, 10.0, 100.0]
147
times_ns = units.convert(times_ps, 'ps', 'ns')
148
times_fs = units.convert(times_ps, 'ps', 'fs')
149
150
print(f"Times in ps: {times_ps}")
151
print(f"Times in ns: {times_ns}")
152
print(f"Times in fs: {times_fs}")
153
```
154
155
#### Energy Units
156
157
```python { .api }
158
# Energy unit conversions
159
energy_units = {
160
'kJ/mol': 1.0, # Base unit
161
'kcal/mol': 1/4.184, # kilocalorie per mole
162
'eV': 96.485332, # electron volt
163
'Hartree': 2625.5, # Hartree (atomic unit)
164
'Ry': 1312.75, # Rydberg
165
'J/mol': 0.001, # joule per mole
166
'K': 1/0.008314462618, # Kelvin (thermal energy kT)
167
'AKMA': 4.184, # CHARMM internal energy
168
}
169
170
# Examples
171
energies_kj = [1.0, 10.0, 100.0]
172
energies_kcal = units.convert(energies_kj, 'kJ/mol', 'kcal/mol')
173
energies_eV = units.convert(energies_kj, 'kJ/mol', 'eV')
174
175
print(f"Energies in kJ/mol: {energies_kj}")
176
print(f"Energies in kcal/mol: {energies_kcal}")
177
print(f"Energies in eV: {energies_eV}")
178
179
# Temperature to thermal energy conversion
180
temperature = 300.0 # K
181
thermal_energy = units.convert(temperature, 'K', 'kJ/mol')
182
print(f"Thermal energy at {temperature}K: {thermal_energy:.3f} kJ/mol")
183
```
184
185
#### Mass and Density Units
186
187
```python { .api }
188
# Mass units
189
mass_units = {
190
'u': 1.0, # Base unit - atomic mass unit (amu)
191
'amu': 1.0, # alias for u
192
'Da': 1.0, # Dalton
193
'kDa': 0.001, # kiloDalton
194
'g/mol': 1.0, # gram per mole (numerically same as amu)
195
'kg': 1.66054e-27, # kilogram
196
'g': 1.66054e-24, # gram
197
}
198
199
# Density units
200
density_units = {
201
'u/Angstrom**3': 1.0, # Base unit
202
'g/cm**3': 1.66054, # gram per cubic centimeter
203
'kg/m**3': 1660.54, # kilogram per cubic meter
204
'g/ml': 1.66054, # gram per milliliter
205
}
206
207
# Examples
208
masses_amu = [12.01, 14.007, 15.999] # C, N, O
209
masses_g = units.convert(masses_amu, 'u', 'g')
210
masses_kg = units.convert(masses_amu, 'u', 'kg')
211
212
print(f"Masses in amu: {masses_amu}")
213
print(f"Masses in g: {masses_g}")
214
print(f"Masses in kg: {masses_kg}")
215
```
216
217
## Mathematical Utilities
218
219
### Distance Calculations
220
221
```python { .api }
222
from MDAnalysis.lib import distances
223
224
def distance_array(reference, configuration, box=None, result=None, backend="serial"):
225
"""
226
Calculate distance matrix between two coordinate sets.
227
228
Parameters
229
----------
230
reference : array-like
231
Reference coordinates of shape (n, 3).
232
configuration : array-like
233
Configuration coordinates of shape (m, 3).
234
box : array-like, optional
235
Unit cell dimensions [a, b, c, alpha, beta, gamma] for
236
periodic boundary condition handling.
237
result : numpy.ndarray, optional
238
Pre-allocated result array of shape (n, m).
239
backend : {'serial', 'OpenMP'}, optional
240
Computational backend (default 'serial').
241
242
Returns
243
-------
244
numpy.ndarray
245
Distance array of shape (n, m) where element [i,j] is the
246
distance between reference[i] and configuration[j].
247
248
Examples
249
--------
250
>>> import numpy as np
251
>>> coords1 = np.random.random((100, 3)) * 10
252
>>> coords2 = np.random.random((50, 3)) * 10
253
>>>
254
>>> # Calculate all pairwise distances
255
>>> dist_matrix = distances.distance_array(coords1, coords2)
256
>>> print(dist_matrix.shape) # (100, 50)
257
>>>
258
>>> # With periodic boundaries
259
>>> box = [20.0, 20.0, 20.0, 90.0, 90.0, 90.0]
260
>>> dist_matrix_pbc = distances.distance_array(coords1, coords2, box=box)
261
"""
262
263
def self_distance_array(reference, box=None, result=None, backend="serial"):
264
"""
265
Calculate self-distance matrix (all pairwise distances within one set).
266
267
Parameters
268
----------
269
reference : array-like
270
Coordinates of shape (n, 3).
271
box : array-like, optional
272
Unit cell dimensions for PBC.
273
result : numpy.ndarray, optional
274
Pre-allocated result array of shape (n, n).
275
backend : {'serial', 'OpenMP'}, optional
276
Computational backend.
277
278
Returns
279
-------
280
numpy.ndarray
281
Symmetric distance matrix of shape (n, n).
282
283
Examples
284
--------
285
>>> coords = protein.select_atoms("name CA").positions
286
>>> ca_distances = distances.self_distance_array(coords)
287
>>>
288
>>> # Distance between CA atoms 0 and 10
289
>>> dist_0_10 = ca_distances[0, 10]
290
>>>
291
>>> # Find closest CA atom to atom 0 (excluding itself)
292
>>> distances_from_0 = ca_distances[0].copy()
293
>>> distances_from_0[0] = np.inf # Exclude self
294
>>> closest_idx = np.argmin(distances_from_0)
295
>>> closest_distance = distances_from_0[closest_idx]
296
"""
297
298
def calc_bonds(coordinates1, coordinates2, box=None, result=None, backend="serial"):
299
"""
300
Calculate distances between pairs of coordinates (bonds).
301
302
Parameters
303
----------
304
coordinates1 : array-like
305
First set of coordinates, shape (n, 3).
306
coordinates2 : array-like
307
Second set of coordinates, shape (n, 3).
308
box : array-like, optional
309
Unit cell dimensions.
310
result : numpy.ndarray, optional
311
Pre-allocated result array of shape (n,).
312
backend : {'serial', 'OpenMP'}, optional
313
Computational backend.
314
315
Returns
316
-------
317
numpy.ndarray
318
Array of distances between coordinate pairs.
319
320
Examples
321
--------
322
>>> # Calculate bond lengths
323
>>> bonds = u.atoms.bonds
324
>>> coord1 = u.atoms[bonds.indices[:, 0]].positions
325
>>> coord2 = u.atoms[bonds.indices[:, 1]].positions
326
>>> bond_lengths = distances.calc_bonds(coord1, coord2, box=u.dimensions)
327
>>>
328
>>> print(f"Average bond length: {np.mean(bond_lengths):.3f} Å")
329
>>> print(f"Bond length range: {np.min(bond_lengths):.3f} - {np.max(bond_lengths):.3f} Å")
330
"""
331
332
def calc_angles(coordinates1, coordinates2, coordinates3, box=None, result=None):
333
"""
334
Calculate angles between three coordinate sets.
335
336
Parameters
337
----------
338
coordinates1 : array-like
339
First coordinates (angle vertex), shape (n, 3).
340
coordinates2 : array-like
341
Second coordinates (angle center), shape (n, 3).
342
coordinates3 : array-like
343
Third coordinates (angle vertex), shape (n, 3).
344
box : array-like, optional
345
Unit cell dimensions.
346
result : numpy.ndarray, optional
347
Pre-allocated result array.
348
349
Returns
350
-------
351
numpy.ndarray
352
Array of angles in radians.
353
354
Examples
355
--------
356
>>> # Calculate backbone angles
357
>>> angles = u.atoms.angles
358
>>> coord1 = u.atoms[angles.indices[:, 0]].positions
359
>>> coord2 = u.atoms[angles.indices[:, 1]].positions
360
>>> coord3 = u.atoms[angles.indices[:, 2]].positions
361
>>> angle_values = distances.calc_angles(coord1, coord2, coord3)
362
>>> angle_degrees = np.degrees(angle_values)
363
>>>
364
>>> print(f"Average angle: {np.mean(angle_degrees):.1f}°")
365
"""
366
367
def calc_dihedrals(coord1, coord2, coord3, coord4, box=None, result=None):
368
"""
369
Calculate dihedral angles between four coordinate sets.
370
371
Parameters
372
----------
373
coord1, coord2, coord3, coord4 : array-like
374
Coordinates defining dihedral angles, each shape (n, 3).
375
box : array-like, optional
376
Unit cell dimensions.
377
result : numpy.ndarray, optional
378
Pre-allocated result array.
379
380
Returns
381
-------
382
numpy.ndarray
383
Array of dihedral angles in radians (-π to π).
384
385
Examples
386
--------
387
>>> # Calculate backbone phi angles
388
>>> dihedrals = u.atoms.dihedrals
389
>>> c1 = u.atoms[dihedrals.indices[:, 0]].positions
390
>>> c2 = u.atoms[dihedrals.indices[:, 1]].positions
391
>>> c3 = u.atoms[dihedrals.indices[:, 2]].positions
392
>>> c4 = u.atoms[dihedrals.indices[:, 3]].positions
393
>>> dihedral_values = distances.calc_dihedrals(c1, c2, c3, c4)
394
>>> dihedral_degrees = np.degrees(dihedral_values)
395
"""
396
```
397
398
### Neighbor Searching
399
400
```python { .api }
401
from MDAnalysis.lib import NeighborSearch
402
403
class NeighborSearch:
404
"""
405
Fast neighbor searching using spatial data structures.
406
407
Uses KDTree-based algorithms for efficient distance-based queries.
408
"""
409
410
def __init__(self, atom_list, box=None, bucket_size=10):
411
"""
412
Initialize neighbor search structure.
413
414
Parameters
415
----------
416
atom_list : AtomGroup or array-like
417
Atoms to include in search tree or coordinate array.
418
box : array-like, optional
419
Unit cell dimensions for periodic boundary conditions.
420
bucket_size : int, optional
421
Bucket size for tree construction (default 10).
422
423
Examples
424
--------
425
>>> # Search in protein atoms
426
>>> protein = u.select_atoms("protein")
427
>>> ns = NeighborSearch(protein, box=u.dimensions)
428
429
>>> # Search in coordinate array
430
>>> coords = protein.positions
431
>>> ns = NeighborSearch(coords, box=u.dimensions)
432
"""
433
434
def search(self, atoms, radius, level='A'):
435
"""
436
Find neighbors within radius.
437
438
Parameters
439
----------
440
atoms : AtomGroup or array-like
441
Query atoms or coordinates.
442
radius : float
443
Search radius in Angstrom.
444
level : {'A', 'R', 'S'}, optional
445
Return level: 'A' for atoms, 'R' for residues, 'S' for segments.
446
447
Returns
448
-------
449
AtomGroup
450
Atoms within radius of query atoms.
451
452
Examples
453
--------
454
>>> # Find atoms within 5Å of ligand
455
>>> ligand = u.select_atoms("resname LIG")
456
>>> nearby = ns.search(ligand, 5.0)
457
>>>
458
>>> # Find residues within 8Å
459
>>> nearby_residues = ns.search(ligand, 8.0, level='R')
460
"""
461
462
class AtomNeighborSearch(NeighborSearch):
463
"""Specialized neighbor search for AtomGroups with additional features."""
464
465
def search(self, atoms, radius, level='A'):
466
"""Enhanced search with AtomGroup-specific features."""
467
468
def search_pairs(self, radius):
469
"""
470
Find all atom pairs within radius.
471
472
Parameters
473
----------
474
radius : float
475
Distance cutoff for pair identification.
476
477
Returns
478
-------
479
numpy.ndarray
480
Array of shape (n_pairs, 2) containing atom index pairs.
481
482
Examples
483
--------
484
>>> # Find all close contacts
485
>>> pairs = ns.search_pairs(3.0)
486
>>> print(f"Found {len(pairs)} contacts within 3.0 Å")
487
"""
488
```
489
490
### Mathematical Functions
491
492
```python { .api }
493
from MDAnalysis.lib import mdamath
494
495
def norm(v):
496
"""
497
Calculate vector norm (magnitude).
498
499
Parameters
500
----------
501
v : array-like
502
Input vector or array of vectors.
503
504
Returns
505
-------
506
float or numpy.ndarray
507
Vector norm(s).
508
509
Examples
510
--------
511
>>> vector = [3.0, 4.0, 0.0]
512
>>> magnitude = mdamath.norm(vector) # 5.0
513
>>>
514
>>> # Multiple vectors
515
>>> vectors = np.random.random((100, 3))
516
>>> magnitudes = mdamath.norm(vectors)
517
"""
518
519
def angle(v1, v2):
520
"""
521
Calculate angle between two vectors.
522
523
Parameters
524
----------
525
v1, v2 : array-like
526
Input vectors.
527
528
Returns
529
-------
530
float
531
Angle in radians.
532
533
Examples
534
--------
535
>>> v1 = [1, 0, 0]
536
>>> v2 = [0, 1, 0]
537
>>> ang = mdamath.angle(v1, v2) # π/2 radians (90°)
538
>>> ang_degrees = np.degrees(ang) # 90.0°
539
"""
540
541
def dihedral(v1, v2, v3, v4):
542
"""
543
Calculate dihedral angle between four vectors.
544
545
Parameters
546
----------
547
v1, v2, v3, v4 : array-like
548
Four vectors defining the dihedral angle.
549
550
Returns
551
-------
552
float
553
Dihedral angle in radians (-π to π).
554
555
Examples
556
--------
557
>>> # Calculate a backbone dihedral
558
>>> phi = mdamath.dihedral(c_prev, n, ca, c)
559
>>> phi_degrees = np.degrees(phi)
560
"""
561
562
def normal(v):
563
"""
564
Calculate a vector normal to the input vector.
565
566
Parameters
567
----------
568
v : array-like
569
Input vector.
570
571
Returns
572
-------
573
numpy.ndarray
574
Normal vector of same magnitude.
575
576
Examples
577
--------
578
>>> v = [1, 1, 0]
579
>>> n = mdamath.normal(v) # Perpendicular to v
580
>>> print(np.dot(v, n)) # Should be ~0
581
"""
582
583
def find_fragments(bonds, natoms):
584
"""
585
Find molecular fragments from bond connectivity.
586
587
Parameters
588
----------
589
bonds : array-like
590
Bond connectivity as array of shape (n_bonds, 2).
591
natoms : int
592
Total number of atoms.
593
594
Returns
595
-------
596
list
597
List of arrays containing atom indices for each fragment.
598
599
Examples
600
--------
601
>>> # Find disconnected molecules
602
>>> bond_indices = u.atoms.bonds.indices
603
>>> fragments = mdamath.find_fragments(bond_indices, u.atoms.n_atoms)
604
>>> print(f"System has {len(fragments)} fragments")
605
>>>
606
>>> # Largest fragment
607
>>> largest_frag = max(fragments, key=len)
608
>>> print(f"Largest fragment: {len(largest_frag)} atoms")
609
"""
610
```
611
612
## Utility Functions
613
614
### File and Format Utilities
615
616
```python { .api }
617
from MDAnalysis.lib import util
618
619
def openany(datasource, mode='rb', reset=True):
620
"""
621
Open compressed or uncompressed files transparently.
622
623
Parameters
624
----------
625
datasource : str or file-like
626
File path or file-like object. Handles .gz, .bz2, .xz compression.
627
mode : str, optional
628
File opening mode (default 'rb').
629
reset : bool, optional
630
Whether to reset file position to beginning (default True).
631
632
Returns
633
-------
634
file-like
635
Open file object.
636
637
Examples
638
--------
639
>>> # Automatically handles compressed files
640
>>> with util.openany('trajectory.xtc.gz') as f:
641
... data = f.read()
642
>>>
643
>>> # Works with regular files too
644
>>> with util.openany('structure.pdb') as f:
645
... lines = f.readlines()
646
"""
647
648
def anyopen(datasource, mode='r', reset=True):
649
"""
650
Context manager version of openany.
651
652
Parameters
653
----------
654
datasource : str or file-like
655
File path or file-like object.
656
mode : str, optional
657
File opening mode (default 'r').
658
reset : bool, optional
659
Whether to reset file position.
660
661
Examples
662
--------
663
>>> # Use as context manager
664
>>> with util.anyopen('data.txt.bz2', 'r') as f:
665
... content = f.read()
666
"""
667
668
def filename(path, ext=None, keep=False):
669
"""
670
Extract filename components.
671
672
Parameters
673
----------
674
path : str
675
File path.
676
ext : str, optional
677
Force specific extension.
678
keep : bool, optional
679
Whether to keep extension (default False).
680
681
Returns
682
-------
683
str
684
Processed filename.
685
686
Examples
687
--------
688
>>> util.filename('/path/to/file.pdb') # 'file'
689
>>> util.filename('/path/to/file.pdb', keep=True) # 'file.pdb'
690
>>> util.filename('/path/to/file.pdb', ext='xyz') # 'file.xyz'
691
"""
692
693
def format_from_filename(filename):
694
"""
695
Guess file format from filename extension.
696
697
Parameters
698
----------
699
filename : str
700
File path.
701
702
Returns
703
-------
704
str or None
705
Guessed format string, None if unknown.
706
707
Examples
708
--------
709
>>> util.format_from_filename('traj.xtc') # 'XTC'
710
>>> util.format_from_filename('struct.pdb') # 'PDB'
711
>>> util.format_from_filename('data.unknown') # None
712
"""
713
```
714
715
### Caching Utilities
716
717
```python { .api }
718
from functools import lru_cache
719
720
@util.cached
721
def expensive_calculation(atomgroup, parameter):
722
"""
723
Example of cached function decorator.
724
725
The @cached decorator stores results based on function arguments
726
to avoid recomputation with same inputs.
727
"""
728
# Expensive calculation here
729
result = atomgroup.some_expensive_operation(parameter)
730
return result
731
732
class Store:
733
"""
734
Simple key-value store for caching analysis results.
735
"""
736
737
def __init__(self):
738
self._data = {}
739
740
def __setitem__(self, key, value):
741
"""Store value with key."""
742
self._data[key] = value
743
744
def __getitem__(self, key):
745
"""Retrieve value by key."""
746
return self._data[key]
747
748
def __contains__(self, key):
749
"""Check if key exists."""
750
return key in self._data
751
752
def get(self, key, default=None):
753
"""Get value with default fallback."""
754
return self._data.get(key, default)
755
756
def clear(self):
757
"""Clear all stored data."""
758
self._data.clear()
759
760
# Usage example
761
results_cache = Store()
762
results_cache['rmsd_protein'] = rmsd_values
763
results_cache['distances_ca'] = distance_matrix
764
```
765
766
### Progress Tracking
767
768
```python { .api }
769
from MDAnalysis.lib.log import ProgressBar
770
771
class ProgressBar:
772
"""
773
Progress bar for long-running calculations.
774
"""
775
776
def __init__(self, total, **kwargs):
777
"""
778
Initialize progress bar.
779
780
Parameters
781
----------
782
total : int
783
Total number of iterations.
784
**kwargs
785
Additional formatting options.
786
"""
787
788
def update(self, n=1):
789
"""
790
Update progress by n steps.
791
792
Parameters
793
----------
794
n : int, optional
795
Number of steps to advance (default 1).
796
"""
797
798
def close(self):
799
"""Close progress bar."""
800
801
# Usage in analysis loops
802
def analyze_with_progress(universe):
803
"""Example analysis with progress tracking."""
804
n_frames = len(universe.trajectory)
805
results = []
806
807
with ProgressBar(n_frames) as pbar:
808
for ts in universe.trajectory:
809
# Perform analysis
810
result = calculate_something(universe)
811
results.append(result)
812
813
# Update progress
814
pbar.update()
815
816
return results
817
```
818
819
## Advanced Utilities
820
821
### Coordinate Utilities
822
823
```python { .api }
824
def center_of_mass(coordinates, masses):
825
"""
826
Calculate center of mass for coordinates.
827
828
Parameters
829
----------
830
coordinates : array-like
831
Coordinate array of shape (n, 3).
832
masses : array-like
833
Mass array of shape (n,).
834
835
Returns
836
-------
837
numpy.ndarray
838
Center of mass as 3-element array.
839
840
Examples
841
--------
842
>>> coords = protein.positions
843
>>> masses = protein.masses
844
>>> com = util.center_of_mass(coords, masses)
845
"""
846
847
def radius_of_gyration(coordinates, masses=None):
848
"""
849
Calculate radius of gyration.
850
851
Parameters
852
----------
853
coordinates : array-like
854
Coordinate array of shape (n, 3).
855
masses : array-like, optional
856
Mass array. If None, uses equal weights.
857
858
Returns
859
-------
860
float
861
Radius of gyration.
862
863
Examples
864
--------
865
>>> rgyr = util.radius_of_gyration(protein.positions, protein.masses)
866
>>> print(f"Radius of gyration: {rgyr:.2f} Å")
867
"""
868
869
def principal_axes(coordinates, masses=None):
870
"""
871
Calculate principal axes of coordinate distribution.
872
873
Parameters
874
----------
875
coordinates : array-like
876
Coordinate array of shape (n, 3).
877
masses : array-like, optional
878
Mass array for weighting.
879
880
Returns
881
-------
882
numpy.ndarray
883
Principal axes as 3x3 array (rows are axes).
884
885
Examples
886
--------
887
>>> axes = util.principal_axes(protein.positions, protein.masses)
888
>>> longest_axis = axes[0] # Axis with largest eigenvalue
889
>>> print(f"Longest axis: {longest_axis}")
890
"""
891
```
892
893
### Data Processing Utilities
894
895
```python { .api }
896
def smooth_data(data, window_size=5, method='moving_average'):
897
"""
898
Smooth time series data.
899
900
Parameters
901
----------
902
data : array-like
903
Input data to smooth.
904
window_size : int, optional
905
Size of smoothing window (default 5).
906
method : str, optional
907
Smoothing method ('moving_average', 'gaussian', 'savgol').
908
909
Returns
910
-------
911
numpy.ndarray
912
Smoothed data array.
913
914
Examples
915
--------
916
>>> # Smooth RMSD time series
917
>>> rmsd_smooth = util.smooth_data(rmsd_values, window_size=10)
918
>>>
919
>>> # Gaussian smoothing
920
>>> smooth_gaussian = util.smooth_data(data, method='gaussian')
921
"""
922
923
def block_average(data, block_size):
924
"""
925
Calculate block averages for error analysis.
926
927
Parameters
928
----------
929
data : array-like
930
Input time series data.
931
block_size : int
932
Size of blocks for averaging.
933
934
Returns
935
-------
936
numpy.ndarray
937
Array of block averages.
938
939
Examples
940
--------
941
>>> # Block average for statistical analysis
942
>>> blocks = util.block_average(energy_data, block_size=100)
943
>>> block_error = np.std(blocks) / np.sqrt(len(blocks))
944
>>> print(f"Block average error: {block_error:.3f}")
945
"""
946
947
def autocorrelation(data, maxlags=None):
948
"""
949
Calculate autocorrelation function.
950
951
Parameters
952
----------
953
data : array-like
954
Input time series.
955
maxlags : int, optional
956
Maximum lag time to calculate.
957
958
Returns
959
-------
960
numpy.ndarray
961
Autocorrelation function.
962
963
Examples
964
--------
965
>>> # Autocorrelation of coordinate fluctuations
966
>>> coord_fluct = coords - np.mean(coords, axis=0)
967
>>> autocorr = util.autocorrelation(coord_fluct[:, 0]) # x-component
968
>>>
969
>>> # Find correlation time
970
>>> corr_time = np.where(autocorr < np.exp(-1))[0][0]
971
"""
972
```
973
974
## Usage Examples
975
976
### Unit Conversion Workflow
977
978
```python { .api }
979
def convert_simulation_units(universe, output_units=None):
980
"""
981
Convert simulation data to desired units.
982
983
Parameters
984
----------
985
universe : Universe
986
MDAnalysis Universe object.
987
output_units : dict, optional
988
Desired output units for each quantity type.
989
990
Returns
991
-------
992
dict
993
Converted data in specified units.
994
"""
995
if output_units is None:
996
output_units = {
997
'length': 'nm',
998
'time': 'ns',
999
'energy': 'kcal/mol',
1000
'mass': 'g/mol'
1001
}
1002
1003
# Get current trajectory properties
1004
positions = universe.atoms.positions # Å
1005
if hasattr(universe.trajectory.ts, 'time'):
1006
time = universe.trajectory.ts.time # ps
1007
else:
1008
time = 0.0
1009
1010
masses = universe.atoms.masses # amu
1011
1012
# Convert units
1013
converted = {}
1014
1015
# Convert positions
1016
if output_units['length'] != 'Angstrom':
1017
converted['positions'] = units.convert(
1018
positions, 'Angstrom', output_units['length']
1019
)
1020
else:
1021
converted['positions'] = positions
1022
1023
# Convert time
1024
if output_units['time'] != 'ps':
1025
converted['time'] = units.convert(
1026
time, 'ps', output_units['time']
1027
)
1028
else:
1029
converted['time'] = time
1030
1031
# Convert masses
1032
if output_units['mass'] != 'u':
1033
converted['masses'] = units.convert(
1034
masses, 'u', output_units['mass']
1035
)
1036
else:
1037
converted['masses'] = masses
1038
1039
# Convert box dimensions
1040
if universe.dimensions is not None:
1041
box_lengths = universe.dimensions[:3] # Å
1042
if output_units['length'] != 'Angstrom':
1043
converted['box_lengths'] = units.convert(
1044
box_lengths, 'Angstrom', output_units['length']
1045
)
1046
else:
1047
converted['box_lengths'] = box_lengths
1048
1049
# Add unit information
1050
converted['units'] = output_units.copy()
1051
1052
return converted
1053
1054
# Example usage
1055
converted_data = convert_simulation_units(u, {
1056
'length': 'nm',
1057
'time': 'ns',
1058
'mass': 'kDa'
1059
})
1060
1061
print(f"Positions in {converted_data['units']['length']}")
1062
print(f"Time: {converted_data['time']} {converted_data['units']['time']}")
1063
```
1064
1065
### Performance Optimization Example
1066
1067
```python { .api }
1068
def optimize_distance_calculations(coords1, coords2, cutoff=10.0):
1069
"""
1070
Optimize distance calculations using various strategies.
1071
"""
1072
import time
1073
1074
n1, n2 = len(coords1), len(coords2)
1075
print(f"Calculating distances between {n1} and {n2} points")
1076
1077
# Method 1: Full distance matrix (memory intensive)
1078
if n1 * n2 < 1e6: # Only for smaller systems
1079
start = time.time()
1080
full_distances = distances.distance_array(coords1, coords2)
1081
close_pairs_full = np.where(full_distances < cutoff)
1082
time_full = time.time() - start
1083
print(f"Full matrix method: {time_full:.3f}s")
1084
1085
# Method 2: Neighbor search (memory efficient)
1086
start = time.time()
1087
ns = NeighborSearch(coords1)
1088
close_atoms = []
1089
for i, coord in enumerate(coords2):
1090
neighbors = ns.search(coord.reshape(1, 3), cutoff)
1091
close_atoms.extend([(n, i) for n in neighbors])
1092
time_ns = time.time() - start
1093
print(f"Neighbor search method: {time_ns:.3f}s")
1094
1095
# Method 3: Chunked processing (memory controlled)
1096
start = time.time()
1097
chunk_size = min(1000, n2)
1098
close_pairs_chunked = []
1099
1100
for start_idx in range(0, n2, chunk_size):
1101
end_idx = min(start_idx + chunk_size, n2)
1102
chunk_coords = coords2[start_idx:end_idx]
1103
1104
chunk_distances = distances.distance_array(coords1, chunk_coords)
1105
chunk_pairs = np.where(chunk_distances < cutoff)
1106
1107
# Adjust indices for chunking
1108
adjusted_pairs = (chunk_pairs[0], chunk_pairs[1] + start_idx)
1109
close_pairs_chunked.append(adjusted_pairs)
1110
1111
time_chunked = time.time() - start
1112
print(f"Chunked method: {time_chunked:.3f}s")
1113
1114
return {
1115
'neighbor_search_time': time_ns,
1116
'chunked_time': time_chunked
1117
}
1118
1119
# Example usage
1120
protein_coords = u.select_atoms("protein").positions
1121
water_coords = u.select_atoms("resname SOL").positions[:1000] # Limit for demo
1122
1123
timing_results = optimize_distance_calculations(protein_coords, water_coords)
1124
```
1125
1126
### Custom Unit System
1127
1128
```python { .api }
1129
class CustomUnitSystem:
1130
"""
1131
Custom unit system for specialized applications.
1132
"""
1133
1134
def __init__(self, base_units=None):
1135
"""
1136
Initialize custom unit system.
1137
1138
Parameters
1139
----------
1140
base_units : dict, optional
1141
Custom base units. If None, uses MDAnalysis defaults.
1142
"""
1143
if base_units is None:
1144
self.base_units = units.MDANALYSIS_BASE_UNITS.copy()
1145
else:
1146
self.base_units = base_units.copy()
1147
1148
# Custom conversion factors
1149
self.custom_conversions = {
1150
'length': {
1151
'lattice_units': 5.431, # Silicon lattice parameter in Å
1152
'chain_length': 100.0, # Polymer chain length unit
1153
},
1154
'energy': {
1155
'binding_energy': 25.0, # Typical binding energy in kJ/mol
1156
'thermal_300K': 2.494, # kT at 300K in kJ/mol
1157
}
1158
}
1159
1160
def convert_custom(self, value, from_unit, to_unit, quantity_type):
1161
"""
1162
Convert between custom units.
1163
1164
Parameters
1165
----------
1166
value : float or array-like
1167
Value to convert.
1168
from_unit : str
1169
Source unit.
1170
to_unit : str
1171
Target unit.
1172
quantity_type : str
1173
Type of physical quantity.
1174
1175
Returns
1176
-------
1177
float or array-like
1178
Converted value.
1179
"""
1180
# Check if custom conversion is needed
1181
custom_units = self.custom_conversions.get(quantity_type, {})
1182
1183
if from_unit in custom_units and to_unit in custom_units:
1184
# Both custom units
1185
from_factor = custom_units[from_unit]
1186
to_factor = custom_units[to_unit]
1187
return value * (from_factor / to_factor)
1188
1189
elif from_unit in custom_units:
1190
# From custom to standard
1191
base_unit = self.base_units[quantity_type]
1192
from_factor = custom_units[from_unit]
1193
value_in_base = value * from_factor
1194
return units.convert(value_in_base, base_unit, to_unit)
1195
1196
elif to_unit in custom_units:
1197
# From standard to custom
1198
base_unit = self.base_units[quantity_type]
1199
value_in_base = units.convert(value, from_unit, base_unit)
1200
to_factor = custom_units[to_unit]
1201
return value_in_base / to_factor
1202
1203
else:
1204
# Standard MDAnalysis conversion
1205
return units.convert(value, from_unit, to_unit)
1206
1207
# Example usage
1208
custom_units = CustomUnitSystem()
1209
1210
# Convert distance to lattice units
1211
distance_angstrom = 10.8625 # 2 * silicon lattice parameter
1212
distance_lattice = custom_units.convert_custom(
1213
distance_angstrom, 'Angstrom', 'lattice_units', 'length'
1214
)
1215
print(f"{distance_angstrom} Å = {distance_lattice} lattice units")
1216
1217
# Convert energy to thermal units
1218
energy_kj = 7.482 # 3 * kT at 300K
1219
energy_thermal = custom_units.convert_custom(
1220
energy_kj, 'kJ/mol', 'thermal_300K', 'energy'
1221
)
1222
print(f"{energy_kj} kJ/mol = {energy_thermal:.1f} thermal units")
1223
```