Manipulating data formats of DeePMD-kit, VASP, QE, PWmat, and LAMMPS, etc.
—
Statistical analysis tools, unit conversions, geometry utilities, and integration with ML prediction and optimization frameworks. These tools enable comprehensive analysis and manipulation of atomistic data for scientific computing applications.
Statistical functions for comparing systems and analyzing errors in computational data. Useful for validating ML models and comparing different calculation methods.
def mae(errors) -> float:
"""
Calculate mean absolute error.
Parameters:
- errors: array-like, error values
Returns:
float: mean absolute error
"""
def rmse(errors) -> float:
"""
Calculate root mean squared error.
Parameters:
- errors: array-like, error values
Returns:
float: root mean squared error
"""
class ErrorsBase:
"""Base class for error calculations between systems."""
def __init__(self, system1, system2):
"""
Initialize error calculator.
Parameters:
- system1: LabeledSystem (Errors) or MultiSystems (MultiErrors), reference system
- system2: LabeledSystem (Errors) or MultiSystems (MultiErrors), comparison system
"""
@property
def e_errors(self) -> np.ndarray:
"""Energy errors array."""
@property
def f_errors(self) -> np.ndarray:
"""Force errors array."""
@property
def e_mae(self) -> float:
"""Energy mean absolute error."""
@property
def e_rmse(self) -> float:
"""Energy root mean squared error."""
@property
def f_mae(self) -> float:
"""Force mean absolute error."""
@property
def f_rmse(self) -> float:
"""Force root mean squared error."""
class Errors(ErrorsBase):
"""Error calculator for LabeledSystem objects."""
class MultiErrors(ErrorsBase):
"""Error calculator for MultiSystems objects."""

Physical unit conversion utilities for energy, length, force, and pressure. Enables consistent unit handling across different software packages and calculation methods.
class EnergyConversion:
"""Energy unit conversion between different systems."""
def __init__(self, unitA: str, unitB: str):
"""
Initialize energy conversion.
Parameters:
- unitA: str, source unit ('eV', 'hartree', 'kcal_mol', 'kJ_mol')
- unitB: str, target unit
"""
def __call__(self, value: float) -> float:
"""Convert energy value from unitA to unitB.
Parameters:
- value: float, energy value in unitA
Returns:
float: energy value in unitB
"""
def value(self) -> float:
"""Get conversion factor from unitA to unitB."""
class LengthConversion:
"""Length unit conversion between different systems."""
def __init__(self, unitA: str, unitB: str):
"""
Initialize length conversion.
Parameters:
- unitA: str, source unit ('angstrom', 'bohr', 'nm', 'm')
- unitB: str, target unit
"""
def __call__(self, value: float) -> float:
"""Convert length value from unitA to unitB.
Parameters:
- value: float, length value in unitA
Returns:
float: length value in unitB
"""
def value(self) -> float:
"""Get conversion factor from unitA to unitB."""
class ForceConversion:
"""Force unit conversion between different systems."""
def __init__(self, unitA: str, unitB: str):
"""
Initialize force conversion.
Parameters:
- unitA: str, source unit ('eV_angstrom', 'hartree_bohr')
- unitB: str, target unit
"""
def __call__(self, value: float) -> float:
"""Convert force value from unitA to unitB.
Parameters:
- value: float, force value in unitA
Returns:
float: force value in unitB
"""
def value(self) -> float:
"""Get conversion factor from unitA to unitB."""
class PressureConversion:
"""Pressure unit conversion between different systems."""
def __init__(self, unitA: str, unitB: str):
"""
Initialize pressure conversion.
Parameters:
- unitA: str, source unit ('GPa', 'bar', 'atm', 'Pa')
- unitB: str, target unit
"""
def __call__(self, value: float) -> float:
"""Convert pressure value from unitA to unitB.
Parameters:
- value: float, pressure value in unitA
Returns:
float: pressure value in unitB
"""
def value(self) -> float:
"""Get conversion factor from unitA to unitB."""

Fundamental physical constants and conversion factors for computational chemistry and materials science calculations.
# Fundamental constants
AVOGADRO: float # Avogadro constant (mol^-1)
ELE_CHG: float # Elementary charge (C)
BOHR: float # Bohr radius (angstrom)
HARTREE: float # Hartree energy (eV)
RYDBERG: float # Rydberg energy (eV)
# Energy conversion factors: the energy of one <unit> expressed in eV
# (eV is the reference unit, hence 1.0).
econvs: dict[str, float] = {
    'eV': 1.0,
    'hartree': 27.211386245988,
    # Per-mole units: 1 eV = 96.485332 kJ/mol = 23.060548 kcal/mol, so one
    # kJ/mol or kcal/mol expressed in eV is the reciprocal of those numbers.
    'kcal_mol': 0.0433641043,
    'kJ_mol': 0.0103642697,
    'rydberg': 13.605693123,
}
# Length conversion factors
lconvs: dict[str, float] = {
'angstrom': 1.0,
'bohr': 0.5291772109,
'nm': 10.0,
'm': 1e10
}

Utility functions for manipulating atomic structures, handling periodic boundary conditions, and working with element data.
def elements_index_map(elements: list[str], standard: list[str] = None, inverse: bool = False) -> dict:
"""
Create element-index mappings.
Parameters:
- elements: list of element symbols
- standard: standard element order (uses ELEMENTS if None)
- inverse: return index-to-element mapping if True
Returns:
dict: element-to-index or index-to-element mapping
"""
def remove_pbc(system, protect_layer: float = 0) -> dict:
"""
Remove periodic boundary conditions and create large cell.
Parameters:
- system: System instance
- protect_layer: protection layer thickness (angstrom)
Returns:
dict: system data with modified cell and coordinates
"""
def add_atom_names(data: dict, atom_names: list[str]) -> dict:
"""
Add new atom types to system data.
Parameters:
- data: system dictionary
- atom_names: new element names to add
Returns:
dict: updated system data
"""
def sort_atom_names(data: dict, type_map: list[str] = None) -> dict:
"""
Sort atom names consistently.
Parameters:
- data: system dictionary
- type_map: desired element order
Returns:
dict: system data with sorted atom names
"""

Element data and periodic table utilities for chemical analysis and element identification.
class Element:
"""Element data from periodic table."""
def __init__(self, symbol: str):
"""
Initialize element.
Parameters:
- symbol: str, element symbol
"""
@property
def symbol(self) -> str:
"""Element symbol."""
@property
def atomic_number(self) -> int:
"""Atomic number."""
@property
def Z(self) -> int:
"""Atomic number (alias for atomic_number)."""
@property
def name(self) -> str:
"""Element name."""
@property
def X(self) -> float:
"""Electronegativity."""
@property
def mass(self) -> float:
"""Atomic mass (amu)."""
@property
def radius(self) -> float:
"""Atomic radius."""
@property
def calculated_radius(self) -> float:
"""Calculated atomic radius."""
@classmethod
def from_Z(cls, Z: int):
"""Create Element from atomic number.
Parameters:
- Z: int, atomic number
Returns:
Element instance
"""
# Element symbols list
ELEMENTS: list[str] # ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', ...]

Interface for ML model prediction and geometry optimization. Enables integration with external codes and ML frameworks.
class Driver:
"""Abstract base class for ML model prediction drivers."""
def label(self, system):
"""
Predict properties for system.
Parameters:
- system: System instance
Returns:
LabeledSystem with predicted properties
"""
@classmethod
def register(cls, key: str):
"""Register driver plugin decorator."""
@classmethod
def get_driver(cls, key: str):
"""Get driver by key."""
@classmethod
def get_drivers(cls) -> dict:
"""Get all registered drivers."""
class Minimizer:
"""Abstract base class for geometry minimization."""
# NOTE(review): `minimize` is declared twice in this class listing: here with a
# `system` argument, and again further down as an @abstractmethod taking a
# `data` dict. In Python the later definition replaces this one; confirm which
# signature is intended and keep only that one.
def minimize(self, system):
"""
Minimize system geometry.
Parameters:
- system: System instance
Returns:
System with minimized geometry
"""
@classmethod
def register(cls, key: str):
"""Register minimizer plugin decorator."""
@classmethod
def get_minimizer(cls, key: str):
"""Get minimizer by key."""
@classmethod
def get_minimizers(cls) -> dict:
"""Get all registered minimizers."""
@abstractmethod
def minimize(self, data: dict) -> dict:
"""Minimize system geometry.
Parameters:
- data: dict, system data with coordinates
Returns:
dict: system data with minimized geometry
"""

Strongly-typed data validation system that ensures consistency and correctness of atomistic data structures.
class DataType:
"""Represents a data type with shape validation and requirements."""
def __init__(self, name: str, dtype: type, shape: tuple, required: bool = True, deepmd_name: str = None):
"""
Initialize data type definition.
Parameters:
- name: str, data field name
- dtype: type, expected data type
- shape: tuple, expected array shape with axis identifiers
- required: bool, whether field is required
- deepmd_name: str, corresponding DeePMD field name
"""
def check(self, system) -> bool:
"""
Validate data in system.
Parameters:
- system: System instance
Returns:
bool: True if data is valid
Raises:
DataError: if data is invalid
"""
def real_shape(self, system) -> tuple:
"""
Calculate expected shape for system.
Parameters:
- system: System instance
Returns:
tuple: expected array shape
"""
class Axis:
"""Enumeration for data axis types."""
NFRAMES: str = 'nframes' # Number of frames axis
NATOMS: str = 'natoms' # Number of atoms axis
NTYPES: str = 'ntypes' # Number of atom types axis
NBONDS: str = 'nbonds' # Number of bonds axis
class DataError(Exception):
"""Exception raised for invalid data."""
def register_data_type(data_type: DataType, labeled: bool = False):
"""
Register custom data types.
Parameters:
- data_type: DataType instance to register
- labeled: bool, whether for labeled systems
"""
def get_data_types(labeled: bool = False) -> list[DataType]:
"""
Get all registered data types.
Parameters:
- labeled: bool, whether to include labeled data types
Returns:
list: registered data types
"""

import dpdata
from dpdata.stat import mae, rmse
# Compare two calculations
ref_system = dpdata.LabeledSystem('reference.outcar', fmt='vasp/outcar')
test_system = dpdata.LabeledSystem('test.outcar', fmt='vasp/outcar')
# Calculate energy errors
energy_errors = test_system['energies'] - ref_system['energies']
print(f"Energy MAE: {mae(energy_errors):.4f} eV")
print(f"Energy RMSE: {rmse(energy_errors):.4f} eV")
# Force errors (per Cartesian component, flattened over frames and atoms)
force_errors = test_system['forces'] - ref_system['forces']
force_errors_flat = force_errors.reshape(-1)
print(f"Force MAE: {mae(force_errors_flat):.4f} eV/Å")
print(f"Force RMSE: {rmse(force_errors_flat):.4f} eV/Å")

from dpdata.unit import EnergyConversion, LengthConversion
# Convert energies from Hartree to eV
energy_conv = EnergyConversion('hartree', 'eV')
energy_ev = energy_conv(-76.4) # Water energy in eV
# Convert lengths from Bohr to Angstrom
length_conv = LengthConversion('bohr', 'angstrom')
bond_length_ang = length_conv(1.8) # Bond length in Angstrom
print(f"Energy: {energy_ev:.3f} eV")
print(f"Bond length: {bond_length_ang:.3f} Å")

from dpdata.utils import elements_index_map, remove_pbc
# Create element mapping
elements = ['H', 'C', 'N', 'O']
type_map = elements_index_map(elements)
print("Type map:", type_map) # {'H': 0, 'C': 1, 'N': 2, 'O': 3}
# Remove periodic boundaries
sys = dpdata.System('POSCAR', fmt='vasp/poscar')
sys_nopbc = remove_pbc(sys, protect_layer=2.0)
# Convert to non-periodic system
nopbc_system = dpdata.System(data=sys_nopbc)
nopbc_system.to('xyz', 'molecule.xyz')

# Example with custom driver (implementation would depend on specific ML framework)
@dpdata.driver.Driver.register('my_model')
class MyMLDriver(dpdata.driver.Driver):
def __init__(self, model_path):
self.model_path = model_path
def label(self, system):
# Load model and predict energies/forces
# Return LabeledSystem with predictions
pass
# Use driver for predictions
sys = dpdata.System('structure.xyz', fmt='xyz')
predicted = sys.predict(driver='my_model', model_path='model.pb')
print(f"Predicted energy: {predicted['energies'][0]:.4f} eV")

from dpdata.data_type import DataType, Axis, register_data_type
# Define custom data type
custom_type = DataType(
name='my_property',
dtype=float,
shape=(Axis.NFRAMES, Axis.NATOMS),
required=False
)
# Register for use with systems
register_data_type(custom_type, labeled=True)
# Validate system data
try:
ls = dpdata.LabeledSystem('data.xyz', fmt='xyz')
custom_type.check(ls)
print("Data validation passed")
except dpdata.data_type.DataError as e:
print(f"Data validation failed: {e}")

import numpy as np
from dpdata.periodic_table import ELEMENTS, Element
# Analyze composition
sys = dpdata.System('structure.xyz', fmt='xyz')
atom_names = sys.get_atom_names()
atom_counts = sys.get_atom_numbs()
print("Composition analysis:")
for name, count in zip(atom_names, atom_counts):
element = Element(name)
print(f"{name}: {count} atoms, mass = {element.mass:.2f} amu")
# Calculate total mass
total_mass = sum(Element(name).mass * count
for name, count in zip(atom_names, atom_counts))
print(f"Total mass: {total_mass:.2f} amu")
# Analyze forces if available
if isinstance(sys, dpdata.LabeledSystem) and sys.has_forces():
forces = sys['forces'] # Shape: (nframes, natoms, 3)
force_magnitudes = np.linalg.norm(forces, axis=2)
print(f"Max force: {np.max(force_magnitudes):.3f} eV/Å")
print(f"RMS force: {np.sqrt(np.mean(force_magnitudes**2)):.3f} eV/Å")

Install with Tessl CLI
npx tessl i tessl/pypi-dpdata