Manipulating data formats of DeePMD-kit, VASP, QE, PWmat, and LAMMPS, etc.
—
Comprehensive format support for quantum chemistry (VASP, Gaussian, CP2K), molecular dynamics (LAMMPS, GROMACS), machine learning (DeePMD-kit), and general formats (XYZ, SDF), with both Python API and command-line tools. The plugin-based architecture enables seamless interoperability between different computational science software packages.
Plugin-based format conversion system that handles reading from and writing to various atomistic data formats. The system uses a registry pattern to dynamically load format handlers.
def load_format(fmt: str):
"""
Load format plugin by name.
Parameters:
- fmt: str, format identifier (e.g., 'vasp/poscar', 'lammps/lmp')
Returns:
Format handler instance
Raises:
NotImplementedError: if format is not supported
"""
class Format:
"""Abstract base class for file format plugins."""
@classmethod
def register(cls, key: str):
"""
Register format plugin decorator.
Parameters:
- key: str, format identifier
Returns:
Decorator function for format classes
"""
@classmethod
def register_from(cls, key: str):
"""
Register custom from method decorator.
Parameters:
- key: str, format identifier
Returns:
Decorator function for from methods
"""
@classmethod
def register_to(cls, key: str):
"""
Register custom to method decorator.
Parameters:
- key: str, format identifier
Returns:
Decorator function for to methods
"""
@classmethod
def get_formats(cls) -> dict:
"""Get all registered format plugins."""
@classmethod
def get_from_methods(cls) -> dict:
"""Get all registered from methods."""
@classmethod
def get_to_methods(cls) -> dict:
"""Get all registered to methods."""
@classmethod
def post(cls, func_name: str):
"""Register post-processing decorator.
Parameters:
- func_name: str, function name to post-process
Returns:
Decorator for post-processing functions
"""
def from_system(self, file_name: str, **kwargs) -> dict:
"""Load system data from file.
Parameters:
- file_name: str, path to input file
- kwargs: format-specific options
Returns:
dict: system data
"""
def to_system(self, data: dict, *args, **kwargs):
"""Write system data to file.
Parameters:
- data: dict, system data
- args: positional arguments for output
- kwargs: format-specific options
"""
def from_labeled_system(self, file_name: str, **kwargs) -> dict:
"""Load labeled system data from file.
Parameters:
- file_name: str, path to input file
- kwargs: format-specific options
Returns:
dict: labeled system data
"""
def to_labeled_system(self, data: dict, *args, **kwargs):
"""Write labeled system data to file.
Parameters:
- data: dict, labeled system data
- args: positional arguments for output
- kwargs: format-specific options
"""
def from_bond_order_system(self, file_name: str, **kwargs):
"""Load bond order system from file.
Parameters:
- file_name: str, path to input file
- kwargs: format-specific options
Returns:
RDKit molecule object
"""
def to_bond_order_system(self, data: dict, rdkit_mol, *args, **kwargs):
"""Write bond order system to file.
Parameters:
- data: dict, system data
- rdkit_mol: RDKit molecule object
- args: positional arguments for output
- kwargs: format-specific options
"""
def from_multi_systems(self, directory: str, **kwargs) -> dict:
"""Load multiple systems from directory.
Parameters:
- directory: str, directory path
- kwargs: format-specific options
Returns:
dict: multi-systems data
"""
def to_multi_systems(self, formulas: dict, directory: str, **kwargs):
"""Write multiple systems to directory.
Parameters:
- formulas: dict, system formulas and data
- directory: str, output directory
- kwargs: format-specific options
"""

Core methods for loading and saving atomistic data in various formats. These methods are available on all System classes.
class System:
@classmethod
def from_fmt(cls, file_name: str, fmt: str, **kwargs):
"""
Load system from file with specified format.
Parameters:
- file_name: str, path to input file
- fmt: str, format identifier
- kwargs: format-specific options
Returns:
System instance loaded from file
"""
def to(self, fmt: str, *args, **kwargs):
"""
Export system to specified format.
Parameters:
- fmt: str, output format identifier
- args: positional arguments for format
- kwargs: format-specific options
"""

Command-line tools for format conversion and basic operations. Provides quick conversion between formats without writing Python code.
def dpdata_cli():
"""
Main CLI entry point for format conversion.
Usage:
dpdata INPUT_FILE -i INPUT_FORMAT -o OUTPUT_FORMAT -O OUTPUT_PATH
"""
def convert(from_file: str, from_format: str, to_file: str, to_format: str, no_labeled: bool = False, multi: bool = False, type_map: list = None):
"""
Convert between file formats programmatically.
Parameters:
- from_file: str, source file path
- from_format: str, source format identifier
- to_file: str, target file path
- to_format: str, target format identifier
- no_labeled: bool, treat as unlabeled data
- multi: bool, handle multiple systems
- type_map: list, atom type mapping
"""

Generic plugin registration system that enables extensible format support and custom functionality.
class Plugin:
"""Generic plugin registration system."""
def __init__(self):
"""Initialize plugin registry."""
def register(self, key: str):
"""
Register plugin decorator.
Parameters:
- key: str, plugin identifier
Returns:
Decorator function for plugin classes
"""
def get_plugin(self, key: str):
"""
Retrieve plugin by key.
Parameters:
- key: str, plugin identifier
Returns:
Plugin instance
Raises:
RuntimeError: if plugin not found
"""
@property
def plugins(self) -> dict:
"""Dictionary of all registered plugins."""

DPData provides extensive format coverage across the computational science ecosystem:
VASP: Vienna Ab initio Simulation Package
- vasp/poscar - POSCAR/CONTCAR structure files
- vasp/outcar - OUTCAR output with energies and forces
- vasp/xml - vasprun.xml electronic structure data

Gaussian: Quantum chemistry software
- gaussian/gjf - Gaussian input files
- gaussian/log - Gaussian output files

CP2K: Quantum molecular dynamics
- cp2k/output - CP2K output files
- cp2k/cell - CP2K cell files

ABACUS: Density functional theory package
- abacus/stru - ABACUS structure files
- abacus/scf - Self-consistent field results

Other QC Packages:
- qe/pw - Quantum ESPRESSO pw.x
- fhi_aims/output - FHI-aims output
- siesta/output - SIESTA output
- orca/output - ORCA quantum chemistry
- psi4/output - PSI4 quantum chemistry
- dftbplus/output - DFTB+ calculations

LAMMPS: Large-scale Atomic/Molecular Massively Parallel Simulator
- lammps/lmp - LAMMPS data files
- lammps/dump - LAMMPS dump files

GROMACS: Molecular dynamics package
- gromacs/gro - GROMACS structure files

AMBER: Molecular dynamics suite
- amber/nc - AMBER NetCDF trajectories

DeePMD-kit: Deep potential molecular dynamics
- deepmd/raw - Raw data format
- deepmd/npy - NumPy array format
- deepmd/hdf5 - HDF5 data format

ASE: Atomic Simulation Environment
- ase/structure - ASE Atoms objects

XYZ: Cartesian coordinates
- xyz - Standard XYZ format

SDF/MOL: Chemical structure formats
- sdf - Structure Data Format
- mol - MOL file format

PyMatGen: Materials analysis
- pymatgen/structure - PyMatGen Structure objects

import dpdata
# Load VASP OUTCAR file
ls = dpdata.LabeledSystem('OUTCAR', fmt='vasp/outcar')
# Convert to DeePMD training format
ls.to('deepmd/npy', 'training_data')
# Load LAMMPS dump file
sys = dpdata.System('dump.lammpstrj', fmt='lammps/dump', type_map=['H', 'O'])
# Convert to XYZ format
sys.to('xyz', 'trajectory.xyz')

# Convert VASP to DeePMD format
dpdata OUTCAR -i vasp/outcar -o deepmd/npy -O deepmd_data
# Convert LAMMPS to XYZ
dpdata dump.lammpstrj -i lammps/dump -o xyz -O trajectory.xyz
# Check version
dpdata --version

# Load different formats into MultiSystems
ms = dpdata.MultiSystems()
# Add VASP data
vasp_sys = dpdata.System('POSCAR', fmt='vasp/poscar')
ms.append(vasp_sys)
# Add LAMMPS data
lammps_sys = dpdata.System('data.lmp', fmt='lammps/lmp')
ms.append(lammps_sys)
# Export all to consistent format
ms.to('xyz', 'combined_structures.xyz')

from dpdata.format import Format
@Format.register('custom/myformat')
class MyFormat(Format):
def from_system(self, file_name, **kwargs):
# Implementation for reading custom format
pass
def to_system(self, system, file_name, **kwargs):
# Implementation for writing custom format
pass
# Use custom format
sys = dpdata.System('myfile.custom', fmt='custom/myformat')

# VASP with type mapping
ls = dpdata.LabeledSystem('OUTCAR', fmt='vasp/outcar', type_map=['C', 'H'])
# LAMMPS with specific frame range
sys = dpdata.System('dump.lammpstrj', fmt='lammps/dump', begin=100, step=10)
# DeePMD with compression
ls.to('deepmd/hdf5', 'data.hdf5', compression='gzip')
# XYZ with custom formatting
sys.to('xyz', 'structure.xyz', format_string='%.6f')

try:
# Attempt to load file
sys = dpdata.System('input.xyz', fmt='xyz')
except FileNotFoundError:
print("Input file not found")
except NotImplementedError as e:
print(f"Format not supported: {e}")
except Exception as e:
print(f"Error loading data: {e}")
# Check available formats
formats = dpdata.format.Format.get_formats()
print("Available formats:", list(formats.keys()))

Install with Tessl CLI
npx tessl i tessl/pypi-dpdata