or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-analysis.md
format-conversion.md
index.md
system-management.md

docs/system-management.md

0

# System Management

1

2

Core classes for managing atomistic data including unlabeled structures, energy/force labeled datasets, multi-composition systems, and molecular systems with bond information. These classes provide the fundamental data structures for all dpdata operations.

3

4

## Capabilities

5

6

### System Class

7

8

The fundamental data container for atomic simulation systems. Contains frames with consistent atom ordering, storing coordinates, cell information, atom types, and topology without energy/force labels.

9

10

```python { .api }

11

class System:

12

def __init__(self, file_name=None, fmt=None, type_map=None, begin=0, step=1, data=None, convergence_check=True, **kwargs):

13

"""

14

Initialize a System from file or data.

15

16

Parameters:

17

- file_name: str, path to input file

18

- fmt: str, format identifier ('vasp/poscar', 'lammps/lmp', etc.)

19

- type_map: list, element names ordered by type index (maps type indices to element names)

20

- begin: int, starting frame index

21

- step: int, frame step size

22

- data: dict, raw system data

23

- convergence_check: bool, check VASP convergence

24

"""

25

26

def get_atom_names(self) -> list[str]:

27

"""Get list of element names."""

28

29

def get_atom_types(self) -> np.ndarray:

30

"""Get array of atom type indices."""

31

32

def get_atom_numbs(self) -> list[int]:

33

"""Get number of atoms per type."""

34

35

def get_nframes(self) -> int:

36

"""Get number of frames."""

37

38

def get_natoms(self) -> int:

39

"""Get total number of atoms."""

40

41

def get_ntypes(self) -> int:

42

"""Get number of atom types."""

43

44

def copy(self):

45

"""Create deep copy of system."""

46

47

def sub_system(self, f_idx):

48

"""

49

Extract subsystem by frame indices.

50

51

Parameters:

52

- f_idx: array-like, frame indices to extract

53

54

Returns:

55

System with selected frames

56

"""

57

58

def append(self, system):

59

"""

60

Append another system.

61

62

Parameters:

63

- system: System, system to append

64

"""

65

66

def sort_atom_names(self, type_map=None):

67

"""

68

Sort atoms by element names.

69

70

Parameters:

71

- type_map: list, element name order

72

"""

73

74

def sort_atom_types(self):

75

"""Sort atoms by type indices."""

76

77

def check_data(self):

78

"""Validate system data integrity."""

79

80

def map_atom_types(self, type_map: list[str]):

81

"""Map atom types using custom mapping.

82

83

Parameters:

84

- type_map: list, mapping from indices to element names

85

"""

86

87

def extend(self, systems: list[System]):

88

"""Extend system with multiple other systems.

89

90

Parameters:

91

- systems: list of System instances to append

92

"""

93

94

def affine_map(self, trans: np.ndarray, f_idx: int = 0):

95

"""Apply affine transformation to coordinates.

96

97

Parameters:

98

- trans: array, 3x3 transformation matrix

99

- f_idx: int, frame index to transform

100

"""

101

102

def rot_lower_triangular(self):

103

"""Rotate all frames to have lower triangular cells."""

104

105

def rot_frame_lower_triangular(self, f_idx: int = 0):

106

"""Rotate specific frame to have lower triangular cell.

107

108

Parameters:

109

- f_idx: int, frame index to rotate

110

"""

111

112

def add_atom_names(self, atom_names: list[str]):

113

"""Add new atom types.

114

115

Parameters:

116

- atom_names: list, new element names to add

117

"""

118

119

def replicate(self, ncopy):

120

"""

121

Replicate system in 3D.

122

123

Parameters:

124

- ncopy: array-like [nx, ny, nz], replication counts

125

126

Returns:

127

System with replicated structure

128

"""

129

130

def apply_pbc(self):

131

"""Apply periodic boundary conditions."""

132

133

def remove_pbc(self, protect_layer=0):

134

"""

135

Remove PBC and create large cell.

136

137

Parameters:

138

- protect_layer: float, protection layer thickness

139

"""

140

141

def perturb(self, pert_num, cell_pert_fraction=0.03, atom_pert_distance=0.01, atom_pert_style='normal', atom_pert_prob=1.0):

142

"""

143

Generate perturbed structures.

144

145

Parameters:

146

- pert_num: int, number of perturbed structures

147

- cell_pert_fraction: float, cell deformation fraction

148

- atom_pert_distance: float, atom displacement distance

149

- atom_pert_style: str, perturbation style ('normal', 'const')

150

- atom_pert_prob: float, probability of perturbing each atom

151

152

Returns:

153

MultiSystems with perturbed structures

154

"""

155

156

def shuffle(self):

157

"""Randomly shuffle frames."""

158

159

def pick_atom_idx(self, idx, nopbc=False):

160

"""

161

Select atoms by indices.

162

163

Parameters:

164

- idx: array-like, atom indices to select

165

- nopbc: bool, whether system is non-periodic

166

167

Returns:

168

System with selected atoms

169

"""

170

171

def remove_atom_names(self, atom_names):

172

"""

173

Remove specific atom types.

174

175

Parameters:

176

- atom_names: list, element names to remove

177

178

Returns:

179

System without specified atoms

180

"""

181

182

def pick_by_amber_mask(self, param, maskstr, pass_coords=True, nopbc=False):

183

"""

184

Select atoms using Amber mask syntax.

185

186

Parameters:

187

- param: str, path to parameter file

188

- maskstr: str, Amber mask string

189

- pass_coords: bool, whether to pass coordinates

190

- nopbc: bool, whether system is non-periodic

191

192

Returns:

193

System with selected atoms

194

"""

195

196

def replace(self, initial_atom_type, end_atom_type, replace_num=None):

197

"""

198

Replace atoms of one type with another.

199

200

Parameters:

201

- initial_atom_type: str, element to replace

202

- end_atom_type: str, replacement element

203

- replace_num: int, number of atoms to replace

204

205

Returns:

206

System with replaced atoms

207

"""

208

209

def predict(self, *args, driver=None, **kwargs):

210

"""

211

Predict properties using ML models.

212

213

Parameters:

214

- driver: str or Driver, prediction driver

215

- args, kwargs: driver-specific arguments

216

217

Returns:

218

LabeledSystem with predicted properties

219

"""

220

221

def minimize(self, *args, minimizer=None, **kwargs):

222

"""

223

Minimize geometry.

224

225

Parameters:

226

- minimizer: str or Minimizer, optimization method

227

- args, kwargs: minimizer-specific arguments

228

229

Returns:

230

System with minimized geometry

231

"""

232

233

def to(self, fmt, *args, **kwargs):

234

"""

235

Export to various formats.

236

237

Parameters:

238

- fmt: str, output format

239

- args, kwargs: format-specific arguments

240

"""

241

242

@classmethod

243

def from_dict(cls, data: dict):

244

"""Create System from dictionary data."""

245

246

@classmethod

247

def load(cls, filename: str):

248

"""Load System from JSON/YAML file."""

249

250

@property

251

def formula(self) -> str:

252

"""Chemical formula string."""

253

254

@property

255

def uniq_formula(self) -> str:

256

"""Sorted formula for comparison."""

257

258

@property

259

def short_formula(self) -> str:

260

"""Compressed formula without zeros."""

261

262

@property

263

def formula_hash(self) -> str:

264

"""SHA256 hash of formula."""

265

266

@property

267

def short_name(self) -> str:

268

"""Abbreviated system name."""

269

270

@property

271

def nopbc(self) -> bool:

272

"""Whether system is non-periodic."""

273

```

274

275

### LabeledSystem Class

276

277

System with energy, force, and virial labels for machine learning model training. Extends System with additional methods for handling training data.

278

279

```python { .api }

280

class LabeledSystem(System):

281

def has_forces(self) -> bool:

282

"""Check if forces are present."""

283

284

def has_virial(self) -> bool:

285

"""Check if virial data is present."""

286

287

def affine_map_fv(self, trans: np.ndarray, f_idx: int):

288

"""Apply transformation to forces and virial.

289

290

Parameters:

291

- trans: array, 3x3 transformation matrix

292

- f_idx: int, frame index

293

"""

294

295

def rot_frame_lower_triangular(self, f_idx: int = 0):

296

"""Rotate frame to lower triangular and adjust forces/virial.

297

298

Parameters:

299

- f_idx: int, frame index

300

"""

301

302

def correction(self, hl_sys):

303

"""

304

Calculate correction between two labeled systems.

305

306

Parameters:

307

- hl_sys: LabeledSystem, high-level reference system

308

309

Returns:

310

LabeledSystem with correction data

311

"""

312

313

def remove_outlier(self, threshold=3.0):

314

"""

315

Remove outlier frames based on energy distribution.

316

317

Parameters:

318

- threshold: float, standard deviation threshold

319

320

Returns:

321

LabeledSystem with outliers removed

322

"""

323

```

324

325

### MultiSystems Class

326

327

Container for multiple System objects with different compositions but consistent atom naming. Enables handling of datasets with multiple chemical compositions.

328

329

```python { .api }

330

class MultiSystems:

331

def __init__(self, *systems, type_map=None):

332

"""

333

Initialize MultiSystems container.

334

335

Parameters:

336

- systems: System objects to include

337

- type_map: list, consistent atom type mapping

338

"""

339

340

def from_fmt_obj(self, fmtobj, directory, labeled=False, **kwargs):

341

"""

342

Load multiple systems from format object.

343

344

Parameters:

345

- fmtobj: Format, format handler

346

- directory: str, directory path

347

- labeled: bool, whether systems have labels

348

"""

349

350

def to(self, fmt, *args, **kwargs):

351

"""Export all systems to format."""

352

353

def get_nframes(self) -> int:

354

"""Get total frames across all systems."""

355

356

def append(self, *systems):

357

"""

358

Add systems or other MultiSystems.

359

360

Parameters:

361

- systems: System or MultiSystems objects to add

362

"""

363

364

def predict(self, *args, driver=None, **kwargs):

365

"""Predict properties for all systems."""

366

367

def minimize(self, *args, minimizer=None, **kwargs):

368

"""Minimize all systems."""

369

370

def pick_atom_idx(self, idx, nopbc=False):

371

"""Select atoms from all systems."""

372

373

def correction(self, hl_sys):

374

"""Calculate corrections for all systems."""

375

376

def train_test_split(self, test_size=0.2, seed=None):

377

"""

378

Split into training/testing sets.

379

380

Parameters:

381

- test_size: float, fraction for testing

382

- seed: int, random seed

383

384

Returns:

385

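tuple: (train_MultiSystems, test_MultiSystems, test_idx), where test_idx is a dict mapping each system's formula to a boolean array marking its test frames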

386

"""

387

388

@classmethod

389

def from_file(cls, file_name: str, fmt: str = 'auto', **kwargs):

390

"""Load MultiSystems from single file.

391

392

Parameters:

393

- file_name: str, path to input file

394

- fmt: str, format identifier

395

- kwargs: format-specific options

396

397

Returns:

398

MultiSystems instance

399

"""

400

401

@classmethod

402

def from_dir(cls, dir_name: str, file_name: str, fmt: str = 'auto', type_map: list[str] = None):

403

"""Load MultiSystems from directory with multiple files.

404

405

Parameters:

406

- dir_name: str, directory path

407

- file_name: str, file pattern to match

408

- fmt: str, format identifier

409

- type_map: list, atom type mapping

410

411

Returns:

412

MultiSystems instance

413

"""

414

415

def load_systems_from_file(self, file_name: str, fmt: str, **kwargs):

416

"""Load and append systems from file.

417

418

Parameters:

419

- file_name: str, path to input file

420

- fmt: str, format identifier

421

- kwargs: format-specific options

422

"""

423

```

424

425

### BondOrderSystem Class

426

427

System with chemical bond information and formal charges, typically loaded from molecular file formats. Provides access to molecular connectivity and chemical properties.

428

429

```python { .api }

430

class BondOrderSystem(System):

431

def __init__(self, file_name=None, fmt=None, type_map=None, begin=0, step=1, data=None, rdkit_mol=None, sanitize_level='high', raise_errors=True, verbose=True, **kwargs):

432

"""

433

Initialize BondOrderSystem.

434

435

Parameters:

436

- rdkit_mol: RDKit molecule object

437

- sanitize_level: str, RDKit sanitization level

438

- raise_errors: bool, whether to raise errors

439

- verbose: bool, verbose output

440

"""

441

442

def from_rdkit_mol(self, rdkit_mol):

443

"""

444

Initialize from RDKit molecule.

445

446

Parameters:

447

- rdkit_mol: RDKit molecule object

448

"""

449

450

def get_nbonds(self) -> int:

451

"""Get number of bonds."""

452

453

def get_charge(self) -> int:

454

"""Get total formal charge."""

455

456

def get_mol(self):

457

"""Get RDKit molecule object."""

458

459

def get_bond_order(self, begin_atom_idx: int, end_atom_idx: int) -> int:

460

"""

461

Get bond order between atoms.

462

463

Parameters:

464

- begin_atom_idx: int, first atom index

465

- end_atom_idx: int, second atom index

466

467

Returns:

468

int: bond order (1=single, 2=double, 3=triple)

469

"""

470

471

def from_rdkit_mol(self, rdkit_mol):

472

"""Initialize from RDKit molecule object.

473

474

Parameters:

475

- rdkit_mol: RDKit Mol, molecule object

476

"""

477

478

def get_formal_charges(self) -> list[int]:

479

"""Get formal charges on atoms.

480

481

Returns:

482

list: formal charges for each atom

483

"""

484

```

485

486

## Usage Examples

487

488

### Working with Systems

489

490

```python

491

import dpdata

492

493

# Load VASP structure

494

sys = dpdata.System('POSCAR', fmt='vasp/poscar')

495

496

# Basic properties

497

print(f"Formula: {sys.formula}")

498

print(f"Atoms: {sys.get_natoms()}")

499

print(f"Types: {sys.get_atom_names()}")

500

501

# Manipulate structure

502

replicated = sys.replicate([2, 2, 1]) # 2x2x1 supercell

503

perturbed = sys.perturb(10, atom_pert_distance=0.1) # 10 perturbed structures

504

505

# Export

506

sys.to('lammps/lmp', 'structure.lmp')

507

```

508

509

### Working with Labeled Data

510

511

```python

512

# Load VASP trajectory with energies/forces

513

ls = dpdata.LabeledSystem('OUTCAR', fmt='vasp/outcar')

514

515

print(f"Has forces: {ls.has_forces()}")

516

print(f"Has virial: {ls.has_virial()}")

517

518

# Split trajectory (assumes the trajectory contains at least 100 frames)

519

train_data = ls.sub_system(range(0, 80))

520

test_data = ls.sub_system(range(80, 100))

521

522

# Export for ML training

523

train_data.to('deepmd/npy', 'train_data')

524

test_data.to('deepmd/npy', 'test_data')

525

```

526

527

### Working with Multiple Systems

528

529

```python

530

# Load multiple compositions

531

ms = dpdata.MultiSystems()

532

ms.append(dpdata.System('water.xyz', fmt='xyz'))

533

ms.append(dpdata.System('methane.xyz', fmt='xyz'))

534

535

# Train/test split across all systems

536

train_ms, test_ms, test_idx = ms.train_test_split(test_size=0.2, seed=42)

537

538

print(f"Total frames: {ms.get_nframes()}")

539

print(f"Train frames: {train_ms.get_nframes()}")

540

print(f"Test frames: {test_ms.get_nframes()}")

541

```