or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

data-analysis.md format-conversion.md index.md system-management.md

data-analysis.mddocs/

0

# Data Analysis

1

2

Statistical analysis tools, unit conversions, geometry utilities, and integration with ML prediction and optimization frameworks. These tools enable comprehensive analysis and manipulation of atomistic data for scientific computing applications.

3

4

## Capabilities

5

6

### Statistical Analysis

7

8

Statistical functions for comparing systems and analyzing errors in computational data. Useful for validating ML models and comparing different calculation methods.

9

10

```python { .api }

11

def mae(errors) -> float:

12

"""

13

Calculate mean absolute error.

14

15

Parameters:

16

- errors: array-like, error values

17

18

Returns:

19

float: mean absolute error

20

"""

21

22

def rmse(errors) -> float:

23

"""

24

Calculate root mean squared error.

25

26

Parameters:

27

- errors: array-like, error values

28

29

Returns:

30

float: root mean squared error

31

"""

32

33

class ErrorsBase:

34

"""Base class for error calculations between systems."""

35

36

def __init__(self, system1, system2):

37

"""

38

Initialize error calculator.

39

40

Parameters:

41

- system1: LabeledSystem, reference system

42

- system2: LabeledSystem, comparison system

43

"""

44

45

@property

46

def e_errors(self) -> np.ndarray:

47

"""Energy errors array."""

48

49

@property

50

def f_errors(self) -> np.ndarray:

51

"""Force errors array."""

52

53

@property

54

def e_mae(self) -> float:

55

"""Energy mean absolute error."""

56

57

@property

58

def e_rmse(self) -> float:

59

"""Energy root mean squared error."""

60

61

@property

62

def f_mae(self) -> float:

63

"""Force mean absolute error."""

64

65

@property

66

def f_rmse(self) -> float:

67

"""Force root mean squared error."""

68

69

class Errors(ErrorsBase):

70

"""Error calculator for LabeledSystem objects."""

71

72

class MultiErrors(ErrorsBase):

73

"""Error calculator for MultiSystems objects."""

74

```

75

76

### Unit Conversion

77

78

Physical unit conversion utilities for energy, length, force, and pressure. Enables consistent unit handling across different software packages and calculation methods.

79

80

```python { .api }

81

class EnergyConversion:

82

"""Energy unit conversion between different systems."""

83

84

def __init__(self, unitA: str, unitB: str):

85

"""

86

Initialize energy conversion.

87

88

Parameters:

89

- unitA: str, source unit ('eV', 'hartree', 'kcal_mol', 'kJ_mol')

90

- unitB: str, target unit

91

"""

92

93

def __call__(self, value: float) -> float:

94

"""Convert energy value from unitA to unitB.

95

96

Parameters:

97

- value: float, energy value in unitA

98

99

Returns:

100

float: energy value in unitB

101

"""

102

103

def value(self) -> float:

104

"""Get conversion factor from unitA to unitB."""

105

106

class LengthConversion:

107

"""Length unit conversion between different systems."""

108

109

def __init__(self, unitA: str, unitB: str):

110

"""

111

Initialize length conversion.

112

113

Parameters:

114

- unitA: str, source unit ('angstrom', 'bohr', 'nm', 'm')

115

- unitB: str, target unit

116

"""

117

118

def __call__(self, value: float) -> float:

119

"""Convert length value from unitA to unitB.

120

121

Parameters:

122

- value: float, length value in unitA

123

124

Returns:

125

float: length value in unitB

126

"""

127

128

def value(self) -> float:

129

"""Get conversion factor from unitA to unitB."""

130

131

class ForceConversion:

132

"""Force unit conversion between different systems."""

133

134

def __init__(self, unitA: str, unitB: str):

135

"""

136

Initialize force conversion.

137

138

Parameters:

139

- unitA: str, source unit ('eV_angstrom', 'hartree_bohr')

140

- unitB: str, target unit

141

"""

142

143

def __call__(self, value: float) -> float:

144

"""Convert force value from unitA to unitB.

145

146

Parameters:

147

- value: float, force value in unitA

148

149

Returns:

150

float: force value in unitB

151

"""

152

153

def value(self) -> float:

154

"""Get conversion factor from unitA to unitB."""

155

156

class PressureConversion:

157

"""Pressure unit conversion between different systems."""

158

159

def __init__(self, unitA: str, unitB: str):

160

"""

161

Initialize pressure conversion.

162

163

Parameters:

164

- unitA: str, source unit ('GPa', 'bar', 'atm', 'Pa')

165

- unitB: str, target unit

166

"""

167

168

def __call__(self, value: float) -> float:

169

"""Convert pressure value from unitA to unitB.

170

171

Parameters:

172

- value: float, pressure value in unitA

173

174

Returns:

175

float: pressure value in unitB

176

"""

177

178

def value(self) -> float:

179

"""Get conversion factor from unitA to unitB."""

180

```

181

182

### Physical Constants

183

184

Fundamental physical constants and conversion factors for computational chemistry and materials science calculations.

185

186

```python { .api }

187

# Fundamental constants

188

AVOGADRO: float # Avogadro constant (mol^-1)

189

ELE_CHG: float # Elementary charge (C)

190

BOHR: float # Bohr radius (angstrom)

191

HARTREE: float # Hartree energy (eV)

192

RYDBERG: float # Rydberg energy (eV)

193

194

# Energy conversion factors

195

econvs: dict[str, float] = {

196

'eV': 1.0,

197

'hartree': 27.211386245988,

198

'kcal_mol': 0.0433641,  # eV per kcal/mol (1 / 23.060548)

199

'kJ_mol': 0.01036427,  # eV per kJ/mol (1 / 96.485332)

200

'rydberg': 13.605693123

201

}

202

203

# Length conversion factors

204

lconvs: dict[str, float] = {

205

'angstrom': 1.0,

206

'bohr': 0.5291772109,

207

'nm': 10.0,

208

'm': 1e10

209

}

210

```

211

212

### Geometry Utilities

213

214

Utility functions for manipulating atomic structures, handling periodic boundary conditions, and working with element data.

215

216

```python { .api }

217

def elements_index_map(elements: list[str], standard: list[str] = None, inverse: bool = False) -> dict:

218

"""

219

Create element-index mappings.

220

221

Parameters:

222

- elements: list of element symbols

223

- standard: standard element order (uses ELEMENTS if None)

224

- inverse: return index-to-element mapping if True

225

226

Returns:

227

dict: element-to-index or index-to-element mapping

228

"""

229

230

def remove_pbc(system, protect_layer: float = 0) -> dict:

231

"""

232

Remove periodic boundary conditions and create large cell.

233

234

Parameters:

235

- system: System instance

236

- protect_layer: protection layer thickness (angstrom)

237

238

Returns:

239

dict: system data with modified cell and coordinates

240

"""

241

242

def add_atom_names(data: dict, atom_names: list[str]) -> dict:

243

"""

244

Add new atom types to system data.

245

246

Parameters:

247

- data: system dictionary

248

- atom_names: new element names to add

249

250

Returns:

251

dict: updated system data

252

"""

253

254

def sort_atom_names(data: dict, type_map: list[str] = None) -> dict:

255

"""

256

Sort atom names consistently.

257

258

Parameters:

259

- data: system dictionary

260

- type_map: desired element order

261

262

Returns:

263

dict: system data with sorted atom names

264

"""

265

```

266

267

### Periodic Table

268

269

Element data and periodic table utilities for chemical analysis and element identification.

270

271

```python { .api }

272

class Element:

273

"""Element data from periodic table."""

274

275

def __init__(self, symbol: str):

276

"""

277

Initialize element.

278

279

Parameters:

280

- symbol: str, element symbol

281

"""

282

283

@property

284

def symbol(self) -> str:

285

"""Element symbol."""

286

287

@property

288

def atomic_number(self) -> int:

289

"""Atomic number."""

290

291

@property

292

def Z(self) -> int:

293

"""Atomic number (alias for atomic_number)."""

294

295

@property

296

def name(self) -> str:

297

"""Element name."""

298

299

@property

300

def X(self) -> float:

301

"""Electronegativity."""

302

303

@property

304

def mass(self) -> float:

305

"""Atomic mass (amu)."""

306

307

@property

308

def radius(self) -> float:

309

"""Atomic radius."""

310

311

@property

312

def calculated_radius(self) -> float:

313

"""Calculated atomic radius."""

314

315

@classmethod

316

def from_Z(cls, Z: int):

317

"""Create Element from atomic number.

318

319

Parameters:

320

- Z: int, atomic number

321

322

Returns:

323

Element instance

324

"""

325

326

# Element symbols list

327

ELEMENTS: list[str] # ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', ...]

328

```

329

330

### Driver System

331

332

Interface for ML model prediction and geometry optimization. Enables integration with external codes and ML frameworks.

333

334

```python { .api }

335

class Driver:

336

"""Abstract base class for ML model prediction drivers."""

337

338

def label(self, system):

339

"""

340

Predict properties for system.

341

342

Parameters:

343

- system: System instance

344

345

Returns:

346

LabeledSystem with predicted properties

347

"""

348

349

@classmethod

350

def register(cls, key: str):

351

"""Register driver plugin decorator."""

352

353

@classmethod

354

def get_driver(cls, key: str):

355

"""Get driver by key."""

356

357

@classmethod

358

def get_drivers(cls) -> dict:

359

"""Get all registered drivers."""

360

361

class Minimizer:

362

"""Abstract base class for geometry minimization."""

363

364

def minimize(self, system):

365

"""

366

Minimize system geometry.

367

368

Parameters:

369

- system: System instance

370

371

Returns:

372

System with minimized geometry

373

"""

374

375

@classmethod

376

def register(cls, key: str):

377

"""Register minimizer plugin decorator."""

378

379

@classmethod

380

def get_minimizer(cls, key: str):

381

"""Get minimizer by key."""

382

383

@classmethod

384

def get_minimizers(cls) -> dict:

385

"""Get all registered minimizers."""

386

387

@abstractmethod

388

def minimize(self, data: dict) -> dict:

389

"""Minimize system geometry.

390

391

Parameters:

392

- data: dict, system data with coordinates

393

394

Returns:

395

dict: system data with minimized geometry

396

"""

397

```

398

399

### Data Type System

400

401

Strongly-typed data validation system that ensures consistency and correctness of atomistic data structures.

402

403

```python { .api }

404

class DataType:

405

"""Represents a data type with shape validation and requirements."""

406

407

def __init__(self, name: str, dtype: type, shape: tuple, required: bool = True, deepmd_name: str = None):

408

"""

409

Initialize data type definition.

410

411

Parameters:

412

- name: str, data field name

413

- dtype: type, expected data type

414

- shape: tuple, expected array shape with axis identifiers

415

- required: bool, whether field is required

416

- deepmd_name: str, corresponding DeePMD field name

417

"""

418

419

def check(self, system) -> bool:

420

"""

421

Validate data in system.

422

423

Parameters:

424

- system: System instance

425

426

Returns:

427

bool: True if data is valid

428

429

Raises:

430

DataError: if data is invalid

431

"""

432

433

def real_shape(self, system) -> tuple:

434

"""

435

Calculate expected shape for system.

436

437

Parameters:

438

- system: System instance

439

440

Returns:

441

tuple: expected array shape

442

"""

443

444

class Axis:

445

"""Enumeration for data axis types."""

446

NFRAMES: str = 'nframes' # Number of frames axis

447

NATOMS: str = 'natoms' # Number of atoms axis

448

NTYPES: str = 'ntypes' # Number of atom types axis

449

NBONDS: str = 'nbonds' # Number of bonds axis

450

451

class DataError(Exception):

452

"""Exception raised for invalid data."""

453

454

def register_data_type(data_type: DataType, labeled: bool = False):

455

"""

456

Register custom data types.

457

458

Parameters:

459

- data_type: DataType instance to register

460

- labeled: bool, whether for labeled systems

461

"""

462

463

def get_data_types(labeled: bool = False) -> list[DataType]:

464

"""

465

Get all registered data types.

466

467

Parameters:

468

- labeled: bool, whether to include labeled data types

469

470

Returns:

471

list: registered data types

472

"""

473

```

474

475

## Usage Examples

476

477

### Statistical Analysis

478

479

```python

480

import dpdata

481

from dpdata.stat import mae, rmse

482

483

# Compare two calculations

484

ref_system = dpdata.LabeledSystem('reference.outcar', fmt='vasp/outcar')

485

test_system = dpdata.LabeledSystem('test.outcar', fmt='vasp/outcar')

486

487

# Calculate energy errors

488

energy_errors = test_system['energies'] - ref_system['energies']

489

print(f"Energy MAE: {mae(energy_errors):.4f} eV")

490

print(f"Energy RMSE: {rmse(energy_errors):.4f} eV")

491

492

# Force errors (flattened to one entry per Cartesian force component)

493

force_errors = test_system['forces'] - ref_system['forces']

494

force_errors_flat = force_errors.reshape(-1)

495

print(f"Force MAE: {mae(force_errors_flat):.4f} eV/Å")

496

print(f"Force RMSE: {rmse(force_errors_flat):.4f} eV/Å")

497

```

498

499

### Unit Conversions

500

501

```python

502

from dpdata.unit import EnergyConversion, LengthConversion

503

504

# Convert energies from Hartree to eV

505

energy_conv = EnergyConversion('hartree', 'eV')

506

energy_ev = energy_conv(-76.4) # Water energy: -76.4 Hartree converted to eV

507

508

# Convert lengths from Bohr to Angstrom

509

length_conv = LengthConversion('bohr', 'angstrom')

510

bond_length_ang = length_conv(1.8) # Bond length in Angstrom

511

512

print(f"Energy: {energy_ev:.3f} eV")

513

print(f"Bond length: {bond_length_ang:.3f} Å")

514

```

515

516

### Geometry Utilities

517

518

```python

519

from dpdata.utils import elements_index_map, remove_pbc

520

521

# Create element mapping

522

elements = ['H', 'C', 'N', 'O']

523

type_map = elements_index_map(elements)

524

print("Type map:", type_map) # {'H': 0, 'C': 1, 'N': 2, 'O': 3}

525

526

# Remove periodic boundaries

527

sys = dpdata.System('POSCAR', fmt='vasp/poscar')

528

sys_nopbc = remove_pbc(sys, protect_layer=2.0)

529

530

# Convert to non-periodic system

531

nopbc_system = dpdata.System(data=sys_nopbc)

532

nopbc_system.to('xyz', 'molecule.xyz')

533

```

534

535

### Working with ML Drivers

536

537

```python

538

# Example with custom driver (implementation would depend on specific ML framework)

539

@dpdata.driver.Driver.register('my_model')

540

class MyMLDriver(dpdata.driver.Driver):

541

def __init__(self, model_path):

542

self.model_path = model_path

543

544

def label(self, system):

545

# Load model and predict energies/forces

546

# Return LabeledSystem with predictions

547

pass

548

549

# Use driver for predictions

550

sys = dpdata.System('structure.xyz', fmt='xyz')

551

predicted = sys.predict(driver='my_model', model_path='model.pb')

552

553

print(f"Predicted energy: {predicted['energies'][0]:.4f} eV")

554

```

555

556

### Data Type Validation

557

558

```python

559

from dpdata.data_type import DataType, Axis, register_data_type

560

561

# Define custom data type

562

custom_type = DataType(

563

name='my_property',

564

dtype=float,

565

shape=(Axis.NFRAMES, Axis.NATOMS),

566

required=False

567

)

568

569

# Register for use with systems

570

register_data_type(custom_type, labeled=True)

571

572

# Validate system data

573

try:

574

ls = dpdata.LabeledSystem('data.xyz', fmt='xyz')

575

custom_type.check(ls)

576

print("Data validation passed")

577

except dpdata.data_type.DataError as e:

578

print(f"Data validation failed: {e}")

579

```

580

581

### Advanced Analysis

582

583

```python

584

import numpy as np

585

from dpdata.periodic_table import ELEMENTS, Element

586

587

# Analyze composition

588

sys = dpdata.System('structure.xyz', fmt='xyz')

589

atom_names = sys.get_atom_names()

590

atom_counts = sys.get_atom_numbs()

591

592

print("Composition analysis:")

593

for name, count in zip(atom_names, atom_counts):

594

element = Element(name)

595

print(f"{name}: {count} atoms, mass = {element.mass:.2f} amu")

596

597

# Calculate total mass

598

total_mass = sum(Element(name).mass * count

599

for name, count in zip(atom_names, atom_counts))

600

print(f"Total mass: {total_mass:.2f} amu")

601

602

# Analyze forces if available

603

if isinstance(sys, dpdata.LabeledSystem) and sys.has_forces():

604

forces = sys['forces'] # Shape: (nframes, natoms, 3)

605

force_magnitudes = np.linalg.norm(forces, axis=2)

606

607

print(f"Max force: {np.max(force_magnitudes):.3f} eV/Å")

608

print(f"RMS force: {np.sqrt(np.mean(force_magnitudes**2)):.3f} eV/Å")

609

```