A Python package for creating feature transformations in applications of machine learning to materials science.
npx @tessl/cli install tessl/pypi-dscribe@2.1.0
DScribe is a comprehensive Python library for transforming atomic structures into fixed-size numerical fingerprints (descriptors) used in machine learning applications for materials science. The package provides implementations of various descriptor methods including Coulomb Matrix, Sine Matrix, Ewald Matrix, Atom-centered Symmetry Functions (ACSF), Smooth Overlap of Atomic Positions (SOAP), Many-body Tensor Representation (MBTR), Local Many-body Tensor Representation (LMBTR), and Valle-Oganov descriptor. All descriptors support both spectrum generation and derivative calculations with respect to atomic positions.
pip install dscribe or conda install -c conda-forge dscribe
import dscribe
from dscribe import System
For descriptors:
from dscribe.descriptors import SOAP, ACSF, MBTR, CoulombMatrix, SineMatrix, EwaldSumMatrix, LMBTR, ValleOganov
For core classes:
from dscribe.core import System, Lattice
For kernels:
from dscribe.kernels import AverageKernel, REMatchKernel
For utilities:
from dscribe.utils.geometry import get_adjacency_matrix, get_extended_system
from dscribe.utils.species import symbols_to_numbers, get_atomic_numbers
from dscribe.utils.stats import system_stats
from dscribe.utils.dimensionality import is1d, is2d
import numpy as np
from ase.build import molecule
from dscribe.descriptors import SOAP, CoulombMatrix
from dscribe import System
# Define atomic structures using ASE
samples = [molecule("H2O"), molecule("NO2"), molecule("CO2")]
# Or create DScribe System objects (extends ASE Atoms with caching)
water_system = System.from_atoms(molecule("H2O"))
# Setup descriptors
cm_desc = CoulombMatrix(n_atoms_max=3, permutation="sorted_l2")
soap_desc = SOAP(species=["C", "H", "O", "N"], r_cut=5.0, n_max=8, l_max=6)
# Create descriptors as numpy arrays
water = samples[0]
coulomb_matrix = cm_desc.create(water)
soap = soap_desc.create(water, centers=[0]) # SOAP for atom at index 0
# Process multiple systems with optional parallelization
coulomb_matrices = cm_desc.create(samples, n_jobs=3)
oxygen_indices = [np.where(x.get_atomic_numbers() == 8)[0] for x in samples]
oxygen_soap = soap_desc.create(samples, centers=oxygen_indices, n_jobs=3)
# Calculate derivatives with respect to atomic positions
derivatives, descriptors = soap_desc.derivatives(water, return_descriptor=True)
DScribe uses a hierarchical descriptor architecture:
- System (extended ASE Atoms with caching) and Lattice (unit cell representation)
- Descriptor: Base class for all descriptors
- DescriptorLocal: Base for per-atom descriptors (SOAP, ACSF, LMBTR)
- DescriptorGlobal: Base for per-structure descriptors (MBTR, ValleOganov)
- DescriptorMatrix: Base for matrix descriptors (CoulombMatrix, SineMatrix, EwaldSumMatrix)
This design enables consistent interfaces across different descriptor types while supporting both local (per-atom) and global (per-structure) feature representations, parallel processing, and derivative calculations for machine learning applications in materials science.
Local descriptors compute features for individual atoms or local atomic environments, producing per-atom feature vectors that can be averaged or processed separately.
class SOAP:
def __init__(self, r_cut, n_max, l_max, sigma=1.0, rbf="gto",
weighting=None, average="off", compression={"mode": "off", "species_weighting": None},
species=None, periodic=False, sparse=False, dtype="float64"): ...
def create(self, system, centers=None, n_jobs=1, only_physical_cores=False, verbose=False): ...
def derivatives(self, system, centers=None, include=None, exclude=None, method="auto", return_descriptor=False, n_jobs=1, only_physical_cores=False): ...
class ACSF:
def __init__(self, r_cut, g2_params=None, g3_params=None, g4_params=None, g5_params=None,
species=None, periodic=False, sparse=False, dtype="float64"): ...
def create(self, system, centers=None, n_jobs=1, only_physical_cores=False, verbose=False): ...
def derivatives(self, system, centers=None, include=None, exclude=None, method="auto", return_descriptor=False, n_jobs=1, only_physical_cores=False): ...
class LMBTR:
def __init__(self, geometry=None, grid=None, weighting=None, normalize_gaussians=True,
normalization="none", species=None, periodic=False, sparse=False, dtype="float64"): ...
def create(self, system, centers=None, n_jobs=1, only_physical_cores=False, verbose=False): ...
def derivatives(self, system, centers=None, include=None, exclude=None, method="auto", return_descriptor=False, n_jobs=1, only_physical_cores=False): ...
Global descriptors compute features for entire atomic structures, producing a single feature vector per structure that captures overall structural properties.
class MBTR:
def __init__(self, geometry=None, grid=None, weighting=None, normalize_gaussians=True,
normalization="none", species=None, periodic=False, sparse=False, dtype="float64"): ...
def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False): ...
def derivatives(self, system, include=None, exclude=None, method="auto", return_descriptor=False, n_jobs=1, only_physical_cores=False): ...
class ValleOganov:
def __init__(self, species, function, n, sigma, r_cut, sparse=False, dtype="float64"): ...
def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False): ...
Matrix descriptors represent atomic structures as matrices based on pairwise interactions, then flatten or transform these matrices into fixed-size feature vectors.
class CoulombMatrix:
def __init__(self, n_atoms_max, permutation="sorted_l2", sigma=None, seed=None, sparse=False, dtype="float64"): ...
def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False): ...
def get_matrix(self, system): ...
class SineMatrix:
def __init__(self, n_atoms_max, permutation="sorted_l2", sigma=None, seed=None, sparse=False, dtype="float64"): ...
def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False): ...
def get_matrix(self, system): ...
class EwaldSumMatrix:
def __init__(self, n_atoms_max, permutation="sorted_l2", sigma=None, seed=None, sparse=False, dtype="float64"): ...
def create(self, system, accuracy=1e-5, w=1, r_cut=None, g_cut=None, a=None, n_jobs=1, only_physical_cores=False, verbose=False): ...
def get_matrix(self, system, accuracy=1e-5, w=1, r_cut=None, g_cut=None, a=None): ...
Core classes provide the foundation for representing atomic systems and lattices with enhanced functionality beyond the standard ASE library.
class System:
def __init__(self, symbols=None, positions=None, numbers=None, cell=None, pbc=None, **kwargs): ...
@staticmethod
def from_atoms(atoms): ...
def get_distance_matrix(self): ...
def get_distance_matrix_within_radius(self, radius, pos=None, output_type="coo_matrix"): ...
def to_scaled(self, positions, wrap=False): ...
def to_cartesian(self, scaled_positions, wrap=False): ...
class Lattice:
def __init__(self, matrix): ...
@property
def matrix(self): ...
@property
def lengths(self): ...
@property
def abc(self): ...
def get_cartesian_coords(self, fractional_coords): ...
def get_fractional_coords(self, cart_coords): ...
Kernel methods measure similarity between atomic structures based on local atomic environment comparisons using various similarity metrics.
class AverageKernel:
def __init__(self, metric, gamma=None, degree=3, coef0=1,
kernel_params=None, normalize_kernel=True): ...
def create(self, x, y=None): ...
class REMatchKernel:
def __init__(self, alpha=0.1, threshold=1e-6, metric="linear", gamma=None,
degree=3, coef0=1, kernel_params=None, normalize_kernel=True): ...
def create(self, x, y=None): ...
Utility functions are provided for working with atomic species, geometry calculations, statistics, and array operations commonly needed in materials science applications.
# Species utilities (from dscribe.utils.species)
def symbols_to_numbers(symbols): ...
def get_atomic_numbers(species): ...
# Geometry utilities (from dscribe.utils.geometry)
def get_adjacency_matrix(radius, pos1, pos2=None, output_type="coo_matrix"): ...
def get_adjacency_list(adjacency_matrix): ...
def get_extended_system(system, radial_cutoff, centers=None, return_cell_indices=False): ...
# Statistics utilities (from dscribe.utils.stats)
def system_stats(system_iterator): ...
# Dimensionality utilities (from dscribe.utils.dimensionality)
def is1d(array, dtype=None): ...
def is2d(array, dtype=None): ...
All descriptor classes implement these standard methods:
- create(system, ...) - Create descriptor for given system(s), returns numpy array or sparse matrix
- get_number_of_features() - Get total number of features in the descriptor output
- derivatives(...) - Calculate derivatives with respect to atomic positions (where supported)
Most descriptors accept these parameters:
- system - ASE Atoms object(s) or DScribe System object(s) to process
- species - List of atomic species to include in the descriptor
- periodic - Whether to consider periodic boundary conditions
- sparse - Whether to return sparse arrays for memory efficiency
- dtype - Data type for arrays ("float64", "float32")
- n_jobs - Number of parallel processes for computation
- verbose - Whether to print progress information during computation