From 87b976f1bb9db0e2c1d0ac5377006640ad44f92f Mon Sep 17 00:00:00 2001 From: Lauri Himanen Date: Tue, 13 Aug 2024 23:49:48 +0300 Subject: [PATCH] Started refactoring MBTR. --- dscribe/descriptors/mbtr.py | 856 +++++----------------------- dscribe/descriptors/mbtr_old.py | 953 ++++++++++++++++++++++++++++++++ dscribe/ext/constants.h | 6 + dscribe/ext/descriptor.h | 2 +- dscribe/ext/ext.cpp | 52 +- dscribe/ext/geometry.cpp | 51 ++ dscribe/ext/geometry.h | 42 ++ dscribe/ext/mbtr2.cpp | 771 ++++++++++++++++++++++++++ dscribe/ext/mbtr2.h | 155 ++++++ setup.py | 2 +- 10 files changed, 2148 insertions(+), 742 deletions(-) create mode 100644 dscribe/descriptors/mbtr_old.py create mode 100644 dscribe/ext/constants.h create mode 100644 dscribe/ext/mbtr2.cpp create mode 100644 dscribe/ext/mbtr2.h diff --git a/dscribe/descriptors/mbtr.py b/dscribe/descriptors/mbtr.py index 998418c8..329d20cb 100644 --- a/dscribe/descriptors/mbtr.py +++ b/dscribe/descriptors/mbtr.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Copyright 2019 DScribe developers Licensed under the Apache License, Version 2.0 (the "License"); @@ -13,114 +12,17 @@ See the License for the specific language governing permissions and limitations under the License. """ - -import sys -import math import numpy as np +import sparse + from ase import Atoms import ase.data -from dscribe.core import System from dscribe.descriptors.descriptorglobal import DescriptorGlobal -from dscribe.ext import MBTRWrapper import dscribe.utils.geometry - - -k1_geometry_functions = set(["atomic_number"]) -k2_geometry_functions = set(["distance", "inverse_distance"]) -k3_geometry_functions = set(["angle", "cosine"]) - - -def check_grid(grid: dict): - """Used to ensure that the given grid settings are valid. - - Args: - grid(dict): Dictionary containing the grid setup. - """ - msg = "The grid information is missing the value for {}" - val_names = ["min", "max", "sigma", "n"] - for val_name in val_names: - try: - grid[val_name] - except Exception: - raise KeyError(msg.format(val_name)) - - # Make the n into integer - grid["n"] = int(grid["n"]) - if grid["min"] >= grid["max"]: - raise ValueError("The min value should be smaller than the max value.") - - -def check_geometry(geometry: dict): - """Used to ensure that the given geometry settings are valid. - - Args: - geometry: Dictionary containing the geometry setup. - """ - - if "function" in geometry: - function = geometry["function"] - valid_functions = ( - k1_geometry_functions | k2_geometry_functions | k3_geometry_functions - ) - if function not in valid_functions: - raise ValueError( - f"Unknown geometry function. Please use one of the following: {sorted(list(valid_functions))}" - ) - else: - raise ValueError("Please specify a geometry function.") - - -def check_weighting(k: int, weighting: dict, periodic: bool): - """Used to ensure that the given weighting settings are valid. - - Args: - k: The MBTR degree. - weighting: Dictionary containing the weighting setup. - periodic: Whether the descriptor is periodic or not. - """ - if weighting is not None: - if k == 1: - valid_functions = set(["unity"]) - elif k == 2: - valid_functions = set(["unity", "exp", "inverse_square"]) - elif k == 3: - valid_functions = set(["unity", "exp", "smooth_cutoff"]) - function = weighting.get("function") - if function not in valid_functions: - raise ValueError( - f"Unknown weighting function specified for k={k}. Please use one of the following: {sorted(list(valid_functions))}" - ) - else: - if function == "exp": - if "threshold" not in weighting: - raise ValueError("Missing value for 'threshold' in the weighting.") - if "scale" not in weighting and "r_cut" not in weighting: - raise ValueError( - "Provide either 'scale' or 'r_cut' in the weighting." - ) - if "scale" in weighting and "r_cut" in weighting: - raise ValueError( - "Provide either 'scale' or 'r_cut', not both in the weighting." - ) - elif function == "inverse_square": - if "r_cut" not in weighting: - raise ValueError("Missing value for 'r_cut' in the weighting.") - elif function == "smooth_cutoff": - if "r_cut" not in weighting: - raise ValueError("Missing value for 'r_cut' in the weighting.") - - # Check that weighting function is specified for periodic systems - if periodic and k > 1: - valid = False - if weighting is not None: - function = weighting.get("function") - if function is not None: - if function != "unity": - valid = True - if not valid: - raise ValueError("Periodic systems need to have a weighting function.") +from dscribe.utils.species import get_atomic_numbers +import dscribe.ext class MBTR(DescriptorGlobal): @@ -147,29 +49,19 @@ def __init__( ): """ Args: - geometry (dict): Setup the geometry function. - For example:: + geometry (dict): Setup the geometry function. For example:: "geometry": {"function": "atomic_number"} - The geometry function determines the degree :math:`k` for MBTR. - The order :math:`k` tells how many atoms are involved in the - calculation and thus also heavily influence the computational - time. - The following geometry functions are available: - * :math:`k=1` - * ``"atomic_number"``: The atomic number. - * :math:`k=2` - * ``"distance"``: Pairwise distance in angstroms. - * ``"inverse_distance"``: Pairwise inverse distance in 1/angstrom. - * :math:`k=3` - * ``"angle"``: Angle in degrees. - * ``"cosine"``: Cosine of the angle. + * "atomic_number": The atomic number. + * "distance": Pairwise distance in angstroms. + * "inverse_distance": Pairwise inverse distance in 1/angstrom. + * "angle": Angle in degrees. + * "cosine": Cosine of the angle. - grid (dict): Setup the discretization grid. - For example:: + grid (dict): Setup the discretization grid. For example:: "grid": {"min": 0.1, "max": 2, "sigma": 0.1, "n": 50} @@ -185,17 +77,11 @@ def __init__( The following weighting functions are available: - * :math:`k=1` - * ``"unity"``: No weighting. - * :math:`k=2` - * ``"unity"``: No weighting. - * ``"exp"``: Weighting of the form :math:`e^{-sx}` - * ``"inverse_square"``: Weighting of the form :math:`1/(x^2)` - * :math:`k=3` - * ``"unity"``: No weighting. - * ``"exp"``: Weighting of the form :math:`e^{-sx}` - * ``"smooth_cutoff"``: Weighting of the form :math:`f_{ij}f_{ik}`, - where :math:`f = 1+y(x/r_{cut})^{y+1}-(y+1)(x/r_{cut})^{y}` + * "unity": No weighting. + * "exp": Weighting of the form :math:`e^{-sx}` + * "inverse_square": Weighting of the form :math:`1/(x^2)` + * "smooth_cutoff": Weighting of the form :math:`f_{ij}f_{ik}`, + where :math:`f = 1+y(x/r_{cut})^{y+1}-(y+1)(x/r_{cut})^{y}` The meaning of :math:`x` changes for different terms as follows: @@ -225,12 +111,12 @@ def __init__( normalization (str): Determines the method for normalizing the output. The available options are: - * ``"none"``: No normalization. - * ``"l2"``: Normalize the Euclidean length of the output to unity. - * ``"n_atoms"``: Normalize the output by dividing it with the number + * "none": No normalization. + * "l2": Normalize the Euclidean length of the output to unity. + * "n_atoms": Normalize the output by dividing it with the number of atoms in the system. If the system is periodic, the number of atoms is determined from the given unit cell. - * ``"valle_oganov"``: Use Valle-Oganov descriptor normalization, with + * "valle_oganov": Use Valle-Oganov descriptor normalization, with system cell volume and numbers of different atoms in the cell. species (iterable): The chemical species as a list of atomic numbers or as a list of chemical symbols. Notice that this is not @@ -250,102 +136,15 @@ def __init__( * ``"float64"``: Double precision floating point numbers. """ super().__init__(periodic=periodic, sparse=sparse, dtype=dtype) - self.system = None - self.geometry = geometry - self.grid = grid - self.weighting = weighting - self.species = species - self.normalization = normalization - self.normalize_gaussians = normalize_gaussians - - if self.normalization == "valle_oganov" and not periodic: - raise ValueError( - "Valle-Oganov normalization does not support non-periodic systems." - ) - - # Initializing .create() level variables - self._interaction_limit = None - - @property - def grid(self): - return self._grid - - @grid.setter - def grid(self, value): - check_grid(value) - self._grid = value - - @property - def geometry(self): - return self._geometry - - @geometry.setter - def geometry(self, value): - check_geometry(value) - k_map = { - "atomic_number": 1, - "distance": 2, - "inverse_distance": 2, - "angle": 3, - "cosine": 3, - } - self.k = k_map[value["function"]] - self._geometry = value - - @property - def weighting(self): - return self._weighting - - @weighting.setter - def weighting(self, value): - check_weighting(self.k, value, self.periodic) - self._weighting = value - - @property - def species(self): - return self._species - - @species.setter - def species(self, value): - """Used to check the validity of given atomic numbers and to initialize - the C-memory layout for them. - - Args: - value(iterable): Chemical species either as a list of atomic - numbers or list of chemical symbols. - """ - # The species are stored as atomic numbers for internal use. - self._set_species(value) - - # Setup mappings between atom indices and types together with some - # statistics - self.atomic_number_to_index = {} - self.index_to_atomic_number = {} - for i_atom, atomic_number in enumerate(self._atomic_numbers): - self.atomic_number_to_index[atomic_number] = i_atom - self.index_to_atomic_number[i_atom] = atomic_number - self.n_elements = len(self._atomic_numbers) - self.max_atomic_number = max(self._atomic_numbers) - self.min_atomic_number = min(self._atomic_numbers) - - @property - def normalization(self): - return self._normalization - - @normalization.setter - def normalization(self, value): - """Checks that the given normalization is valid. - - Args: - value(str): The normalization method to use. - """ - norm_options = set(("none", "l2", "n_atoms", "valle_oganov")) - if value not in norm_options: - raise ValueError( - "Unknown normalization option given. Please use one of the " - "following: {}.".format(", ".join(sorted(list(norm_options)))) - ) - self._normalization = value + self.wrapper = dscribe.ext.MBTR( + {} if geometry is None else geometry, + {} if grid is None else grid, + {} if weighting is None else weighting, + normalize_gaussians, + normalization, + get_atomic_numbers(species), + periodic, + ) def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False): """Return MBTR output for the given systems. @@ -366,8 +165,8 @@ def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False): into to the console. Returns: - np.ndarray | sparse.COO: MBTR for the given systems. The return type - depends on the 'sparse' attribute. + np.ndarray | sparse.COO MBTR for the + given systems. The return type depends on the 'sparse' attribute. """ # Combine input arguments system = [system] if isinstance(system, Atoms) else system @@ -392,32 +191,32 @@ def create_single(self, system): """Return the many-body tensor representation for the given system. Args: - system (:class:`ase.Atoms` | :class:`.System`): Input system. + system (:class:`ase.Atoms`): Input system. Returns: - np.ndarray | sparse.COO: A single concatenated output vector is - returned, either as a sparse or a dense vector. + np.ndarray | sparse.COO: The return type is + specified by the 'sparse'-parameter. """ - # Ensuring variables are re-initialized when a new system is introduced - self.system = system - self._interaction_limit = len(system) - - # Validate and normalize system - self.validate_positions(system.get_positions()) - self.validate_atomic_numbers(system.get_atomic_numbers()) - pbc = self.validate_pbc(system.get_pbc()) - self.validate_cell(system.get_cell(), pbc) + # Calculate with extension + output = np.zeros((self.get_number_of_features()), dtype=np.float64) + self.wrapper.create( + output, + system.get_positions(), + system.get_atomic_numbers(), + ase.geometry.cell.complete_cell(system.get_cell()), + np.asarray(system.get_pbc(), dtype=bool), + True + ) - mbtr, _ = getattr(self, f"_get_k{self.k}")(system, True, False) + # Convert to the final output precision. + if self.dtype != "float64": + output = output.astype(self.dtype) - # Handle normalization - if self.normalization == "l2": - mbtr /= np.linalg.norm(np.array(mbtr.data)) - elif self.normalization == "n_atoms": - n_atoms = len(system) - mbtr /= n_atoms + # Make into a sparse array if requested + if self._sparse: + output = sparse.COO.from_numpy(output) - return mbtr + return output def get_number_of_features(self): """Used to inquire the final number of features that this descriptor @@ -426,17 +225,59 @@ def get_number_of_features(self): Returns: int: Number of features for this descriptor. """ - n_elem = self.n_elements - n_grid = self.grid["n"] + return self.wrapper.get_number_of_features() + + @property + def geometry(self): + return self.wrapper.geometry + + @geometry.setter + def geometry(self, value): + self.wrapper.geometry = value + + @property + def grid(self): + return self.wrapper.grid + + @grid.setter + def grid(self, value): + self.wrapper.grid = value + + @property + def weighting(self): + return self.wrapper.weighting + + @weighting.setter + def weighting(self, value): + self.wrapper.weighting = value + + @property + def k(self): + return self.wrapper.k + + @property + def species(self): + return self.wrapper.species - if self.k == 1: - n_features = n_elem * n_grid - if self.k == 2: - n_features = (n_elem * (n_elem + 1) / 2) * n_grid - if self.k == 3: - n_features = (n_elem * n_elem * (n_elem + 1) / 2) * n_grid + @species.setter + def species(self, value): + self.wrapper.species = get_atomic_numbers(value) + + @property + def normalization(self): + return self.wrapper.normalization + + @normalization.setter + def normalization(self, value): + self.wrapper.normalization = value + + @property + def normalize_gaussians(self): + return self.wrapper.normalize_gaussians - return int(n_features) + @normalize_gaussians.setter + def normalize_gaussians(self, value): + self.wrapper.normalize_gaussians = value def get_location(self, species): """Can be used to query the location of a species combination in the @@ -458,10 +299,11 @@ def get_location(self, species): """ # Check that the corresponding part is calculated k = len(species) - if self.k is not k: + if k != self.k: + species_string = ", ".join([f"'{x}'" for x in species]) raise ValueError( - "Cannot retrieve the location for {}, as the term k={} has not " - "been specified.".format(species, k) + f"Cannot retrieve the location for ({species_string}), as the used" + + f" geometry function does not match the order k={k}." ) # Change chemical elements into atomic numbers @@ -474,480 +316,30 @@ def get_location(self, species): raise ValueError("Invalid chemical species: {}".format(specie)) numbers.append(specie) - # Check that species exists - self.validate_atomic_numbers(numbers) - - # Change into internal indexing - numbers = [self.atomic_number_to_index[x] for x in numbers] - n_elem = self.n_elements - - n = self.grid["n"] - if k == 1: - i = numbers[0] - m = i - start = int(m * n) - end = int((m + 1) * n) - + # k=1 + if len(numbers) == 1: + loc = self.wrapper.get_location(numbers[0]) + start = loc[0] + end = loc[1] # k=2 - if k == 2: - if numbers[0] > numbers[1]: - numbers = list(reversed(numbers)) - - i = numbers[0] - j = numbers[1] - - # This is the index of the spectrum. It is given by enumerating the - # elements of an upper triangular matrix from left to right and top - # to bottom. - m = j + i * n_elem - i * (i + 1) / 2 - start = int(m * n) - end = int((m + 1) * n) - + if len(numbers) == 2: + loc = self.wrapper.get_location(numbers[0], numbers[1]) + start = loc[0] + end = loc[1] # k=3 - if k == 3: - if numbers[0] > numbers[2]: - numbers = list(reversed(numbers)) - - i = numbers[0] - j = numbers[1] - k = numbers[2] - - # This is the index of the spectrum. It is given by enumerating the - # elements of a three-dimensional array where for valid elements - # k>=i. The enumeration begins from [0, 0, 0], and ends at [n_elem, - # n_elem, n_elem], looping the elements in the order k, i, j. - m = j * n_elem * (n_elem + 1) / 2 + k + i * n_elem - i * (i + 1) / 2 - start = int(m * n) - end = int((m + 1) * n) + if len(numbers) == 3: + loc = self.wrapper.get_location(numbers[0], numbers[1], numbers[2]) + start = loc[0] + end = loc[1] return slice(start, end) - def _get_k1(self, system, return_descriptor, return_derivatives): - """Calculates the first order term and/or its derivatives with - regard to atomic positions. - - Returns: - 1D or 3D ndarray: K1 values. Returns a 1D array. If - return_descriptor=False, returns an array of shape (0). - 3D ndarray: K1 derivatives. If return_derivatives=False, returns an - array of shape (0,0,0). - """ - start = self.grid["min"] - stop = self.grid["max"] - n = self.grid["n"] - sigma = self.grid["sigma"] - - n_elem = self.n_elements - n_features = n_elem * n - - if return_descriptor: - # Determine the geometry function - geom_func_name = self.geometry["function"] - - cmbtr = MBTRWrapper( - self.atomic_number_to_index, - self._interaction_limit, - np.zeros((len(system), 3), dtype=int), - ) - - k1 = np.zeros((n_features), dtype=np.float64) - cmbtr.get_k1( - k1, - system.get_atomic_numbers(), - geom_func_name.encode(), - b"unity", - {}, - start, - stop, - sigma, - n, - ) - else: - k1 = np.zeros((0), dtype=np.float64) - - if return_derivatives: - k1_d = np.zeros((self._interaction_limit, 3, n_features), dtype=np.float64) - else: - k1_d = np.zeros((0, 0, 0), dtype=np.float64) - - # Denormalize if requested - if not self.normalize_gaussians: - max_val = 1 / (sigma * math.sqrt(2 * math.pi)) - k1 /= max_val - k1_d /= max_val - - # Convert to the final output precision. - if self.dtype == "float32": - k1 = k1.astype(self.dtype) - k1_d = k1_d.astype(self.dtype) - - return (k1, k1_d) - - def _get_k2(self, system, return_descriptor, return_derivatives): - """Calculates the second order term and/or its derivatives with - regard to atomic positions. - Returns: - 1D ndarray: K2 values. Returns a 1D array. If - return_descriptor=False, returns an array of shape (0). - 3D ndarray: K2 derivatives. If return_derivatives=False, returns an - array of shape (0,0,0). - """ - start = self.grid["min"] - stop = self.grid["max"] - n = self.grid["n"] - sigma = self.grid["sigma"] - - # Determine the weighting function and possible radial cutoff - r_cut = None - parameters = {} - if self.weighting is not None: - weighting_function = self.weighting["function"] - if weighting_function == "exp": - threshold = self.weighting["threshold"] - r_cut = self.weighting.get("r_cut") - scale = self.weighting.get("scale") - if scale is not None and r_cut is None: - r_cut = -math.log(threshold) / scale - elif scale is None and r_cut is not None: - scale = -math.log(threshold) / r_cut - parameters = {b"scale": scale, b"threshold": threshold} - elif weighting_function == "inverse_square": - r_cut = self.weighting["r_cut"] - else: - weighting_function = "unity" - - # Determine the geometry function - geom_func_name = self.geometry["function"] - - # If needed, create the extended system - if self.periodic: - centers = system.get_positions() - ext_system, cell_indices = dscribe.utils.geometry.get_extended_system( - system, r_cut, centers, return_cell_indices=True - ) - ext_system = System.from_atoms(ext_system) - else: - ext_system = System.from_atoms(system) - cell_indices = np.zeros((len(system), 3), dtype=int) - - cmbtr = MBTRWrapper( - self.atomic_number_to_index, self._interaction_limit, cell_indices - ) - - # If radial cutoff is finite, use it to calculate the sparse - # distance matrix to reduce computational complexity from O(n^2) to - # O(n log(n)) - n_atoms = len(ext_system) - if r_cut is not None: - dmat = ext_system.get_distance_matrix_within_radius(r_cut) - adj_list = dscribe.utils.geometry.get_adjacency_list(dmat) - dmat_dense = np.full( - (n_atoms, n_atoms), sys.float_info.max - ) # The non-neighbor values are treated as "infinitely far". - dmat_dense[dmat.row, dmat.col] = dmat.data - # If no weighting is used, the full distance matrix is calculated - else: - dmat_dense = ext_system.get_distance_matrix() - adj_list = np.tile(np.arange(n_atoms), (n_atoms, 1)) - - n_elem = self.n_elements - n_features = int((n_elem * (n_elem + 1) / 2) * n) - - if return_descriptor: - k2 = np.zeros((n_features), dtype=np.float64) - else: - k2 = np.zeros((0), dtype=np.float64) - - if return_derivatives: - k2_d = np.zeros((self._interaction_limit, 3, n_features), dtype=np.float64) - else: - k2_d = np.zeros((0, 0, 0), dtype=np.float64) - - # Generate derivatives for k=2 term - cmbtr.get_k2( - k2, - k2_d, - return_descriptor, - return_derivatives, - ext_system.get_atomic_numbers(), - ext_system.get_positions(), - dmat_dense, - adj_list, - geom_func_name.encode(), - weighting_function.encode(), - parameters, - start, - stop, - sigma, - n, - ) - - # Denormalize if requested - if not self.normalize_gaussians: - max_val = 1 / (sigma * math.sqrt(2 * math.pi)) - k2 /= max_val - k2_d /= max_val - - # Valle-Oganov normalization is calculated separately for each pair. - # Not implemented for derivatives. - if self.normalization == "valle_oganov": - volume = self.system.cell.volume - # Calculate the amount of each element for N_A*N_B term - values, counts = np.unique( - self.system.get_atomic_numbers(), return_counts=True - ) - counts = dict(zip(values, counts)) - for i_z in values: - for j_z in values: - i = self.atomic_number_to_index[i_z] - j = self.atomic_number_to_index[j_z] - if j < i: - continue - if i == j: - count_product = 0.5 * counts[i_z] * counts[j_z] - else: - count_product = counts[i_z] * counts[j_z] - - # This is the index of the spectrum. It is given by enumerating the - # elements of an upper triangular matrix from left to right and top - # to bottom. - m = int(j + i * n_elem - i * (i + 1) / 2) - start = m * n - end = (m + 1) * n - norm_factor = volume / (count_product * 4 * np.pi) - - k2[start:end] *= norm_factor - k2_d[:, :, start:end] *= norm_factor - - # Convert to the final output precision. - if self.dtype == "float32": - k2 = k2.astype(self.dtype) - k2_d = k2_d.astype(self.dtype) - - return (k2, k2_d) - - def _get_k3(self, system, return_descriptor, return_derivatives): - """Calculates the third order term and/or its derivatives with - regard to atomic positions. - Returns: - 1D ndarray: K3 values. Returns a 1D array. If - return_descriptor=False, returns an array of shape (0). - 3D ndarray: K3 derivatives. If return_derivatives=False, returns an - array of shape (0,0,0). - """ - start = self.grid["min"] - stop = self.grid["max"] - n = self.grid["n"] - sigma = self.grid["sigma"] - - # Determine the weighting function and possible radial cutoff - r_cut = None - parameters = {} - if self.weighting is not None: - weighting_function = self.weighting["function"] - if weighting_function == "exp": - threshold = self.weighting["threshold"] - r_cut = self.weighting.get("r_cut") - scale = self.weighting.get("scale") - # If we want to limit the triplets to a distance r_cut, we need - # to allow x=2*r_cut in the case of k=3. - if scale is not None and r_cut is None: - r_cut = -0.5 * math.log(threshold) / scale - elif scale is None and r_cut is not None: - scale = -0.5 * math.log(threshold) / r_cut - parameters = {b"scale": scale, b"threshold": threshold} - if weighting_function == "smooth_cutoff": - try: - sharpness = self.weighting["sharpness"] - except Exception: - sharpness = 2 - parameters = { - b"sharpness": sharpness, - b"cutoff": self.weighting["r_cut"], - } - # Evaluating smooth-cutoff weighting values requires distances - # between two neighbours of an atom, and the maximum distance - # between them is twice the cutoff radius. To include the - # neighbour-to-neighbour distances in the distance matrix, the - # neighbour list is generated with double radius. - r_cut = 2 * self.weighting["r_cut"] - else: - weighting_function = "unity" - - # Determine the geometry function - geom_func_name = self.geometry["function"] - - # If needed, create the extended system - if self.periodic: - centers = system.get_positions() - ext_system, cell_indices = dscribe.utils.geometry.get_extended_system( - system, r_cut, centers, return_cell_indices=True - ) - ext_system = System.from_atoms(ext_system) - else: - ext_system = System.from_atoms(system) - cell_indices = np.zeros((len(system), 3), dtype=int) - - cmbtr = MBTRWrapper( - self.atomic_number_to_index, self._interaction_limit, cell_indices - ) - - n_atoms = len(ext_system) - if r_cut is not None: - dmat = ext_system.get_distance_matrix_within_radius(r_cut) - adj_list = dscribe.utils.geometry.get_adjacency_list(dmat) - dmat_dense = np.full( - (n_atoms, n_atoms), sys.float_info.max - ) # The non-neighbor values are treated as "infinitely far". - dmat_dense[dmat.col, dmat.row] = dmat.data - # If no weighting is used, the full distance matrix is calculated - else: - dmat_dense = ext_system.get_distance_matrix() - adj_list = np.tile(np.arange(n_atoms), (n_atoms, 1)) - - n_elem = self.n_elements - n_features = int((n_elem * n_elem * (n_elem + 1) / 2) * n) - - if return_descriptor: - k3 = np.zeros((n_features), dtype=np.float64) - else: - k3 = np.zeros((0), dtype=np.float64) - - if return_derivatives: - k3_d = np.zeros((self._interaction_limit, 3, n_features), dtype=np.float64) - else: - k3_d = np.zeros((0, 0, 0), dtype=np.float64) - - # Compute the k=3 term and its derivative - cmbtr.get_k3( - k3, - k3_d, - return_descriptor, - return_derivatives, - ext_system.get_atomic_numbers(), - ext_system.get_positions(), - dmat_dense, - adj_list, - geom_func_name.encode(), - weighting_function.encode(), - parameters, - start, - stop, - sigma, - n, - ) - - # Denormalize if requested - if not self.normalize_gaussians: - max_val = 1 / (sigma * math.sqrt(2 * math.pi)) - k3 /= max_val - k3_d /= max_val - - # Valle-Oganov normalization is calculated separately for each triplet - # Not implemented for derivatives. - if self.normalization == "valle_oganov": - volume = self.system.cell.volume - # Calculate the amount of each element for N_A*N_B*N_C term - values, counts = np.unique( - self.system.get_atomic_numbers(), return_counts=True - ) - counts = dict(zip(values, counts)) - for i_z in values: - for j_z in values: - for k_z in values: - i = self.atomic_number_to_index[i_z] - j = self.atomic_number_to_index[j_z] - k = self.atomic_number_to_index[k_z] - if k < i: - continue - # This is the index of the spectrum. It is given by enumerating the - # elements of a three-dimensional array where for valid elements - # k>=i. The enumeration begins from [0, 0, 0], and ends at [n_elem, - # n_elem, n_elem], looping the elements in the order j, i, k. - m = int( - j * n_elem * (n_elem + 1) / 2 - + k - + i * n_elem - - i * (i + 1) / 2 - ) - start = m * n - end = (m + 1) * n - count_product = counts[i_z] * counts[j_z] * counts[k_z] - norm_factor = volume / count_product - - k3[start:end] *= norm_factor - k3_d[:, :, start:end] *= norm_factor - - # Convert to the final output precision. - if self.dtype == "float32": - k3 = k3.astype(self.dtype) - k3_d = k3_d.astype(self.dtype) - - return (k3, k3_d) - - def validate_derivatives_method(self, method): - """Used to validate and determine the final method for calculating the - derivatives. - """ - methods = {"numerical", "analytical", "auto"} + def get_derivatives_method(self, method): + methods = {"numerical", "auto"} if method not in methods: raise ValueError( "Invalid method specified. Please choose from: {}".format(methods) ) - - if method == "numerical": - return method - - # Check if analytical derivatives can be used - try: - supported_normalization = ["none", "n_atoms", "valle_oganov"] - if self.normalization not in supported_normalization: - raise ValueError( - "Analytical derivatives not implemented for normalization option '{}'. Please choose from: {}".format( - self.normalization, supported_normalization - ) - ) - # Derivatives are not currently implemented for all k3 options - if self.k == 3: - # "angle" function is not differentiable - if self.geometry["function"] == "angle": - raise ValueError( - "Analytical derivatives not implemented for k3 geometry function 'angle'." - ) - except Exception as e: - if method == "analytical": - raise e - elif method == "auto": - method = "numerical" - else: - if method == "auto": - method = "analytical" - - return method - - def derivatives_analytical(self, d, c, system, indices, return_descriptor): - # Ensuring variables are re-initialized when a new system is introduced - self.system = system - self._interaction_limit = len(system) - - # Check that the system does not have elements that are not in the list - # of atomic numbers - self.validate_atomic_numbers(system.get_atomic_numbers()) - - mbtr, mbtr_d = getattr(self, f"_get_k{self.k}")(system, return_descriptor, True) - - # Handle normalization - if self.normalization == "n_atoms": - n_atoms = len(self.system) - mbtr /= n_atoms - mbtr_d /= n_atoms - - # For now, the derivatives are calculated with regard to all atomic - # positions. The desired indices are extracted here at the end. - i = 0 - for index in indices: - d[i, :] = mbtr_d[index, :, :] - i += 1 - - if return_descriptor: - np.copyto(c, mbtr) + if method == "auto": + method = "numerical" + return method \ No newline at end of file diff --git a/dscribe/descriptors/mbtr_old.py b/dscribe/descriptors/mbtr_old.py new file mode 100644 index 00000000..998418c8 --- /dev/null +++ b/dscribe/descriptors/mbtr_old.py @@ -0,0 +1,953 @@ +# -*- coding: utf-8 -*- +"""Copyright 2019 DScribe developers + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import sys +import math +import numpy as np + +from ase import Atoms +import ase.data + +from dscribe.core import System +from dscribe.descriptors.descriptorglobal import DescriptorGlobal +from dscribe.ext import MBTRWrapper +import dscribe.utils.geometry + + +k1_geometry_functions = set(["atomic_number"]) +k2_geometry_functions = set(["distance", "inverse_distance"]) +k3_geometry_functions = set(["angle", "cosine"]) + + +def check_grid(grid: dict): + """Used to ensure that the given grid settings are valid. + + Args: + grid(dict): Dictionary containing the grid setup. + """ + msg = "The grid information is missing the value for {}" + val_names = ["min", "max", "sigma", "n"] + for val_name in val_names: + try: + grid[val_name] + except Exception: + raise KeyError(msg.format(val_name)) + + # Make the n into integer + grid["n"] = int(grid["n"]) + if grid["min"] >= grid["max"]: + raise ValueError("The min value should be smaller than the max value.") + + +def check_geometry(geometry: dict): + """Used to ensure that the given geometry settings are valid. + + Args: + geometry: Dictionary containing the geometry setup. + """ + + if "function" in geometry: + function = geometry["function"] + valid_functions = ( + k1_geometry_functions | k2_geometry_functions | k3_geometry_functions + ) + if function not in valid_functions: + raise ValueError( + f"Unknown geometry function. Please use one of the following: {sorted(list(valid_functions))}" + ) + else: + raise ValueError("Please specify a geometry function.") + + +def check_weighting(k: int, weighting: dict, periodic: bool): + """Used to ensure that the given weighting settings are valid. + + Args: + k: The MBTR degree. + weighting: Dictionary containing the weighting setup. + periodic: Whether the descriptor is periodic or not. + """ + if weighting is not None: + if k == 1: + valid_functions = set(["unity"]) + elif k == 2: + valid_functions = set(["unity", "exp", "inverse_square"]) + elif k == 3: + valid_functions = set(["unity", "exp", "smooth_cutoff"]) + function = weighting.get("function") + if function not in valid_functions: + raise ValueError( + f"Unknown weighting function specified for k={k}. Please use one of the following: {sorted(list(valid_functions))}" + ) + else: + if function == "exp": + if "threshold" not in weighting: + raise ValueError("Missing value for 'threshold' in the weighting.") + if "scale" not in weighting and "r_cut" not in weighting: + raise ValueError( + "Provide either 'scale' or 'r_cut' in the weighting." + ) + if "scale" in weighting and "r_cut" in weighting: + raise ValueError( + "Provide either 'scale' or 'r_cut', not both in the weighting." + ) + elif function == "inverse_square": + if "r_cut" not in weighting: + raise ValueError("Missing value for 'r_cut' in the weighting.") + elif function == "smooth_cutoff": + if "r_cut" not in weighting: + raise ValueError("Missing value for 'r_cut' in the weighting.") + + # Check that weighting function is specified for periodic systems + if periodic and k > 1: + valid = False + if weighting is not None: + function = weighting.get("function") + if function is not None: + if function != "unity": + valid = True + if not valid: + raise ValueError("Periodic systems need to have a weighting function.") + + +class MBTR(DescriptorGlobal): + """Implementation of the Many-body tensor representation. + + You can use this descriptor for finite and periodic systems. When dealing + with periodic systems or when using machine learning models that use the + Euclidean norm to measure distance between vectors, it is advisable to use + some form of normalization. This implementation does not support the use of + a non-identity correlation matrix. + """ + + def __init__( + self, + geometry=None, + grid=None, + weighting=None, + normalize_gaussians=True, + normalization="none", + species=None, + periodic=False, + sparse=False, + dtype="float64", + ): + """ + Args: + geometry (dict): Setup the geometry function. + For example:: + + "geometry": {"function": "atomic_number"} + + The geometry function determines the degree :math:`k` for MBTR. + The order :math:`k` tells how many atoms are involved in the + calculation and thus also heavily influence the computational + time. + + The following geometry functions are available: + + * :math:`k=1` + * ``"atomic_number"``: The atomic number. + * :math:`k=2` + * ``"distance"``: Pairwise distance in angstroms. + * ``"inverse_distance"``: Pairwise inverse distance in 1/angstrom. + * :math:`k=3` + * ``"angle"``: Angle in degrees. + * ``"cosine"``: Cosine of the angle. + + grid (dict): Setup the discretization grid. + For example:: + + "grid": {"min": 0.1, "max": 2, "sigma": 0.1, "n": 50} + + In the grid setup *min* is the minimum value of the axis, *max* + is the maximum value of the axis, *sigma* is the standard + deviation of the gaussian broadening and *n* is the number of + points sampled on the grid. + + weighting (dict): Setup the weighting function and its parameters. + For example:: + + "weighting" : {"function": "exp", "r_cut": 10, "threshold": 1e-3} + + The following weighting functions are available: + + * :math:`k=1` + * ``"unity"``: No weighting. + * :math:`k=2` + * ``"unity"``: No weighting. + * ``"exp"``: Weighting of the form :math:`e^{-sx}` + * ``"inverse_square"``: Weighting of the form :math:`1/(x^2)` + * :math:`k=3` + * ``"unity"``: No weighting. + * ``"exp"``: Weighting of the form :math:`e^{-sx}` + * ``"smooth_cutoff"``: Weighting of the form :math:`f_{ij}f_{ik}`, + where :math:`f = 1+y(x/r_{cut})^{y+1}-(y+1)(x/r_{cut})^{y}` + + The meaning of :math:`x` changes for different terms as follows: + + * For :math:`k=2`: :math:`x` = Distance between A->B + * For :math:`k=3`: :math:`x` = Distance from A->B->C->A. + + The exponential weighting is motivated by the exponential decay + of screened Coulombic interactions in solids. In the exponential + weighting the parameters **threshold** determines the value of + the weighting function after which the rest of the terms will be + ignored. Either the parameter **scale** or **r_cut** can be used + to determine the parameter :math:`s`: **scale** directly + corresponds to this value whereas **r_cut** can be used to + indirectly determine it through :math:`s=-\log()`:. + + The inverse square and smooth cutoff function weightings use a + cutoff parameter **r_cut**, which is a radial distance after + which the rest of the atoms will be ignored. For the smooth + cutoff function, additional weighting key **sharpness** can be + added, which changes the value of :math:`y`. If a value for it + is not provided, it defaults to `2`. + + normalize_gaussians (bool): Determines whether the gaussians are + normalized to an area of 1. Defaults to True. If False, the + normalization factor is dropped and the gaussians have the form. + :math:`e^{-(x-\mu)^2/2\sigma^2}` + normalization (str): Determines the method for normalizing the + output. The available options are: + + * ``"none"``: No normalization. + * ``"l2"``: Normalize the Euclidean length of the output to unity. + * ``"n_atoms"``: Normalize the output by dividing it with the number + of atoms in the system. If the system is periodic, the number + of atoms is determined from the given unit cell. + * ``"valle_oganov"``: Use Valle-Oganov descriptor normalization, with + system cell volume and numbers of different atoms in the cell. + species (iterable): The chemical species as a list of atomic + numbers or as a list of chemical symbols. Notice that this is not + the atomic numbers that are present for an individual system, but + should contain all the elements that are ever going to be + encountered when creating the descriptors for a set of systems. + Keeping the number of chemical speices as low as possible is + preferable. + periodic (bool): Set to true if you want the descriptor output to + respect the periodicity of the atomic systems (see the + pbc-parameter in the constructor of ase.Atoms). + sparse (bool): Whether the output should be a sparse matrix or a + dense numpy array. + dtype (str): The data type of the output. Valid options are: + + * ``"float32"``: Single precision floating point numbers. + * ``"float64"``: Double precision floating point numbers. + """ + super().__init__(periodic=periodic, sparse=sparse, dtype=dtype) + self.system = None + self.geometry = geometry + self.grid = grid + self.weighting = weighting + self.species = species + self.normalization = normalization + self.normalize_gaussians = normalize_gaussians + + if self.normalization == "valle_oganov" and not periodic: + raise ValueError( + "Valle-Oganov normalization does not support non-periodic systems." + ) + + # Initializing .create() level variables + self._interaction_limit = None + + @property + def grid(self): + return self._grid + + @grid.setter + def grid(self, value): + check_grid(value) + self._grid = value + + @property + def geometry(self): + return self._geometry + + @geometry.setter + def geometry(self, value): + check_geometry(value) + k_map = { + "atomic_number": 1, + "distance": 2, + "inverse_distance": 2, + "angle": 3, + "cosine": 3, + } + self.k = k_map[value["function"]] + self._geometry = value + + @property + def weighting(self): + return self._weighting + + @weighting.setter + def weighting(self, value): + check_weighting(self.k, value, self.periodic) + self._weighting = value + + @property + def species(self): + return self._species + + @species.setter + def species(self, value): + """Used to check the validity of given atomic numbers and to initialize + the C-memory layout for them. + + Args: + value(iterable): Chemical species either as a list of atomic + numbers or list of chemical symbols. + """ + # The species are stored as atomic numbers for internal use. + self._set_species(value) + + # Setup mappings between atom indices and types together with some + # statistics + self.atomic_number_to_index = {} + self.index_to_atomic_number = {} + for i_atom, atomic_number in enumerate(self._atomic_numbers): + self.atomic_number_to_index[atomic_number] = i_atom + self.index_to_atomic_number[i_atom] = atomic_number + self.n_elements = len(self._atomic_numbers) + self.max_atomic_number = max(self._atomic_numbers) + self.min_atomic_number = min(self._atomic_numbers) + + @property + def normalization(self): + return self._normalization + + @normalization.setter + def normalization(self, value): + """Checks that the given normalization is valid. + + Args: + value(str): The normalization method to use. + """ + norm_options = set(("none", "l2", "n_atoms", "valle_oganov")) + if value not in norm_options: + raise ValueError( + "Unknown normalization option given. Please use one of the " + "following: {}.".format(", ".join(sorted(list(norm_options)))) + ) + self._normalization = value + + def create(self, system, n_jobs=1, only_physical_cores=False, verbose=False): + """Return MBTR output for the given systems. + + Args: + system (:class:`ase.Atoms` or list of :class:`ase.Atoms`): One or many atomic structures. + n_jobs (int): Number of parallel jobs to instantiate. Parallellizes + the calculation across samples. Defaults to serial calculation + with n_jobs=1. If a negative number is given, the used cpus + will be calculated with, n_cpus + n_jobs, where n_cpus is the + amount of CPUs as reported by the OS. With only_physical_cores + you can control which types of CPUs are counted in n_cpus. + only_physical_cores (bool): If a negative n_jobs is given, + determines which types of CPUs are used in calculating the + number of jobs. If set to False (default), also virtual CPUs + are counted. If set to True, only physical CPUs are counted. + verbose(bool): Controls whether to print the progress of each job + into to the console. + + Returns: + np.ndarray | sparse.COO: MBTR for the given systems. The return type + depends on the 'sparse' attribute. + """ + # Combine input arguments + system = [system] if isinstance(system, Atoms) else system + inp = [(i_sys,) for i_sys in system] + + # Determine if the outputs have a fixed size + static_size = [self.get_number_of_features()] + + # Create in parallel + output = self.create_parallel( + inp, + self.create_single, + n_jobs, + static_size, + only_physical_cores, + verbose=verbose, + ) + + return output + + def create_single(self, system): + """Return the many-body tensor representation for the given system. + + Args: + system (:class:`ase.Atoms` | :class:`.System`): Input system. + + Returns: + np.ndarray | sparse.COO: A single concatenated output vector is + returned, either as a sparse or a dense vector. + """ + # Ensuring variables are re-initialized when a new system is introduced + self.system = system + self._interaction_limit = len(system) + + # Validate and normalize system + self.validate_positions(system.get_positions()) + self.validate_atomic_numbers(system.get_atomic_numbers()) + pbc = self.validate_pbc(system.get_pbc()) + self.validate_cell(system.get_cell(), pbc) + + mbtr, _ = getattr(self, f"_get_k{self.k}")(system, True, False) + + # Handle normalization + if self.normalization == "l2": + mbtr /= np.linalg.norm(np.array(mbtr.data)) + elif self.normalization == "n_atoms": + n_atoms = len(system) + mbtr /= n_atoms + + return mbtr + + def get_number_of_features(self): + """Used to inquire the final number of features that this descriptor + will have. + + Returns: + int: Number of features for this descriptor. + """ + n_elem = self.n_elements + n_grid = self.grid["n"] + + if self.k == 1: + n_features = n_elem * n_grid + if self.k == 2: + n_features = (n_elem * (n_elem + 1) / 2) * n_grid + if self.k == 3: + n_features = (n_elem * n_elem * (n_elem + 1) / 2) * n_grid + + return int(n_features) + + def get_location(self, species): + """Can be used to query the location of a species combination in the + the output. + + Args: + species(tuple): A tuple containing a species combination as + chemical symbols or atomic numbers. The tuple can be for example + ("H"), ("H", "O") or ("H", "O", "H"). + + Returns: + slice: slice containing the location of the specified species + combination. The location is given as a python slice-object, that + can be directly used to target ranges in the output. + + Raises: + ValueError: If the requested species combination is not in the + output or if invalid species defined. + """ + # Check that the corresponding part is calculated + k = len(species) + if self.k is not k: + raise ValueError( + "Cannot retrieve the location for {}, as the term k={} has not " + "been specified.".format(species, k) + ) + + # Change chemical elements into atomic numbers + numbers = [] + for specie in species: + if isinstance(specie, str): + try: + specie = ase.data.atomic_numbers[specie] + except KeyError: + raise ValueError("Invalid chemical species: {}".format(specie)) + numbers.append(specie) + + # Check that species exists + self.validate_atomic_numbers(numbers) + + # Change into internal indexing + numbers = [self.atomic_number_to_index[x] for x in numbers] + n_elem = self.n_elements + + n = self.grid["n"] + if k == 1: + i = numbers[0] + m = i + start = int(m * n) + end = int((m + 1) * n) + + # k=2 + if k == 2: + if numbers[0] > numbers[1]: + numbers = list(reversed(numbers)) + + i = numbers[0] + j = numbers[1] + + # This is the index of the spectrum. It is given by enumerating the + # elements of an upper triangular matrix from left to right and top + # to bottom. + m = j + i * n_elem - i * (i + 1) / 2 + start = int(m * n) + end = int((m + 1) * n) + + # k=3 + if k == 3: + if numbers[0] > numbers[2]: + numbers = list(reversed(numbers)) + + i = numbers[0] + j = numbers[1] + k = numbers[2] + + # This is the index of the spectrum. It is given by enumerating the + # elements of a three-dimensional array where for valid elements + # k>=i. The enumeration begins from [0, 0, 0], and ends at [n_elem, + # n_elem, n_elem], looping the elements in the order k, i, j. + m = j * n_elem * (n_elem + 1) / 2 + k + i * n_elem - i * (i + 1) / 2 + start = int(m * n) + end = int((m + 1) * n) + + return slice(start, end) + + def _get_k1(self, system, return_descriptor, return_derivatives): + """Calculates the first order term and/or its derivatives with + regard to atomic positions. + + Returns: + 1D or 3D ndarray: K1 values. Returns a 1D array. If + return_descriptor=False, returns an array of shape (0). + 3D ndarray: K1 derivatives. If return_derivatives=False, returns an + array of shape (0,0,0). + """ + start = self.grid["min"] + stop = self.grid["max"] + n = self.grid["n"] + sigma = self.grid["sigma"] + + n_elem = self.n_elements + n_features = n_elem * n + + if return_descriptor: + # Determine the geometry function + geom_func_name = self.geometry["function"] + + cmbtr = MBTRWrapper( + self.atomic_number_to_index, + self._interaction_limit, + np.zeros((len(system), 3), dtype=int), + ) + + k1 = np.zeros((n_features), dtype=np.float64) + cmbtr.get_k1( + k1, + system.get_atomic_numbers(), + geom_func_name.encode(), + b"unity", + {}, + start, + stop, + sigma, + n, + ) + else: + k1 = np.zeros((0), dtype=np.float64) + + if return_derivatives: + k1_d = np.zeros((self._interaction_limit, 3, n_features), dtype=np.float64) + else: + k1_d = np.zeros((0, 0, 0), dtype=np.float64) + + # Denormalize if requested + if not self.normalize_gaussians: + max_val = 1 / (sigma * math.sqrt(2 * math.pi)) + k1 /= max_val + k1_d /= max_val + + # Convert to the final output precision. + if self.dtype == "float32": + k1 = k1.astype(self.dtype) + k1_d = k1_d.astype(self.dtype) + + return (k1, k1_d) + + def _get_k2(self, system, return_descriptor, return_derivatives): + """Calculates the second order term and/or its derivatives with + regard to atomic positions. + Returns: + 1D ndarray: K2 values. Returns a 1D array. If + return_descriptor=False, returns an array of shape (0). + 3D ndarray: K2 derivatives. If return_derivatives=False, returns an + array of shape (0,0,0). + """ + start = self.grid["min"] + stop = self.grid["max"] + n = self.grid["n"] + sigma = self.grid["sigma"] + + # Determine the weighting function and possible radial cutoff + r_cut = None + parameters = {} + if self.weighting is not None: + weighting_function = self.weighting["function"] + if weighting_function == "exp": + threshold = self.weighting["threshold"] + r_cut = self.weighting.get("r_cut") + scale = self.weighting.get("scale") + if scale is not None and r_cut is None: + r_cut = -math.log(threshold) / scale + elif scale is None and r_cut is not None: + scale = -math.log(threshold) / r_cut + parameters = {b"scale": scale, b"threshold": threshold} + elif weighting_function == "inverse_square": + r_cut = self.weighting["r_cut"] + else: + weighting_function = "unity" + + # Determine the geometry function + geom_func_name = self.geometry["function"] + + # If needed, create the extended system + if self.periodic: + centers = system.get_positions() + ext_system, cell_indices = dscribe.utils.geometry.get_extended_system( + system, r_cut, centers, return_cell_indices=True + ) + ext_system = System.from_atoms(ext_system) + else: + ext_system = System.from_atoms(system) + cell_indices = np.zeros((len(system), 3), dtype=int) + + cmbtr = MBTRWrapper( + self.atomic_number_to_index, self._interaction_limit, cell_indices + ) + + # If radial cutoff is finite, use it to calculate the sparse + # distance matrix to reduce computational complexity from O(n^2) to + # O(n log(n)) + n_atoms = len(ext_system) + if r_cut is not None: + dmat = ext_system.get_distance_matrix_within_radius(r_cut) + adj_list = dscribe.utils.geometry.get_adjacency_list(dmat) + dmat_dense = np.full( + (n_atoms, n_atoms), sys.float_info.max + ) # The non-neighbor values are treated as "infinitely far". + dmat_dense[dmat.row, dmat.col] = dmat.data + # If no weighting is used, the full distance matrix is calculated + else: + dmat_dense = ext_system.get_distance_matrix() + adj_list = np.tile(np.arange(n_atoms), (n_atoms, 1)) + + n_elem = self.n_elements + n_features = int((n_elem * (n_elem + 1) / 2) * n) + + if return_descriptor: + k2 = np.zeros((n_features), dtype=np.float64) + else: + k2 = np.zeros((0), dtype=np.float64) + + if return_derivatives: + k2_d = np.zeros((self._interaction_limit, 3, n_features), dtype=np.float64) + else: + k2_d = np.zeros((0, 0, 0), dtype=np.float64) + + # Generate derivatives for k=2 term + cmbtr.get_k2( + k2, + k2_d, + return_descriptor, + return_derivatives, + ext_system.get_atomic_numbers(), + ext_system.get_positions(), + dmat_dense, + adj_list, + geom_func_name.encode(), + weighting_function.encode(), + parameters, + start, + stop, + sigma, + n, + ) + + # Denormalize if requested + if not self.normalize_gaussians: + max_val = 1 / (sigma * math.sqrt(2 * math.pi)) + k2 /= max_val + k2_d /= max_val + + # Valle-Oganov normalization is calculated separately for each pair. + # Not implemented for derivatives. + if self.normalization == "valle_oganov": + volume = self.system.cell.volume + # Calculate the amount of each element for N_A*N_B term + values, counts = np.unique( + self.system.get_atomic_numbers(), return_counts=True + ) + counts = dict(zip(values, counts)) + for i_z in values: + for j_z in values: + i = self.atomic_number_to_index[i_z] + j = self.atomic_number_to_index[j_z] + if j < i: + continue + if i == j: + count_product = 0.5 * counts[i_z] * counts[j_z] + else: + count_product = counts[i_z] * counts[j_z] + + # This is the index of the spectrum. It is given by enumerating the + # elements of an upper triangular matrix from left to right and top + # to bottom. + m = int(j + i * n_elem - i * (i + 1) / 2) + start = m * n + end = (m + 1) * n + norm_factor = volume / (count_product * 4 * np.pi) + + k2[start:end] *= norm_factor + k2_d[:, :, start:end] *= norm_factor + + # Convert to the final output precision. + if self.dtype == "float32": + k2 = k2.astype(self.dtype) + k2_d = k2_d.astype(self.dtype) + + return (k2, k2_d) + + def _get_k3(self, system, return_descriptor, return_derivatives): + """Calculates the third order term and/or its derivatives with + regard to atomic positions. + Returns: + 1D ndarray: K3 values. Returns a 1D array. If + return_descriptor=False, returns an array of shape (0). + 3D ndarray: K3 derivatives. If return_derivatives=False, returns an + array of shape (0,0,0). + """ + start = self.grid["min"] + stop = self.grid["max"] + n = self.grid["n"] + sigma = self.grid["sigma"] + + # Determine the weighting function and possible radial cutoff + r_cut = None + parameters = {} + if self.weighting is not None: + weighting_function = self.weighting["function"] + if weighting_function == "exp": + threshold = self.weighting["threshold"] + r_cut = self.weighting.get("r_cut") + scale = self.weighting.get("scale") + # If we want to limit the triplets to a distance r_cut, we need + # to allow x=2*r_cut in the case of k=3. + if scale is not None and r_cut is None: + r_cut = -0.5 * math.log(threshold) / scale + elif scale is None and r_cut is not None: + scale = -0.5 * math.log(threshold) / r_cut + parameters = {b"scale": scale, b"threshold": threshold} + if weighting_function == "smooth_cutoff": + try: + sharpness = self.weighting["sharpness"] + except Exception: + sharpness = 2 + parameters = { + b"sharpness": sharpness, + b"cutoff": self.weighting["r_cut"], + } + # Evaluating smooth-cutoff weighting values requires distances + # between two neighbours of an atom, and the maximum distance + # between them is twice the cutoff radius. To include the + # neighbour-to-neighbour distances in the distance matrix, the + # neighbour list is generated with double radius. + r_cut = 2 * self.weighting["r_cut"] + else: + weighting_function = "unity" + + # Determine the geometry function + geom_func_name = self.geometry["function"] + + # If needed, create the extended system + if self.periodic: + centers = system.get_positions() + ext_system, cell_indices = dscribe.utils.geometry.get_extended_system( + system, r_cut, centers, return_cell_indices=True + ) + ext_system = System.from_atoms(ext_system) + else: + ext_system = System.from_atoms(system) + cell_indices = np.zeros((len(system), 3), dtype=int) + + cmbtr = MBTRWrapper( + self.atomic_number_to_index, self._interaction_limit, cell_indices + ) + + n_atoms = len(ext_system) + if r_cut is not None: + dmat = ext_system.get_distance_matrix_within_radius(r_cut) + adj_list = dscribe.utils.geometry.get_adjacency_list(dmat) + dmat_dense = np.full( + (n_atoms, n_atoms), sys.float_info.max + ) # The non-neighbor values are treated as "infinitely far". + dmat_dense[dmat.col, dmat.row] = dmat.data + # If no weighting is used, the full distance matrix is calculated + else: + dmat_dense = ext_system.get_distance_matrix() + adj_list = np.tile(np.arange(n_atoms), (n_atoms, 1)) + + n_elem = self.n_elements + n_features = int((n_elem * n_elem * (n_elem + 1) / 2) * n) + + if return_descriptor: + k3 = np.zeros((n_features), dtype=np.float64) + else: + k3 = np.zeros((0), dtype=np.float64) + + if return_derivatives: + k3_d = np.zeros((self._interaction_limit, 3, n_features), dtype=np.float64) + else: + k3_d = np.zeros((0, 0, 0), dtype=np.float64) + + # Compute the k=3 term and its derivative + cmbtr.get_k3( + k3, + k3_d, + return_descriptor, + return_derivatives, + ext_system.get_atomic_numbers(), + ext_system.get_positions(), + dmat_dense, + adj_list, + geom_func_name.encode(), + weighting_function.encode(), + parameters, + start, + stop, + sigma, + n, + ) + + # Denormalize if requested + if not self.normalize_gaussians: + max_val = 1 / (sigma * math.sqrt(2 * math.pi)) + k3 /= max_val + k3_d /= max_val + + # Valle-Oganov normalization is calculated separately for each triplet + # Not implemented for derivatives. + if self.normalization == "valle_oganov": + volume = self.system.cell.volume + # Calculate the amount of each element for N_A*N_B*N_C term + values, counts = np.unique( + self.system.get_atomic_numbers(), return_counts=True + ) + counts = dict(zip(values, counts)) + for i_z in values: + for j_z in values: + for k_z in values: + i = self.atomic_number_to_index[i_z] + j = self.atomic_number_to_index[j_z] + k = self.atomic_number_to_index[k_z] + if k < i: + continue + # This is the index of the spectrum. It is given by enumerating the + # elements of a three-dimensional array where for valid elements + # k>=i. The enumeration begins from [0, 0, 0], and ends at [n_elem, + # n_elem, n_elem], looping the elements in the order j, i, k. + m = int( + j * n_elem * (n_elem + 1) / 2 + + k + + i * n_elem + - i * (i + 1) / 2 + ) + start = m * n + end = (m + 1) * n + count_product = counts[i_z] * counts[j_z] * counts[k_z] + norm_factor = volume / count_product + + k3[start:end] *= norm_factor + k3_d[:, :, start:end] *= norm_factor + + # Convert to the final output precision. + if self.dtype == "float32": + k3 = k3.astype(self.dtype) + k3_d = k3_d.astype(self.dtype) + + return (k3, k3_d) + + def validate_derivatives_method(self, method): + """Used to validate and determine the final method for calculating the + derivatives. + """ + methods = {"numerical", "analytical", "auto"} + if method not in methods: + raise ValueError( + "Invalid method specified. Please choose from: {}".format(methods) + ) + + if method == "numerical": + return method + + # Check if analytical derivatives can be used + try: + supported_normalization = ["none", "n_atoms", "valle_oganov"] + if self.normalization not in supported_normalization: + raise ValueError( + "Analytical derivatives not implemented for normalization option '{}'. Please choose from: {}".format( + self.normalization, supported_normalization + ) + ) + # Derivatives are not currently implemented for all k3 options + if self.k == 3: + # "angle" function is not differentiable + if self.geometry["function"] == "angle": + raise ValueError( + "Analytical derivatives not implemented for k3 geometry function 'angle'." + ) + except Exception as e: + if method == "analytical": + raise e + elif method == "auto": + method = "numerical" + else: + if method == "auto": + method = "analytical" + + return method + + def derivatives_analytical(self, d, c, system, indices, return_descriptor): + # Ensuring variables are re-initialized when a new system is introduced + self.system = system + self._interaction_limit = len(system) + + # Check that the system does not have elements that are not in the list + # of atomic numbers + self.validate_atomic_numbers(system.get_atomic_numbers()) + + mbtr, mbtr_d = getattr(self, f"_get_k{self.k}")(system, return_descriptor, True) + + # Handle normalization + if self.normalization == "n_atoms": + n_atoms = len(self.system) + mbtr /= n_atoms + mbtr_d /= n_atoms + + # For now, the derivatives are calculated with regard to all atomic + # positions. The desired indices are extracted here at the end. + i = 0 + for index in indices: + d[i, :] = mbtr_d[index, :, :] + i += 1 + + if return_descriptor: + np.copyto(c, mbtr) diff --git a/dscribe/ext/constants.h b/dscribe/ext/constants.h new file mode 100644 index 00000000..a68665bb --- /dev/null +++ b/dscribe/ext/constants.h @@ -0,0 +1,6 @@ +#ifndef CONSTANTS_H +#define CONSTANTS_H +#define PI 3.1415926535897932384626433832795028841971693993751058209749445923078164062 +#define SQRT2 sqrt(2.0) +#define SQRT2PI sqrt(2.0 * PI) +#endif \ No newline at end of file diff --git a/dscribe/ext/descriptor.h b/dscribe/ext/descriptor.h index 73f32ffd..657d09c5 100644 --- a/dscribe/ext/descriptor.h +++ b/dscribe/ext/descriptor.h @@ -36,7 +36,7 @@ class Descriptor { Descriptor(bool periodic, string average="", double cutoff=0); const bool periodic; const string average; - const double cutoff; + double cutoff; }; #endif diff --git a/dscribe/ext/ext.cpp b/dscribe/ext/ext.cpp index 95ceb474..ac266c86 100644 --- a/dscribe/ext/ext.cpp +++ b/dscribe/ext/ext.cpp @@ -22,7 +22,7 @@ limitations under the License. #include "coulombmatrix.h" #include "soap.h" #include "acsf.h" -#include "mbtr.h" +#include "mbtr2.h" #include "geometry.h" namespace py = pybind11; @@ -110,13 +110,49 @@ PYBIND11_MODULE(ext, m) { )); // MBTR - py::class_(m, "MBTRWrapper") - .def(py::init< map, int , vector> >()) - .def("get_k1", &MBTR::getK1) - .def("get_k2", &MBTR::getK2) - .def("get_k3", &MBTR::getK3) - .def("get_k2_local", &MBTR::getK2Local) - .def("get_k3_local", &MBTR::getK3Local); + // py::class_(m, "MBTRWrapper") + // .def(py::init< map, int , vector> >()) + // .def("get_k1", &MBTR::getK1) + // .def("get_k2", &MBTR::getK2) + // .def("get_k3", &MBTR::getK3) + // .def("get_k2_local", &MBTR::getK2Local) + // .def("get_k3_local", &MBTR::getK3Local); + // MBTR + py::class_(m, "MBTR") + .def(py::init, bool>()) + .def("create", overload_cast_, py::array_t, py::array_t, py::array_t, py::array_t>()(&DescriptorGlobal::create)) + .def("get_number_of_features", &MBTR::get_number_of_features) + .def("get_location", overload_cast_()(&MBTR::get_location)) + .def("get_location", overload_cast_()(&MBTR::get_location)) + .def("get_location", overload_cast_()(&MBTR::get_location)) + .def_property("geometry", &MBTR::get_geometry, &MBTR::set_geometry) + .def_property("grid", &MBTR::get_grid, &MBTR::set_grid) + .def_property("weighting", &MBTR::get_weighting, &MBTR::set_weighting) + .def_property_readonly("k", &MBTR::get_k) + .def_property("species", &MBTR::get_species, &MBTR::set_species) + .def_property("normalization", &MBTR::get_normalization, &MBTR::set_normalization) + .def_property("normalize_gaussians", &MBTR::get_normalize_gaussians, &MBTR::set_normalize_gaussians) + .def_property_readonly("species_index_map", &MBTR::get_species_index_map) + .def("derivatives_numerical", &MBTR::derivatives_numerical) + .def(py::pickle( + [](const MBTR &p) { + return py::make_tuple(p.geometry, p.grid, p.weighting, p.normalize_gaussians, p.normalization, p.species, p.periodic); + }, + [](py::tuple t) { + if (t.size() != 7) + throw std::runtime_error("Invalid state!"); + MBTR p( + t[0].cast(), + t[1].cast(), + t[2].cast(), + t[3].cast(), + t[4].cast(), + t[5].cast>(), + t[6].cast() + ); + return p; + } + )); // CellList py::class_(m, "CellList") diff --git a/dscribe/ext/geometry.cpp b/dscribe/ext/geometry.cpp index 151118fe..0ba6eb76 100644 --- a/dscribe/ext/geometry.cpp +++ b/dscribe/ext/geometry.cpp @@ -34,6 +34,57 @@ inline double norm(const vector& a) { return sqrt(accum); }; +System::System( + py::array_t positions, + py::array_t atomic_numbers, + bool extra +) + : positions(positions) + , atomic_numbers(atomic_numbers) +{ + if (!extra) { return; } + + // Create the default set of interactive atoms, which encompasses the whole + // system + unordered_set interactive_atoms = unordered_set(); + int n_atoms = atomic_numbers.size(); + for (int i = 0; i < n_atoms; ++i) { + interactive_atoms.insert(i); + } + this->interactive_atoms = interactive_atoms; + + // Create the default cell indices + py::array_t cell_indices({n_atoms}); + auto cell_indices_mu = cell_indices.mutable_unchecked<1>(); + for (int i = 0; i < n_atoms; ++i) { + cell_indices_mu(i) = 0; + } + this->cell_indices = cell_indices; + + // Create the default indices + py::array_t indices({uint(n_atoms)}); + auto indices_mu = indices.mutable_unchecked<1>(); + for (int i = 0; i < n_atoms; ++i) { + indices_mu(i) = i; + } + this->indices = indices; +} + +System::System( + py::array_t positions, + py::array_t atomic_numbers, + py::array_t indices, + py::array_t cell_indices, + unordered_set interactive_atoms +) + : positions(positions) + , atomic_numbers(atomic_numbers) + , indices(indices) + , cell_indices(cell_indices) + , interactive_atoms(interactive_atoms) +{ +} + ExtendedSystem extend_system( py::array_t positions, py::array_t atomic_numbers, diff --git a/dscribe/ext/geometry.h b/dscribe/ext/geometry.h index d8abf011..7d8d3d14 100644 --- a/dscribe/ext/geometry.h +++ b/dscribe/ext/geometry.h @@ -31,6 +31,48 @@ struct ExtendedSystem { py::array_t indices; }; +class System { + public: + System( + py::array_t positions, + py::array_t atomic_numbers, + bool extra = true + ); + System( + py::array_t positions, + py::array_t atomic_numbers, + py::array_t indices, + py::array_t cell_indices, + unordered_set interactive_atoms + ); + py::array_t positions; + py::array_t atomic_numbers; + /** + * Indices is a one-dimensional array that links each atom in the system + * into an index in the original, non-repeated system. + */ + py::array_t indices; + /** + * Cell indices is a {n_atoms, 3} array that links each atom in the + * system into the index of a repeated cell. For non-extended systems + * all atoms are always tied to cell with index (0, 0, 0), but for + * extended atoms the index will vary. + */ + py::array_t cell_indices; + /** + * Interactive atoms contains the indices of the interacting atoms in + * the system. Interacting atoms are the ones which will act as local + * centers when creating a descriptor. + */ + unordered_set interactive_atoms; + + py::array_t get_positions() {return this->positions;}; + py::array_t get_atomic_numbers() {return this->atomic_numbers;}; + py::array_t get_indices() {return this->indices;}; + py::array_t get_cell_indices() {return this->cell_indices;}; + unordered_set get_interactive_atoms() {return this->interactive_atoms;}; +}; + inline vector cross(const vector& a, const vector& b); inline double dot(const vector& a, const vector& b); inline double norm(const vector& a); diff --git a/dscribe/ext/mbtr2.cpp b/dscribe/ext/mbtr2.cpp new file mode 100644 index 00000000..382f0b45 --- /dev/null +++ b/dscribe/ext/mbtr2.cpp @@ -0,0 +1,771 @@ +/*Copyright 2019 DScribe developers + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include +#include +#include +#include +#include "mbtr2.h" +#include "constants.h" + + +using namespace std; + +inline double MBTR::get_scale() { + double scale; + if (this->weighting.contains("scale")) { + scale = this->weighting["scale"].cast(); + } else { + double threshold = this->weighting["threshold"].cast(); + double r_cut = this->weighting["r_cut"].cast(); + scale = - log(threshold) / r_cut; + } + return scale; +} + +inline double weight_unity_k1(int atomic_number) { + return 1; +} + +inline double weight_unity_k2(double distance) { + return 1; +} + +inline double weight_unity_k3(double distance_ij, double distance_jk, double distance_ki) { + return 1; +} + +inline double weight_exponential_k2(double distance, double scale) { + double expValue = exp(-scale*distance); + return expValue; +} + +inline double weight_exponential_k3(double distance_ij, double distance_jk, double distance_ki, double scale) { + double distTotal = distance_ij + distance_jk + distance_ki; + double expValue = exp(-scale*distTotal); + return expValue; +} + +inline double weight_square_k2(double distance) { + double value = 1/(distance*distance); + return value; +} + +inline double weight_smooth_k3(double distance_ij, double distance_jk, double distance_ki, double sharpness, double cutoff) { + double f_ij = 1 + sharpness * pow((distance_ij/cutoff), (sharpness+1)) - (sharpness+1)* pow((distance_ij/cutoff), sharpness); + double f_jk = 1 + sharpness * pow((distance_jk/cutoff), (sharpness+1)) - (sharpness+1)* pow((distance_jk/cutoff), sharpness); + return f_ij*f_jk; +} + +inline double geom_atomic_number(int atomic_number) { + return (double)atomic_number; +} + +inline double geom_distance(double distance) { + return distance; +} + +inline double geom_inverse_distance(double distance) { + double invDist = 1/distance; + return invDist; +} + +inline double geom_cosine(double distance_ij, double distance_jk, double distance_ki) { + double distance_ji_square = distance_ij*distance_ij; + double distance_ki_square = distance_ki*distance_ki; + double distance_jk_square = distance_jk*distance_jk; + double cosine = 0.5/(distance_jk*distance_ij) * (distance_ji_square+distance_jk_square-distance_ki_square); + + // Due to numerical reasons the cosine might be slightly under -1 or above 1 + // degrees. E.g. acos is not defined then so we clip the values to prevent + // NaN:s + cosine = max(-1.0, min(cosine, 1.0)); + return cosine; +} + +inline double geom_angle(double distance_ij, double distance_jk, double distance_ki) { + double cosine = geom_cosine(distance_ij, distance_jk, distance_ki); + double angle = acos(cosine) * 180.0 / PI; + return angle; +} + +inline bool same_cell(py::detail::unchecked_reference &cell_indices_u, int i, int j) { + return cell_indices_u(i) == cell_indices_u(j); +} + +MBTR::MBTR( + const py::dict geometry, + const py::dict grid, + const py::dict weighting, + bool normalize_gaussians, + string normalization, + py::array_t species, + bool periodic +) + : DescriptorGlobal(periodic) + , normalize_gaussians(normalize_gaussians) +{ + this->set_species(species); + this->set_normalization(normalization); + this->set_geometry(geometry); + this->set_grid(grid); + this->set_weighting(weighting); + this->set_periodic(periodic); + this->validate(); +} + +int MBTR::get_number_of_features() const { + int n_species = this->species.size(); + int n_features = 0; + int n_grid = this->grid["n"].cast(); + + if (this->k == 1) { + n_features = n_species * n_grid; + } else if (this->k == 2) { + n_features = (n_species * (n_species + 1) / 2) * n_grid; + } else if (this->k == 3) { + n_features = (n_species * n_species * (n_species + 1) / 2) * n_grid; + } + return n_features; +} + +/** + * @brief Used to normalize part of the given one-dimensional py::array + * in-place. + * + * @param out The array to normalize + * @param start Start index, defaults to 0 + * @param end End index, defaults to -1 = end of array + */ +void MBTR::normalize_output(py::array_t &out, int start, int end) { + if (this->normalization == "l2") { + // Gather magnitude + auto out_mu = out.mutable_unchecked<1>(); + if (end == -1) { + end = out.size(); + } + double norm = 0; + for (int i = start; i < end; ++i) { + norm += out_mu[i] * out_mu[i]; + } + + // Divide by L2 norm + double factor = 1 / sqrt(norm); + for (int i = start; i < end; ++i) { + out_mu[i] *= factor; + } + } +} + +void MBTR::create( + py::array_t out, + py::array_t positions, + py::array_t atomic_numbers, + CellList cell_list +) { + System system = System(positions, atomic_numbers, true); + if (this->k == 1) { + this->calculate_k1(out, system); + } else if (this->k == 2) { + this->calculate_k2(out, system, cell_list); + } else if (this->k == 3) { + this->calculate_k3(out, system, cell_list); + } + this->normalize_output(out); + return; +} + +pair MBTR::get_location(int z1) { + // Check that the corresponding part is calculated + if (this->k != 1) { + throw invalid_argument( + "Cannot retrieve the location for {}, as the term k1 has not " + "been specified." + ); + } + + // Change into internal indexing + int m = this->species_index_map[z1]; + + // Get the start and end index + int n = this->grid["n"].cast(); + int start = m * n; + int end = (m + 1) * n; + + return make_pair(start, end); +}; + + +pair MBTR::get_location(int z1, int z2) { + // Check that the corresponding part is calculated + if (this->k != 2) { + throw invalid_argument( + "Cannot retrieve the location for {}, as the term k2 has not " + "been specified." + ); + } + + // Change into internal indexing + int i = this->species_index_map[z1]; + int j = this->species_index_map[z2]; + + + // Sort + vector numbers = {i, j}; + if (numbers[0] > numbers[1]) { + reverse(numbers.begin(), numbers.end()); + } + i = numbers[0]; + j = numbers[1]; + + // This is the index of the spectrum. It is given by enumerating the + // elements of an upper triangular matrix from left to right and top + // to bottom. + int n = this->grid["n"].cast(); + int n_elem = this->species.size(); + int m = j + i * n_elem - i * (i + 1) / 2; + int start = m * n; + int end = (m + 1) * n; + + return make_pair(start, end); +}; + +pair MBTR::get_location(int z1, int z2, int z3) { + // Check that the corresponding part is calculated + if (this->k != 3) { + throw invalid_argument( + "Cannot retrieve the location for {}, as the term k3 has not " + "been specified." + ); + } + + // Change into internal indexing + int i = this->species_index_map[z1]; + int j = this->species_index_map[z2]; + int k = this->species_index_map[z3]; + + // Reverse if order is not correct + vector numbers = {i, j, k}; + if (numbers[0] > numbers[2]) { + reverse(numbers.begin(), numbers.end()); + } + i = numbers[0]; + j = numbers[1]; + k = numbers[2]; + + // This is the index of the spectrum. It is given by enumerating the + // elements of a three-dimensional array where for valid elements + // k>=i. The enumeration begins from [0, 0, 0], and ends at [n_elem, + // n_elem, n_elem], looping the elements in the order k, i, j. + int n = this->grid["n"].cast(); + int n_elem = this->species.size(); + int m = j * n_elem * (n_elem + 1) / 2 + k + i * n_elem - i * (i + 1) / 2; + + int start = m * n; + int end = (m + 1) * n; + + return make_pair(start, end); +}; + +void MBTR::set_geometry(py::dict geometry) { + this->geometry = geometry; + if (geometry.contains("function")) { + string function = geometry["function"].cast(); + unordered_set k1({"atomic_number"}); + unordered_set k2({"distance", "inverse_distance"}); + unordered_set k3({"angle", "cosine"}); + if (k1.find(function) != k1.end()) { + this->k = 1; + } else if (k2.find(function) != k2.end()) { + this->k = 2; + } else if (k3.find(function) != k3.end()) { + this->k = 3; + } else { + throw invalid_argument("Unknown geometry function."); + } + } else { + throw invalid_argument("Please specify a geometry function."); + } +} + +void MBTR::set_grid(py::dict grid) { + this->grid = grid; +} + +void MBTR::set_weighting(py::dict weighting) { + // Default weighting unity + if (!weighting.contains("function")) { + weighting["function"] = "unity"; + } + + // Default sharpness=2 for smooth cutoff + string function = weighting["function"].cast(); + if (function == "smooth_cutoff" && !weighting.contains("sharpness")) { + weighting["sharpness"] = 2; + } + + this->weighting = weighting; + this->cutoff = this->get_cutoff(); +} + +void MBTR::set_normalize_gaussians(bool normalize_gaussians) { + this->normalize_gaussians = normalize_gaussians; +} + +void MBTR::set_normalization(string normalization) { + unordered_set options({"l2", "none"}); + if (options.find(normalization) == options.end()) { + throw invalid_argument("Unknown normalization option."); + } + this->normalization = normalization; +} + +void MBTR::set_species(py::array_t species) { + this->species = species; + // Setup mappings between atom indices and types together with some + // statistics + map species_index_map; + map index_species_map; + auto species_u = species.unchecked<1>(); + int max_atomic_number = species_u(0); + int min_atomic_number = species_u(0); + int n_elements = species_u.size(); + for (int i = 0; i < n_elements; ++i) { + int atomic_number = species_u(i); + if (atomic_number > max_atomic_number) { + max_atomic_number = atomic_number; + } + if (atomic_number < min_atomic_number) { + min_atomic_number = atomic_number; + } + species_index_map[atomic_number] = i; + index_species_map[i] = atomic_number; + } + this->species_index_map = species_index_map; + this->index_species_map = index_species_map; +} + +void MBTR::set_periodic(bool periodic) { + this->periodic = periodic; +} + +double MBTR::get_cutoff() { + double cutoff = numeric_limits::infinity(); + if (weighting.contains("function")) { + string function = weighting["function"].cast(); + if (function == "exp" || function == "exponential") { + if (weighting.contains("r_cut")) { + cutoff = weighting["r_cut"].cast(); + } else if (weighting.contains("scale")) { + double scale = weighting["scale"].cast(); + if (!weighting.contains("threshold")) { + throw invalid_argument("Missing value for 'threshold'."); + } + double threshold = weighting["threshold"].cast(); + cutoff = -log(threshold) / scale; + } + } else if (function == "inverse_square") { + if (weighting.contains("r_cut")) { + cutoff = weighting["r_cut"].cast(); + } + } + } + + // In k3, the distance is defined as the perimeter, thus we half the + // distance to get the actual cutoff. + if (this->k == 3) { + cutoff *= 0.5; + } + return cutoff; +} + +void MBTR::validate() { + this->assert_valle(); + this->assert_weighting(); + this->assert_periodic_weighting(); +} + +void MBTR::assert_valle() { + if (this->normalization == "valle_oganov" && !this->periodic) { + throw invalid_argument("Valle-Oganov normalization does not support non-periodic systems."); + } +} + +void MBTR::assert_periodic_weighting() { + if (this->periodic && this->k != 1) { + bool valid = false; + if (weighting.contains("function")) { + string function = weighting["function"].cast(); + if (function != "unity") { + valid = true; + } + } + if (!valid) { + throw invalid_argument("Periodic systems need to have a weighting function."); + } + } +} + +void MBTR::assert_weighting() { + unordered_set valid_functions; + if (this->k == 1) { + valid_functions = unordered_set({"unity"}); + } else { + valid_functions = unordered_set({"unity", "exp", "exponential", "inverse_square"}); + } + ostringstream os; + string function = weighting["function"].cast(); + if (valid_functions.find(function) == valid_functions.end()) { + throw invalid_argument("Unknown weighting function."); + } else { + if (function == "exp" || function == "exponential") { + if (!weighting.contains("threshold")) { + throw invalid_argument("Missing value for 'threshold'."); + } + if (!weighting.contains("scale") && !weighting.contains("r_cut")) { + throw invalid_argument("Provide either 'scale' or 'r_cut'."); + } + if (weighting.contains("scale") && weighting.contains("r_cut")) { + throw invalid_argument("Provide only 'scale' or 'r_cut', not both."); + } + } else if (function == "inverse_square") { + if (!weighting.contains("r_cut")) { + throw invalid_argument("Missing value for 'r_cut'."); + } + } else if (function == "smooth_cutoff") { + if (!weighting.contains("r_cut")) { + throw invalid_argument("Missing value for 'r_cut'."); + } + } + } +} + +py::array_t MBTR::get_species() {return this->species;}; +bool MBTR::get_periodic() {return this->periodic;}; +map MBTR::get_species_index_map() {return this->species_index_map;}; +py::dict MBTR::get_geometry() {return geometry;}; +py::dict MBTR::get_grid() {return grid;}; +py::dict MBTR::get_weighting() {return weighting;}; +int MBTR::get_k() {return k;}; +string MBTR::get_normalization() {return normalization;}; +bool MBTR::get_normalize_gaussians() {return normalize_gaussians;}; + +inline void MBTR::add_gaussian(double center, double weight, double start, double dx, double sigma, int n, const pair &loc, py::detail::unchecked_mutable_reference &out_mu) { + // We first calculate the cumulative distribution function for a normal + // distribution. + vector cdf(n+1); + double x = start; + for (auto &it : cdf) { + it = 0.5 * (1.0 + erf((x-center)/(sigma * SQRT2))); + x += dx; + } + + // The normal distribution is calculated as a derivative of the cumulative + // distribution, as with coarse discretization this methods preserves the + // norm better. + double normalization = weight * (this->normalize_gaussians + ? 1 + : sigma * SQRT2PI); + vector pdf(n); + + int output_start = loc.first; + for (int i=0; i < n; ++i) { + out_mu[output_start + i] += normalization * (cdf[i+1]-cdf[i]) / dx; + } +} + +void MBTR::calculate_k1(py::array_t &out, System &system) { + // Create mutable and unchecked versions + py::array_t atomic_numbers = system.atomic_numbers; + auto out_mu = out.mutable_unchecked<1>(); + auto atomic_numbers_u = atomic_numbers.unchecked<1>(); + + // Get grid setup + double sigma = this->grid["sigma"].cast(); + double min = this->grid["min"].cast(); + double max = this->grid["max"].cast(); + int n = this->grid["n"].cast(); + double dx = (max - min) / (n - 1); + double start = min - dx/2; + + // Determine the geometry function to use + string geom_func_name = this->geometry["function"].cast(); + function geom_func; + if (geom_func_name == "atomic_number") { + geom_func = geom_atomic_number; + } else { + throw invalid_argument("Invalid geometry function for k=1."); + } + + // Determine the weighting function to use + string weight_func_name = this->weighting["function"].cast(); + function weight_func; + if (weight_func_name == "unity") { + weight_func = weight_unity_k1; + } else { + throw invalid_argument("Invalid weighting function for k=1."); + } + + // Loop through all the atoms in the original, non-extended cell + for (auto &i : system.interactive_atoms) { + int i_z = atomic_numbers_u(i); + double geom = geom_func(i_z); + double weight = weight_func(i_z); + + // Get the index of the present elements in the final vector + pair loc = get_location(i_z); + + // Add gaussian to output + add_gaussian(geom, weight, start, dx, sigma, n, loc, out_mu); + } +} + +void MBTR::calculate_k2(py::array_t &out, System &system, CellList &cell_list) { + // Create mutable and unchecked versions + auto out_mu = out.mutable_unchecked<1>(); + auto atomic_numbers = system.atomic_numbers; + auto atomic_numbers_u = atomic_numbers.unchecked<1>(); + + // Get grid setup + double sigma = this->grid["sigma"].cast(); + double min = this->grid["min"].cast(); + double max = this->grid["max"].cast(); + int n = this->grid["n"].cast(); + double dx = (max - min) / (n - 1); + double start = min - dx/2; + + // Determine the geometry function to use + string geom_func_name = this->geometry["function"].cast(); + function geom_func; + if (geom_func_name == "distance") { + geom_func = geom_distance; + } else if (geom_func_name == "inverse_distance") { + geom_func = geom_inverse_distance; + } else { + throw invalid_argument("Invalid geometry function for k=2."); + } + + // Determine the weighting function to use + string weight_func_name = this->weighting["function"].cast(); + function weight_func; + if (weight_func_name == "unity") { + weight_func = weight_unity_k2; + } else if (weight_func_name == "exp" || weight_func_name == "exponential") { + double scale = get_scale(); + weight_func = bind(weight_exponential_k2, std::placeholders::_1, scale); + } else if (weight_func_name == "inverse_square") { + weight_func = weight_square_k2; + } else { + throw invalid_argument("Invalid weighting function for k=2."); + } + + // Loop over all atoms in the system + // TODO: There may be a more efficent way of looping through the atoms. + // Maybe looping over the interactive atoms only? Also maybe iterating over + // the cells only in the positive lattice vector direction? + double cutoff_k2 = this->cutoff; + int n_atoms = atomic_numbers.size(); + auto cell_indices_u = system.cell_indices.unchecked<1>(); + for (int i=0; i < n_atoms; ++i) { + // For each atom we loop only over the neighbours + CellListResult neighbours_i = cell_list.getNeighboursForIndex(i); + int n_neighbours = neighbours_i.indices.size(); + + for (int it = 0; it < n_neighbours; ++it) { + int j = neighbours_i.indices[it]; + double distance = neighbours_i.distances[it]; + if (distance > cutoff_k2) { + continue; + } + + if (j > i) { + // Only consider pairs that have at least one atom in the + // 'interactive subset', typically the original cell but can + // also be another local region. + bool i_interactive = system.interactive_atoms.find(i) != system.interactive_atoms.end(); + bool j_interactive = system.interactive_atoms.find(j) != system.interactive_atoms.end(); + if (i_interactive || j_interactive) { + + double geom = geom_func(distance); + double weight = weight_func(distance); + + // When the pair of atoms are in different copies of the + // cell, the weight is halved. This is done in order to + // avoid double counting the same distance in the opposite + // direction. This correction makes periodic cells with + // different translations equal and also supercells equal to + // the primitive cell within a constant that is given by the + // number of repetitions of the primitive cell in the + // supercell. + if (!same_cell(cell_indices_u, i, j)) { + weight /= 2; + } + + // Get the starting index of the species pair in the final vector + int i_z = atomic_numbers_u(i); + int j_z = atomic_numbers_u(j); + pair loc = get_location(i_z, j_z); + + // Add gaussian to output + add_gaussian(geom, weight, start, dx, sigma, n, loc, out_mu); + } + } + } + } +} + +void MBTR::calculate_k3(py::array_t &out, System &system, CellList &cell_list) { + // Create mutable and unchecked versions + auto out_mu = out.mutable_unchecked<1>(); + auto atomic_numbers = system.atomic_numbers; + auto atomic_numbers_u = atomic_numbers.unchecked<1>(); + auto positions = system.positions; + auto positions_u = positions.unchecked<2>(); + + // Get grid setup + double sigma = this->grid["sigma"].cast(); + double min =this->grid["min"].cast(); + double max = this->grid["max"].cast(); + int n = this->grid["n"].cast(); + double dx = (max - min) / (n - 1); + double start = min - dx/2; + + // Determine the geometry function to use + string geom_func_name = this->geometry["function"].cast(); + function geom_func; + if (geom_func_name == "angle") { + geom_func = geom_angle; + } else if (geom_func_name == "cosine") { + geom_func = geom_cosine; + } else { + throw invalid_argument("Invalid geometry function for k=3."); + } + + // Determine the weighting function to use + string weight_func_name = this->weighting["function"].cast(); + function weight_func; + if (weight_func_name == "unity") { + weight_func = weight_unity_k3; + } else if (weight_func_name == "exp" || weight_func_name == "exponential") { + double scale = get_scale(); + weight_func = bind( + weight_exponential_k3, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + scale + ); + } else if (weight_func_name == "smooth_cutoff") { + double sharpness = this->weighting["sharpness"].cast(); + double r_cut = this->weighting["r_cut"].cast(); + weight_func = bind( + weight_smooth_k3, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + sharpness, + r_cut + ); + } else { + throw invalid_argument("Invalid weighting function for k=3."); + } + + // For each atom we loop only over the atoms triplets that are within the + // neighbourhood + double cutoff_k3 = this->cutoff * 2; + int n_atoms = atomic_numbers.size(); + auto pos_u = system.positions.unchecked<2>(); + auto cell_indices_u = system.cell_indices.unchecked<1>(); + for (int i=0; i < n_atoms; ++i) { + CellListResult neighbours_i = cell_list.getNeighboursForIndex(i); + int n_neighbours_i = neighbours_i.indices.size(); + for (int it_i = 0; it_i < n_neighbours_i; ++it_i) { + int j = neighbours_i.indices[it_i]; + CellListResult neighbours_j = cell_list.getNeighboursForIndex(j); + int n_neighbours_j = neighbours_j.indices.size(); + for (int it_j = 0; it_j < n_neighbours_j; ++it_j) { + int k = neighbours_j.indices[it_j]; + // Only consider triples that have at least one atom in the + // 'interaction subset', typically the original cell but can + // also be another local region. + bool i_interactive = system.interactive_atoms.find(i) != system.interactive_atoms.end(); + bool j_interactive = system.interactive_atoms.find(j) != system.interactive_atoms.end(); + bool k_interactive = system.interactive_atoms.find(k) != system.interactive_atoms.end(); + if (i_interactive || j_interactive || k_interactive) { + // Calculate angle for all index permutations from choosing + // three out of n_atoms. The same atom cannot be present + // twice in the permutation. + if (j != i && k != j && k != i) { + // The angles are symmetric: ijk = kji. The value is + // calculated only for the triplet where k > i. + if (k > i) { + + double distance_ij = neighbours_i.distances[it_i]; + double distance_jk = neighbours_j.distances[k]; + if (distance_ij + distance_jk > cutoff_k3) { + continue; + } + // The i-k distance is here calculated. TODO: One + // could alternatively check if k is part of i's + // neighbours: if not, then this triplet can be + // skipped. This would be possible if e.g. celllist + // result indices would be an ordered set. + double dx = positions_u(i, 0) - positions_u(k, 0); + double dy = positions_u(i, 1) - positions_u(k, 1); + double dz = positions_u(i, 2) - positions_u(k, 2); + double distance_ki = sqrt(dx*dx + dy*dy + dz*dz); + if (distance_ij + distance_jk + distance_ki > cutoff_k3) { + continue; + } + + // Calculate geometry value. + double geom = geom_func(distance_ij, distance_jk, distance_ki); + + // Calculate weight value. + double weight = weight_func(distance_ij, distance_jk, distance_ki); + + // The contributions are weighted by their multiplicity arising from + // the translational symmetry. Each triple of atoms is repeated N + // times in the extended system through translational symmetry. The + // weight for the angles is thus divided by N so that the + // multiplication from symmetry is countered. This makes the final + // spectrum invariant to the selected supercell size and shape + // after normalization. The number of repetitions N is given by how + // many unique cell indices (the index of the repeated cell with + // respect to the original cell at index 0) are present for + // the atoms in the triple. + int diff_sum = + (int)!same_cell(cell_indices_u, i, j) + + (int)!same_cell(cell_indices_u, i, k) + + (int)!same_cell(cell_indices_u, j, k); + if (diff_sum > 1) { + weight /= diff_sum; + } + + // Get the starting index of the species triple in + // the final vector + int i_z = atomic_numbers_u(i); + int j_z = atomic_numbers_u(j); + int k_z = atomic_numbers_u(k); + pair loc = get_location(i_z, j_z, k_z); + + // Add gaussian to output + add_gaussian(geom, weight, start, dx, sigma, n, loc, out_mu); + } + } + } + } + } + } +} \ No newline at end of file diff --git a/dscribe/ext/mbtr2.h b/dscribe/ext/mbtr2.h new file mode 100644 index 00000000..6a1d0628 --- /dev/null +++ b/dscribe/ext/mbtr2.h @@ -0,0 +1,155 @@ +/*Copyright 2019 DScribe developers + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#ifndef MBTR_H +#define MBTR_H + +#include +#include +#include +#include "descriptorglobal.h" +#include "geometry.h" + +namespace py = pybind11; +using namespace std; + +/** + * Many-body Tensor Representation (MBTR) descriptor. + */ +class MBTR: public DescriptorGlobal { + public: + /** + * Constructor, see the python docs for more details about variables. + */ + MBTR( + const py::dict geometry, + const py::dict grid, + const py::dict weighting, + bool normalize_gaussians, + string normalization, + py::array_t species, + bool periodic + ); + + /** + * For creating feature vectors. + */ + void create( + py::array_t out, + py::array_t positions, + py::array_t atomic_numbers, + CellList cell_list + ); + + /** + * Get the number of features. + */ + int get_number_of_features() const; + + /** + * Get location of k1 features. + */ + pair get_location(int z1); + + /** + * Get location of k2 features. + */ + pair get_location(int z1, int z2); + + /** + * Get location of k3 features. + */ + pair get_location(int z1, int z2, int z3); + + /** + * Setters + */ + void set_species(py::array_t species); + void set_periodic(bool periodic); + void set_geometry(py::dict geometry); + void set_grid(py::dict grid); + void set_weighting(py::dict weighting); + void set_normalize_gaussians(bool normalize_gaussians); + void set_normalization(string normalization); + + /** + * Getters + */ + py::array_t get_species(); + bool get_periodic(); + py::dict get_geometry(); + py::dict get_grid(); + py::dict get_weighting(); + int get_k(); + bool get_normalize_gaussians(); + string get_normalization(); + map get_species_index_map(); + + py::dict geometry; + py::dict grid; + py::dict weighting; + bool normalize_gaussians; + string normalization; + py::array_t species; + bool periodic; + + private: + int k; + map species_index_map; + map index_species_map; + /** + * Performs all assertions. + */ + void validate(); + /** + * Checks that the configuration is valid for valle-oganoff. + */ + void assert_valle(); + /** + * Checks that the weighting is defined correctly when periodicity is + * taken into account. + */ + void assert_periodic_weighting(); + /** + * Checks that the weighting is defined correctly. + */ + void assert_weighting(); + /** + * Gets the radial cutoff value. + */ + double get_cutoff(); + /** + * Gets the exponential scaling factor. + */ + double get_scale(); + inline void add_gaussian( + double center, + double weight, + double start, + double dx, + double sigma, + int n, + const pair &loc, + py::detail::unchecked_mutable_reference &out + ); + void calculate_k1(py::array_t &out, System &system); + void calculate_k2(py::array_t &out, System &system, CellList &cell_list); + void calculate_k3(py::array_t &out, System &system, CellList &cell_list); + int get_number_of_k1_features() const; + int get_number_of_k2_features() const; + int get_number_of_k3_features() const; + void normalize_output(py::array_t &out, int start = 0, int end = -1); +}; + +#endif \ No newline at end of file diff --git a/setup.py b/setup.py index d7f40e6f..3f7776c3 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ def __str__(self): "dscribe/ext/soapGTO.cpp", "dscribe/ext/soapGeneral.cpp", "dscribe/ext/acsf.cpp", - "dscribe/ext/mbtr.cpp", + "dscribe/ext/mbtr2.cpp", ], include_dirs=[ # Path to Eigen headers