From d551211e9e2e3f4a36bd7494f03e01d876842670 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Fri, 15 Mar 2024 21:53:00 +0100 Subject: [PATCH 1/5] Pre-computation of orbital values. There is now the possibility to pre-compute orbital values to calculate the density. This results in much faster calculations, although it uses more memory. --- src/sisl/CMakeLists.txt | 2 +- src/sisl/_core/geometry.py | 210 +++- src/sisl/_sparse_grid.py | 455 +++++++++ src/sisl/_sparse_grid_ops.pyx | 904 ++++++++++++++++++ src/sisl/physics/densitymatrix.py | 109 ++- src/sisl/physics/tests/test_density_matrix.py | 51 +- src/sisl/tests/test_sparse_grid.py | 312 ++++++ 7 files changed, 1983 insertions(+), 60 deletions(-) create mode 100644 src/sisl/_sparse_grid.py create mode 100644 src/sisl/_sparse_grid_ops.pyx create mode 100644 src/sisl/tests/test_sparse_grid.py diff --git a/src/sisl/CMakeLists.txt b/src/sisl/CMakeLists.txt index 271f69c4b3..55f75ca404 100644 --- a/src/sisl/CMakeLists.txt +++ b/src/sisl/CMakeLists.txt @@ -1,4 +1,4 @@ -foreach(source _indices _math_small) +foreach(source _indices _math_small _sparse_grid_ops) add_cython_library( SOURCE ${source}.pyx LIBRARY ${source} diff --git a/src/sisl/_core/geometry.py b/src/sisl/_core/geometry.py index 02d8db6c37..9d76d3c831 100644 --- a/src/sisl/_core/geometry.py +++ b/src/sisl/_core/geometry.py @@ -32,6 +32,7 @@ tile, unique, ) +from scipy.sparse import csr_array import sisl._array as _a from sisl._category import Category, GenericCategory @@ -45,7 +46,7 @@ list_index_le, ) from sisl._internal import set_module -from sisl._math_small import cross3, is_ascending +from sisl._math_small import cross3, is_ascending, xyz_to_spherical_cos_phi from sisl._namedindex import NamedIndex from sisl.messages import SislError, deprecate_argument, deprecation, info, warn from sisl.shape import Cube, Shape, Sphere @@ -3610,6 +3611,213 @@ def within_inf( # infinite supercell indices return self.asc2uc(idx), xyz, isc + def _orbital_values(self, grid_shape: Tuple[int, int, int]): + r"""Calculates orbital values for a given grid. + + Parameters + ---------- + grid_shape: + the grid shape (i.e. resolution) in which to calculate the orbital values. + """ + # We need to import these here to avoid circular imports. + from sisl import Grid + from sisl._sparse_grid import SparseGridOrbitalBZ + + # In the following we don't care about division + # So 1) save error state, 2) turn off divide by 0, 3) calculate, 4) turn on old error state + old_err = np.seterr(divide="ignore", invalid="ignore") + + # Instead of looping all atoms in the supercell we find the exact atoms + # and their supercell indices. + add_R = _a.fulld(3, self.maxR()) + # Calculate the required additional vectors required to increase the fictitious + # supercell by add_R in each direction. + # For extremely skewed lattices this will be way too much, hence we make + # them square. + o = self.lattice.to.Cuboid(True) + lattice = Lattice(o._v + np.diag(2 * add_R), origin=o.origin - add_R) + + # Retrieve all atoms within the grid supercell + # (and the neighbours that connect into the cell) + IA, XYZ, ISC = self.within_inf(lattice, periodic=self.pbc) + XYZ -= self.lattice.origin.reshape(1, 3) + + # within_inf translates atoms to the unit cell to compute + # supercell indices. Here we revert that + ISC -= np.floor(self.fxyz[IA]).astype(int32) + + def xyz2spherical(xyz, offset): + """Calculate the spherical coordinates from indices""" + rx = xyz[:, 0] - offset[0] + ry = xyz[:, 1] - offset[1] + rz = xyz[:, 2] - offset[2] + + xyz_to_spherical_cos_phi(rx, ry, rz) + return rx, ry, rz + + def sphere_grid_index(grid, center, R): + + corners = np.mgrid[-1:2:2, -1:2:2, -1:2:2].T * R + center + corners = corners.reshape(-1, 3) + + corners_i = grid.index(corners) + + ix = np.arange( + max(corners_i[:, 0].min(), 0), + min(corners_i[:, 0].max() + 1, grid.shape[0]), + ) + iy = np.arange( + max(corners_i[:, 1].min(), 0), + min(corners_i[:, 1].max() + 1, grid.shape[1]), + ) + iz = np.arange( + max(corners_i[:, 2].min(), 0), + min(corners_i[:, 2].max() + 1, grid.shape[2]), + ) + + indices = np.meshgrid(ix, iy, iz) + indices = np.array(indices).T.reshape(-1, 3) + + return indices + + # Get the size of the auxiliary supercell needed to store orbital values. + nsc = abs(ISC).max(axis=0) * 2 + 1 + sp_grid_geom = self.copy() + sp_grid_geom.set_nsc(nsc) + + # Initialize a fake grid to compute some quantities related to the grid distribution + grid = Grid(grid_shape, geometry=self) + + # Estimate a top limit on how many values we need to store. We estimate it by expecting + # each orbital to fill a sphere of radius R, being R the radius of the orbital. We also + # add a margin of 1 voxel so that we don't underestimate because of rounding. + dvolume = grid.dvolume + margin_R = np.linalg.norm(grid.dcell.sum(axis=0)) + vol = 0.0 + for atom, indices in self.sub(IA).atoms.iter(species=True): + vol += (4 / 3 * np.pi * (atom.R + margin_R) ** 3).sum() * len(indices) + + max_vals = int(vol / dvolume) + + # Array storing all the grid values + grid_values = np.zeros(max_vals, dtype=np.float64) + # Orbital indices for each orbital that has a nonzero value in the grid. + orbital_indices = np.full(max_vals, -1, dtype=np.int32) + # For each value, its index of the grid. Even if the grid is 3 dimensional, + # we store the raveled index. That is, a single integer representing the position + # of the point. One can always unravel the index if needed. + grid_indices = np.zeros(max_vals, dtype=np.int32) + + # print( + # f"Estimated memory required:", + # (orbital_indices.size * 32 + grid_values.size * 64 + grid_indices.size * 32) / 8 / 1024 / 1024, + # "MB" + # ) + + # Temporal variables that will help us keep track of the construction of the arrays. + i_value = 0 + first_orbs = self.firsto + isc_off = sp_grid_geom.isc_off + + # Loop over all atoms in the grid-cell + for ia, ia_xyz, isc in zip(IA, XYZ, ISC): + # Get current atom + atom = self.atoms[ia] + + # Get the index of the cell where this atom is in the auxiliary supercell + index_sc = isc_off[isc[0], isc[1], isc[2]] + # And use it to calculate the offset on the orbital index. + io_offset = self.no * index_sc + + # Extract maximum R + R = atom.maxR() + + if R <= 0.0: + warn(f"Atom '{atom}' does not have a wave-function, skipping atom.") + continue + + idx = sphere_grid_index(grid, ia_xyz, R) + + if len(idx) == 0: + continue + + # Get real-space coordinates for the atom + grid_xyz = dot(idx, grid.dcell) + # Convert them to spherical coordinates + at_r, at_theta, at_cos_phi = xyz2spherical(grid_xyz, ia_xyz) + + del grid_xyz + # Merge the three components of spherical coordinates into one array. + at_spherical = np.array([at_r, at_theta, at_cos_phi]).T + + # Filter out points where the distance to the atom is less than its max R. + at_nonzero = at_spherical[:, 0] < R + idx = idx[at_nonzero] + at_spherical = at_spherical[at_nonzero] + + if len(idx) == 0: + continue + + # Ravel multi index to save space. That is, convert the 3D grid index + # into a single integer. One can always unravel them if needed. + idx = ( + idx[:, 0] * grid.shape[1] * grid.shape[2] + + idx[:, 1] * grid.shape[2] + + idx[:, 2] + ) + + # Loop over the orbitals + for io, orb in enumerate(atom.orbitals): + # Get the index of this orbital + uc_io = first_orbs[ia] + io + + orb_spherical = at_spherical + orb_indices = idx + + # The orbital's R might not be the maximum R of the atom. In that case, + # we don't need to calculate the values for all the grid points that are within + # the atom's range. + if R - orb.R > 1e-6: + # Check which coordinates are not within this orbital's range (the radius is bigger than orbital radius) + orb_nonzero = orb_spherical[:, 0] < orb.R + + orb_spherical = orb_spherical[orb_nonzero] + orb_indices = orb_indices[orb_nonzero] + + # Number of grid values that we are going to compute for this orbital + orb_nvals = orb_spherical.shape[0] + + # If there are no values to add, go to the next orbital + if orb_nvals == 0: + continue + + # Compute the psi values for the grid points we are interested in + psi = orb.psi_spher(*orb_spherical.T, cos_phi=True) + + # Update the data structure + values_i = slice(i_value, i_value + orb_nvals) + grid_values[values_i] = psi + grid_indices[values_i] = orb_indices + orbital_indices[values_i] = uc_io + io_offset + + # Update the index where new values should be stored + i_value += orb_nvals + + # Reset the error code for division + np.seterr(**old_err) + + # Cut the arrays to return only the parts that have been filled + grid_values = grid_values[:i_value] + grid_indices = grid_indices[:i_value] + orbital_indices = orbital_indices[:i_value] + + psi_values = csr_array( + (grid_values, (grid_indices, orbital_indices)), + shape=(np.prod(grid.shape), sp_grid_geom.no_s), + ) + + return SparseGridOrbitalBZ(grid.shape, psi_values, geometry=sp_grid_geom) + # Create pickling routines def __getstate__(self): """Returns the state of this object""" diff --git a/src/sisl/_sparse_grid.py b/src/sisl/_sparse_grid.py new file mode 100644 index 0000000000..bf349f569b --- /dev/null +++ b/src/sisl/_sparse_grid.py @@ -0,0 +1,455 @@ +from typing import Optional, Tuple, Union + +import numpy as np +from scipy.sparse import issparse, spmatrix + +from sisl import Grid, Lattice, SparseCSR +from sisl.physics import Overlap + +from ._sparse_grid_ops import ( + reduce_grid_matvec_multiply, + reduce_grid_matvecs_multiply, + reduce_grid_sum, + reduce_sc_products, + reduce_sc_products_multicoeffs, + reduce_sc_products_multicoeffs_sparse_denseindex, +) +from .physics._phase import phase_rsc + + +class Sparse4DGrid: + """Stores information on a 3D grid with an extra sparse dimension. + + The information is stored as a matrix where the rows are the raveled indices of the + grid and the columns represent the extra dimension. The sparsity of the matrix is + handled by the usual methods. + """ + + def __init__(self, grid_shape, *args, geometry=None, lattice=None, **kwargs): + self.grid_shape = tuple(grid_shape) + + self.geometry = geometry + if geometry is not None: + self.lattice = geometry.lattice + else: + self.lattice = lattice + + self._csr = SparseCSR(*args, **kwargs) + + def _transposed_grid_indices(self, *axes): + """Function that returns the transposed raveled indices. + + Might be useful to reorder rows in the CSR matrix that stores the grid. + + Not used for now. + """ + csr = self._csr + + # Transform the indices + old_indices = np.arange(csr.shape[0], dtype=np.int32) + old_indices_g = old_indices.reshape(self.grid_shape).ravel() + + new_indices_g = old_indices_g.transpose(*axes) + new_indices = new_indices_g.ravel() + new_shape = new_indices_g.shape + + return new_indices, new_shape + + def reduce_dimension( + self, weights: Optional[np.ndarray] = None, reduce_grid: Tuple[int, ...] = () + ) -> Union[Grid, np.ndarray]: + """Reduces the extra dimension on the grid, possibly applying weights. + + It can also reduce grid axes on the same go. See the `reduce_grid` parameter. + + Parameters + ---------- + weights: + Array of weights for the columns. The first dimension of this array must have length + equal to the extra dimension of this grid object. + + The array might optionally have a second dimension, + which will also be reflected as an extra dimension in the output grid. + reduce_grid: + Axes that you wish to reduce on the grid. Along these axes, the grid values will be + accumulated. This is very useful to reduce memory footprint (and probably also + computation time) if your final goal is to reduce over the grid axes, because with + this approach the grid axes are reduced on the fly (instead of first constructing the + full grid and then reducing). + + This might be the difference between being able to perform the calculation or not. + + Returns + ------- + Union[Grid, np.ndarray] + The reduced grid. If the weights array has more than one dimension, it will be a + numpy array. Otherwise it will be a sisl.Grid object. + """ + # Assess if we are dealing with multiple weights + multi_weights = weights is not None and ( + weights.ndim == 2 and weights.shape[1] != 1 + ) + if not multi_weights and weights is not None: + weights = weights.ravel() + + # If we don't need to reduce the grid axes, we use scipy functionality + if len(reduce_grid) == 0: + csr = self._csr.tocsr() + if weights is None: + reduced = csr.sum(axis=1) + else: + # Perform matrix-vector multiplication to reduce the extra dimension. + reduced = csr @ weights + + if multi_weights: + grid = reduced.reshape(*self.grid_shape, -1) + else: + # Create a new grid and set the result of the operation as the values + grid = Grid( + self.grid_shape, geometry=self.geometry, dtype=reduced.dtype + ) + del grid.grid + grid.grid = reduced.reshape(self.grid_shape) + # If we wish to reduce the axes, we use our own functionality that reduces the grid + # on the fly, because it is faster and uses much less memory than reducing a posteriori. + else: + csr = self._csr + + # Find out the reduced shape, and the reduce factor. The reduced factor is the number + # by which we have to divide the index to get the reduced index. + reduced_shape = list(self.grid_shape) + reduce_factor = 1 + + if len(reduce_grid) > 3: + raise ValueError( + f"There are only 3 grid axes, it is not possible to reduce axes {reduce_grid}" + ) + + # Check if we are reducing the last dimensions. Otherwise we will need to transpose the grid to + # put the dimension to reduce on the last axis. Note that we will not do that explicilty, transposing + # is done inside the cython routines (per index) to reduce memory footprint. + need_transpose = False + sorted_reduce_axes = np.sort(reduce_grid) + if len(reduce_grid) > 0: + need_transpose = sorted_reduce_axes[-1] != 2 + if len(reduce_grid) > 1: + need_transpose = need_transpose or sorted_reduce_axes[-2] != 1 + if len(reduce_grid) > 2: + need_transpose = need_transpose or sorted_reduce_axes[0] != 0 + + # Calculate the reducing factor and the new reduced shape. + for reduce_axis in sorted_reduce_axes: + reduce_factor *= self.grid_shape[reduce_axis] + reduced_shape[reduce_axis] = 1 + + # Prepare the quantities needed to transpose. + grid_shape = np.array(self.grid_shape, dtype=np.int32) + if need_transpose: + not_reduced_axes = np.sort(tuple(set([0, 1, 2]) - set(reduce_grid))) + new_axes_order = np.array( + [*not_reduced_axes, *reduce_grid], dtype=np.int32 + ) + else: + # A dummy array to pass to cython functions so that they don't complain + new_axes_order = np.zeros(0, dtype=np.int32) + + # Determine the datatype of the output + if weights is not None: + dtype = np.result_type(weights, csr.data) + else: + dtype = csr.dtype + + # If we do not have multiple weights, we store it in a sisl Grid, otherwise + # we store it in a raw numpy array, since sisl.Grid doesn't support extra + # dimensions. + if not multi_weights: + grid = Grid(reduced_shape, geometry=self.geometry, dtype=dtype) + out = grid.grid.ravel() + else: + grid = np.zeros((*reduced_shape, weights.shape[1]), dtype=dtype) + out = grid.reshape(-1, weights.shape[1]) + + # Apply the corresponding function to do the operation that we need to do. + if weights is None: + reduce_grid_sum( + csr.data[:, 0].astype(dtype), + csr.ptr, + csr.col, + reduce_factor=int(reduce_factor), + grid_shape=grid_shape, + new_axes=new_axes_order, + out=out, + ) + elif multi_weights: + reduce_grid_matvecs_multiply( + csr.data[:, 0].astype(dtype), + csr.ptr, + csr.col, + weights.astype(dtype), + reduce_factor=int(reduce_factor), + grid_shape=grid_shape, + new_axes=new_axes_order, + out=out, + ) + else: + reduce_grid_matvec_multiply( + csr.data[:, 0].astype(dtype), + csr.ptr, + csr.col, + weights.astype(dtype), + reduce_factor=int(reduce_factor), + grid_shape=grid_shape, + new_axes=new_axes_order, + out=out, + ) + + return grid + + +class SparseGridOrbitalBZ(Sparse4DGrid): + """Stores information on a 3D grid with an extra orbital dimension, which is sparse.""" + + def get_overlap_matrix(self) -> Overlap: + """Computes the overlap matrix. + + It does so by integrating the product between each pair of orbitals on the whole grid. + """ + + # Get the volume of each grid cell, because we will need to multiply + # the sum of the products by it. + dvolume = self.lattice.volume / np.prod(self.grid_shape) + + # Get the grid values as a scipy matrix + csr = self._csr.tocsr() + + # Compute the overlap matrix. It is a simple matrix multiplication. + all_overlap = csr.T @ csr + + # We would be done here, if it wasn't because the matrix multiplication + # returns a square matrix. We need to acumulate all the extra rows into + # the first set of rows (unit cell). For example, the overlap between + # supercell [0, 0, 1] and supercell [0, 0, 1] is an overlap within the same + # cell. We should add it to the unit cell overlap. + # In practice, what we do is to take the rows of the matrix that correspond + # to a supercell, shift the columns, and add them to the unit cell rows. + + # Number of orbitals in the unit cell + no = self.geometry.no + + # Part of the overlap matrix where we will fold everything. Shape (no, no * n_supercells) + overlap = all_overlap[:no] + # Compute number of supercells + n_s = overlap.shape[-1] // no + + # Then loop over supercell rows, shifting them and folding into the main matrix, + # as explained above. + for isc_row in range(1, n_s): + next_rows = all_overlap[isc_row * no : (isc_row + 1) * no] + for isc_col in range(n_s): + start_col = (isc_col - isc_row) * no + if start_col < 0: + start_col = overlap.shape[-1] + start_col + + overlap[:, start_col : start_col + no] += next_rows[ + :, isc_col * no : (isc_col + 1) * no + ] + + # Multiply by the volume of each grid cell and return an Overlap object. + return Overlap.fromsp(self.geometry, overlap * dvolume) + + def reduce_orbitals( + self, weights: np.ndarray, k: Tuple[float, float, float] = (0, 0, 0), **kwargs + ) -> Union[Grid, np.ndarray]: + """Reduces the orbitals dimension, applying phases if needed. + + Parameters + ---------- + weights: + Array of weights for the orbitals. The first dimension of this array must have length + equal to the number of orbitals in the unit cell. + + The array might optionally have a second dimension, + which will also be reflected as an extra dimension in the output grid. + k: + The k point for which the values are to be projected. This will be used to compute a + phase for the contributions of orbitals outside the unit cell. + **kwargs: + passed directly to reduce_dimension + + Returns + ------- + Union[Grid, np.ndarray] + The reduced grid. If the weights array has more than one dimension, it will be a + numpy array. Otherwise it will be a sisl.Grid object. + """ + # Assess if we are dealing with multiple weights + multi_weights = weights.ndim == 2 and weights.shape[1] != 1 + if not multi_weights: + weights = weights.reshape(-1, 1) + + if weights.shape[0] == self.geometry.no: + # We should expand the coefficients to the whole supercell, applying phases. + k = np.array(k, dtype=np.float64) + + if np.any(k > 1e-5): + # Compute phases + phases = phase_rsc(self.geometry.lattice, k, dtype=np.complex128) + # Expand the coefficients array. + weights = ( + phases.reshape(-1, 1) + .dot(weights.reshape(1, -1)) + .reshape(-1, weights.shape[1]) + ) + else: + # At gamma we just need to tile the coefficients array + n_s = self.geometry.n_s + weights = ( + np.tile(weights.T, n_s) + .reshape(weights.shape[1], weights.shape[0] * n_s) + .T + ) + + elif weights.shape[0] != self.geometry.no_s: + raise ValueError( + f"The coefficients array must be of length no ({self.geometry.no}) or no_s ({self.geometry.no_s}). It is of length {weights.shape[0]}" + ) + + return self.reduce_dimension(weights, **kwargs) + + def reduce_orbital_products( + self, + weights: Union[np.ndarray, SparseCSR, spmatrix], + weights_lattice: Lattice, + reduce_grid: Tuple[int, ...] = (), + out: Optional[np.ndarray] = None, + ) -> np.ndarray: + """Reduces the orbital dimension by taking products of orbitals with weights. + + Parameters + ---------- + weights: + The coefficient to apply to each orbital product. The first two dimensions of this + array must be ``(no, no_s)``. Therefore each entry ``weights[i, j]`` is the coefficient + to apply to the product of orbitals ``i`` and ``j``. + + The array might optionally have a third dimension, which will also be reflected as an + extra dimension in the output grid. This is useful to compute multiple quantities at + the same time, because orbital products are only computed once. + + The ``weights`` array be either a dense or sparse array. + weights_lattice: + The lattice associated to the weights array. It might be different from the one + associated to the ``SparseGridOrbitalBZ`` object. + reduce_grid: + Axes that you wish to reduce on the grid. Along these axes, the grid values will be + accumulated. This is very useful to reduce memory footprint (and probably also + computation time) if your final goal is to reduce over the grid axes, because with + this approach the grid axes are reduced on the fly (instead of first constructing the + full grid and then reducing). + + This might be the difference between being able to perform the calculation or not. + out: + If you wish to store the result in a preallocated array, you can pass it here. + It is your responsibility to ensure that the array has the correct shape and dtype. + + Returns + ------- + np.ndarray + The output grid. If ``out`` is not ``None``, it will be the same as ``out``. Otherwise + it will be a newly created array. + """ + csr = self._csr + + # Find out the reduced shape, and the reduce factor. The reduced factor is the number + # by which we have to divide the index to get the reduced index. + reduced_shape = list(self.grid_shape) + reduce_factor = 1 + + if len(reduce_grid) > 3: + raise ValueError( + f"There are only 3 grid axes, it is not possible to reduce axes {reduce_grid}" + ) + + # Check if we are reducing the last dimensions. Otherwise we will need to transpose the grid to + # put the dimension to reduce on the last axis. Note that we will not do that explicilty, transposing + # is done inside the cython routines (per index) to reduce memory footprint. + need_transpose = False + sorted_reduce_axes = np.sort(reduce_grid) + if len(reduce_grid) > 0: + need_transpose = sorted_reduce_axes[-1] != 2 + if len(reduce_grid) > 1: + need_transpose = need_transpose or sorted_reduce_axes[-2] != 1 + if len(reduce_grid) > 2: + need_transpose = need_transpose or sorted_reduce_axes[0] != 0 + + # Calculate the reducing factor and the new reduced shape. + for reduce_axis in sorted_reduce_axes: + reduce_factor *= self.grid_shape[reduce_axis] + reduced_shape[reduce_axis] = 1 + + # Prepare the quantities needed to transpose. + grid_shape = np.array(self.grid_shape, dtype=np.int32) + if need_transpose: + not_reduced_axes = np.sort(tuple(set([0, 1, 2]) - set(reduce_grid))) + new_axes_order = np.array([*not_reduced_axes, *reduce_grid], dtype=np.int32) + else: + # A dummy array to pass to cython functions so that they don't complain + new_axes_order = np.zeros(0, dtype=np.int32) + + # If it is a scipy sparse matrix, convert it to a sisl sparse matrix + if issparse(weights): + weights = SparseCSR(weights) + + sparse_coeffs = isinstance(weights, SparseCSR) + multi_coeffs = len(weights.shape) == 3 and weights.shape[2] > 1 + + if out is None: + dtype = np.result_type(weights, csr) + else: + dtype = out.dtype + + # Decide which function to use. + if sparse_coeffs: + if multi_coeffs: + reduce_func = reduce_sc_products_multicoeffs_sparse_denseindex + weights.astype = weights.copy + else: + reduce_func = reduce_sc_products + weights = weights.tocsr().toarray(order="C") + else: + if multi_coeffs: + reduce_func = reduce_sc_products_multicoeffs + else: + reduce_func = reduce_sc_products + + if out is not None: + grid = out + + if multi_coeffs: + if out is None: + grid = np.zeros((*reduced_shape, weights.shape[2]), dtype=dtype) + out = grid + + out = grid.reshape(-1, weights.shape[2]) + else: + if out is None: + grid = Grid(reduced_shape, geometry=self.geometry, dtype=dtype) + out = grid.grid + + out = out.ravel() + + reduce_func( + csr.data[:, 0].astype(dtype), + csr.ptr, + csr.col, + weights.astype(dtype=dtype), + self.geometry.no, + self.lattice.sc_off, + weights_lattice.isc_off, + reduce_factor=int(reduce_factor), + grid_shape=grid_shape, + new_axes=new_axes_order, + out=out, + ) + + return grid diff --git a/src/sisl/_sparse_grid_ops.pyx b/src/sisl/_sparse_grid_ops.pyx new file mode 100644 index 0000000000..8608ea8cb3 --- /dev/null +++ b/src/sisl/_sparse_grid_ops.pyx @@ -0,0 +1,904 @@ +import cython +import numpy as np + +cimport numpy as np + +import math + +from sisl import SparseCSR + + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cpdef cython.int transpose_raveled_index(index: cython.int, grid_shape: np.int32_t[:], new_order: np.int32_t[:]): + """Transposes a raveled index, given the shape that raveled it and the new axis order. + + Currently it only works for 3D indices. + + Parameters + ----------- + index: + The index to transpose. + grid_shape: + The old grid shape. + new_order: + The new axes order. E.g. [1, 2, 0] will move the first axis to the last dimension. + """ + + unraveled: cython.int[3] + new_grid_shape: cython.int[3] + new_unraveled: cython.int[3] + + remaining: cython.int + iaxis: cython.int + + new_raveled: cython.int + + remaining = index + for iaxis in range(2, -1, -1): + unraveled[iaxis] = remaining % grid_shape[iaxis] + remaining = remaining // grid_shape[iaxis] + + for iaxis in range(3): + new_grid_shape[iaxis] = grid_shape[new_order[iaxis]] + new_unraveled[iaxis] = unraveled[new_order[iaxis]] + + new_raveled = new_unraveled[2] + new_unraveled[1] * new_grid_shape[2] + new_unraveled[0] * new_grid_shape[1] * new_grid_shape[2] + + return new_raveled + +# This function should be in sisl._sparse, but I don't know how to import it from there +@cython.boundscheck(False) +@cython.wraparound(False) +def dense_index(shape, ptr: np.int32_t[:], col: np.int32_t[:]): + """Returns a dense array containing the value index for each nonzero (row, col) element. + + Zero elements are asigned an index of -1, so routines using this dense index should + take this into account. + + Parameters + ----------- + shape: tuple of shape (2, ) + The shape of the sparse matrix + ptr, col: + ptr and col descriptors of the matrix in CSR format + + Returns + -------- + np.ndarray: + Numpy array of shape = matrix shape and data type np.int32. + """ + nrows: cython.int = ptr.shape[0] - 1 + row: cython.int + row_start: cython.int + row_end: cython.int + ival: cython.int + val_col: cython.int + + # Initialize the dense index + dense_index: np.int32_t[:, :] = np.full(shape, -1, dtype=np.int32) + + for row in range(nrows): + row_start = ptr[row] + row_end = ptr[row + 1] + + for ival in range(row_start, row_end): + val_col = col[ival] + dense_index[row, val_col] = ival + + return np.asarray(dense_index) + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def reduce_grid_sum( + cython.numeric[:] data, ptr: np.int32_t[:], col: np.int32_t[:], + reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], + cython.numeric[:] out +): + """Performs sum over the extra dimension while reducing other dimensions of the grid. + + Parameters + ---------- + data: + The data values of the sparse matrix + ptr: + The array with pointers to the start of each row for the sparse matrix. + col: + The array containing the column index for each value in the sparse matrix. + reduce_factor: + Each row index is integer divided (//) by this factor to get the row index where + the result should be stored. + grid_shape: + If you want to transpose the grid indices before reducing it, you need to specify + the (old) grid shape. If not, pass here an array of shape 0. + new_axes: + If you want to transpose the grid indices before reducing it, you need to specify + the nex axes. As an example, [1, 2, 0] will move the first axis to the last dimension. + + If you don't want to transpose, pass an array of shape 0. + out: + The array where the output should be stored. + """ + nrows: cython.int = ptr.shape[0] - 1 + + i: cython.int + reduced_i: cython.int + j: cython.int + jcol: cython.int + + need_transpose: cython.bint = new_axes.shape[0] > 1 + + cdef cython.numeric row_value + + for i in range(nrows): + + if need_transpose: + reduced_i = transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + else: + reduced_i = i // reduce_factor + + row_value = 0 + for j in range(ptr[i], ptr[i + 1]): + jcol = col[j] + row_value += data[j] + + out[reduced_i] = out[reduced_i] + row_value + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def reduce_grid_matvec_multiply( + cython.numeric[:] data, ptr: np.int32_t[:], col: np.int32_t[:], cython.numeric[:] V, + reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], + cython.numeric[:] out +): + """Performs a matrix-vector multiplication while reducing other dimensions of the grid. + + Parameters + ---------- + data: + The data values of the sparse matrix + ptr: + The array with pointers to the start of each row for the sparse matrix. + col: + The array containing the column index for each value in the sparse matrix. + V: + The dense vector by which the sparse matrix is to be multiplied. + reduce_factor: + Each row index is integer divided (//) by this factor to get the row index where + the result should be stored. + grid_shape: + If you want to transpose the grid indices before reducing it, you need to specify + the (old) grid shape. If not, pass here an array of shape 0. + new_axes: + If you want to transpose the grid indices before reducing it, you need to specify + the nex axes. As an example, [1, 2, 0] will move the first axis to the last dimension. + + If you don't want to transpose, pass an array of shape 0. + out: + The array where the output should be stored. + """ + + nrows: cython.int = ptr.shape[0] - 1 + + i: cython.int + reduced_i: cython.int + j: cython.int + jcol: cython.int + + need_transpose: cython.bint = new_axes.shape[0] > 1 + + cdef cython.numeric row_value + + for i in range(nrows): + reduced_i = i // reduce_factor + + if need_transpose: + reduced_i = transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + else: + reduced_i = i // reduce_factor + + row_value = 0 + for j in range(ptr[i], ptr[i + 1]): + jcol = col[j] + row_value += data[j] * V[jcol] + + out[reduced_i] = out[reduced_i] + row_value + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def reduce_grid_matvecs_multiply( + cython.numeric[:] data, np.int32_t[:] ptr, np.int32_t[:] col, cython.numeric[:, :] V, + reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], + cython.numeric[:, :] out +): + """Performs a matrix-matrix multiplication while reducing other dimensions of the grid. + + Parameters + ---------- + data: + The data values of the sparse matrix + ptr: + The array with pointers to the start of each row for the sparse matrix. + col: + The array containing the column index for each value in the sparse matrix. + V: + The dense matrix by which the sparse matrix is to be multiplied. + reduce_factor: + Each row index is integer divided (//) by this factor to get the row index where + the result should be stored. + grid_shape: + If you want to transpose the grid indices before reducing it, you need to specify + the (old) grid shape. If not, pass here an array of shape 0. + new_axes: + If you want to transpose the grid indices before reducing it, you need to specify + the nex axes. As an example, [1, 2, 0] will move the first axis to the last dimension. + + If you don't want to transpose, pass an array of shape 0. + out: + The array where the output should be stored. + """ + + nrows: cython.int = ptr.shape[0] - 1 + nvecs: cython.int = V.shape[1] + + i: cython.int + reduced_i: cython.int + j: cython.int + jcol: cython.int + ivec: cython.int + + need_transpose: cython.bint = new_axes.shape[0] > 1 + + cdef cython.numeric[:] row_value = np.zeros(nvecs, np.asarray(data).dtype) + + for i in range(nrows): + if need_transpose: + reduced_i = transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + else: + reduced_i = i // reduce_factor + + row_value[:] = 0 + for j in range(ptr[i], ptr[i + 1]): + jcol = col[j] + + for ivec in range(nvecs): + row_value[ivec] = row_value[ivec] + data[j] * V[jcol, ivec] + + for ivec in range(nvecs): + out[reduced_i, ivec] = out[reduced_i, ivec] + row_value[ivec] + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def reduce_sc_products( + cython.floating[:] data, ptr: np.int32_t[:], col: np.int32_t[:], cython.floating[:, :] coeffs, + uc_ncol: cython.int, data_sc_off: np.int32_t[:, :], coeffs_isc_off: np.int32_t[:, :, :], + reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], + cython.floating[:] out +): + """For each row, sums all possible products between column pairs. + + A coefficients array is accepted to weight each product. + + This function is to be used when columns are some local property of the cell (orbital, atom...) and + there are interactions between neighbouring cells. + + For each pair of columns ij, the offset between columns is calculated. Then, the function looks + in the coeffs array (which potentially contains information for interactions between cells) for the ij pair + with that offset. + + This + + Parameters + ---------- + data: + The data values of the sparse matrix + ptr: + The array with pointers to the start of each row for the sparse matrix. + col: + The array containing the column index for each value in the sparse matrix. + coeffs: + The dense matrix containing weights for all possible products. + uc_ncol: + Number of columns for each unit cell. + reduce_factor: + Each row index is integer divided (//) by this factor to get the row index where + the result should be stored. + grid_shape: + If you want to transpose the grid indices before reducing it, you need to specify + the (old) grid shape. If not, pass here an array of shape 0. + new_axes: + If you want to transpose the grid indices before reducing it, you need to specify + the nex axes. As an example, [1, 2, 0] will move the first axis to the last dimension. + + If you don't want to transpose, pass an array of shape 0. + out: + The array where the output should be stored. + """ + nrows: cython.int = ptr.shape[0] - 1 + + # Indices to handle rows + row: cython.int + reduced_row: cython.int + row_start: cython.int + row_end: cython.int + + # Indices to handle pairs of columns (ij) + i: cython.int + icol: cython.int + uc_icol: cython.int + sc_icol: cython.int + icol_sc: cython.int + ipair_sc: cython.int + + j: cython.int + jcol: cython.int + uc_jcol: cython.int + sc_jcol: cython.int + jcol_sc: cython.int + jpair_sc: cython.int + + # Temporal storage to build values + cdef cython.floating row_value + cdef cython.floating i_row_value + + # Index to loop over axes of the grid. + iaxis: cython.int + + # Variables that will help managing orbital pairs that are not within the same cell. + coeffs_nsc: cython.int[3] + sc_diff: cython.int[3] + inv_sc_diff: cython.int[3] + force_same_cell: cython.bint + same_cell: cython.bint + + # Boolean to store whether we should reduce row indices + grid_reduce: cython.bint = reduce_factor > 1 + # Do we need to transpose while reducing? + need_transpose: cython.bint = new_axes.shape[0] > 1 + + # Calculate the number of cells in each direction that the supercell is built of. + # This will be useful just to convert negative supercell indices to positive ones. + # If the number of supercells is 1, we assume that even intercell overlaps + # (if any) have been stored in the unit cell. This is what SIESTA does for gamma point calculations + # with nsc <= 3. + force_same_cell = True + for iaxis in range(3): + coeffs_nsc[iaxis] = coeffs_isc_off.shape[iaxis] + if coeffs_nsc[iaxis] != 1: + force_same_cell = False + + # Loop over rows. + for row in range(nrows): + # Get the potentially reduced index of the output row where we should store the + # results for this row. + reduced_row = row + if grid_reduce: + + if need_transpose: + reduced_row = transpose_raveled_index(row, grid_shape, new_axes) + + reduced_row = reduced_row // reduce_factor + + # Get the limits of this row + row_start = ptr[row] + row_end = ptr[row + 1] + + # Initialize the row value. + row_value = 0 + + # For each row, loop over pairs of columns (ij). + # We add both ij and ji contributions, therefore we only need to loop over j greater than i. + # We do this because it is very easy if orbitals i and j are in the same cell. We also save + # some computation if they are not. + for i in range(row_start, row_end): + icol = col[i] + # Initialize the value for all pairs that we found for i + i_row_value = 0 + + # Precompute the supercell index of icol (will compare it to that of jcol) + icol_sc = icol // uc_ncol + # And also its unit cell index + uc_icol = icol % uc_ncol + + # Get also the + for j in range(i, row_end): + jcol = col[j] + + jcol_sc = jcol // uc_ncol + # Get the unit cell index of jcol + uc_jcol = jcol % uc_ncol + + same_cell = force_same_cell + # If same cell interactions are not forced, we need to discover if this pair + # of columns is within the same cell. + if not force_same_cell: + same_cell = icol_sc == jcol_sc + + # If the columns are not in the same cell, we need to + # (1) Calculate the supercell offset between icol and jcol + # (2) And then calculate the new index for jcol, moving icol to the unit cell + # (3) Do the same in the reverse direction (jcol -> icol) + if not same_cell: + # Calculate the sc offset between both orbitals. + for iaxis in range(3): + sc_diff[iaxis] = data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + # Calculate also the offset in the reverse direction + inv_sc_diff[iaxis] = - sc_diff[iaxis] + + # If the sc_difference is negative, convert it to positive so that we can + # use it to index the isc_off array (we switched off the handling of negative + # indices in cython with wraparound(False)) + if sc_diff[iaxis] < 0: + sc_diff[iaxis] = coeffs_nsc[iaxis] + sc_diff[iaxis] + elif inv_sc_diff[iaxis] < 0: + inv_sc_diff[iaxis] = coeffs_nsc[iaxis] + inv_sc_diff[iaxis] + + # Get the supercell offset index of jcol with respect to icol + jpair_sc = coeffs_isc_off[sc_diff[0], sc_diff[1], sc_diff[2]] + # And use it to calculate the supercell index of the j orbital in this ij pair + sc_jcol = jpair_sc * uc_ncol + uc_jcol + + # Do the same for the ji pair + ipair_sc = coeffs_isc_off[inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2]] + sc_icol = ipair_sc * uc_ncol + uc_icol + + # Add the contribution of this column pair to the row total value. Note that we only + # multiply the coefficients by data[j] here. This is because this loop is over all j + # that pair with a given i. data[i] is a common factor and therefore we can multiply + # after the loop to save operations. + if same_cell: + if icol == jcol: + i_row_value += coeffs[uc_icol, uc_jcol] * data[j] + else: + i_row_value += (coeffs[uc_icol, uc_jcol] + coeffs[uc_jcol, uc_icol]) * data[j] + else: + i_row_value += (coeffs[uc_icol, sc_jcol] + coeffs[uc_jcol, sc_icol]) * data[j] + + # Multiply all the contributions of ij pairs with this i by data[i], as explained inside the j loop. + i_row_value *= data[i] + + # Add the contribution of all ij pairs for this i to the row value. + row_value += i_row_value + + # Store the row value in the output + out[reduced_row] = out[reduced_row] + row_value + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def reduce_sc_products_multicoeffs( + cython.floating[:] data, ptr: np.int32_t[:], col: np.int32_t[:], cython.floating[:, :, :] coeffs, + uc_ncol: cython.int, data_sc_off: np.int32_t[:, :], coeffs_isc_off: np.int32_t[:, :, :], + reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], + cython.floating[:, :] out, +): + """For each row, sums all possible products between column pairs. + + A coefficients array is accepted to weight each product. The with reduce_sc_products is that + this function accepts multiple + + This function is to be used when columns are some local property of the cell (orbital, atom...) and + there are interactions between neighbouring cells. + + For each pair of columns ij, the offset between columns is calculated. Then, the function looks + in the coeffs array (which potentially contains information for interactions between cells) for the ij pair + with that offset. + + Parameters + ---------- + data: + The data values of the sparse matrix + ptr: + The array with pointers to the start of each row for the sparse matrix. + col: + The array containing the column index for each value in the sparse matrix. + coeffs: + The dense matrix containing weights for all possible products. + uc_ncol: + Number of columns for each unit cell. + reduce_factor: + Each row index is integer divided (//) by this factor to get the row index where + the result should be stored. + grid_shape: + If you want to transpose the grid indices before reducing it, you need to specify + the (old) grid shape. If not, pass here an array of shape 0. + new_axes: + If you want to transpose the grid indices before reducing it, you need to specify + the nex axes. As an example, [1, 2, 0] will move the first axis to the last dimension. + + If you don't want to transpose, pass an array of shape 0. + out: + The array where the output should be stored. + """ + nrows: cython.int = ptr.shape[0] - 1 + + # Indices to handle rows + row: cython.int + reduced_row: cython.int + row_start: cython.int + row_end: cython.int + + # Indices to handle pairs of columns (ij) + i: cython.int + icol: cython.int + uc_icol: cython.int + sc_icol: cython.int + icol_sc: cython.int + ipair_sc: cython.int + + j: cython.int + jcol: cython.int + uc_jcol: cython.int + sc_jcol: cython.int + jcol_sc: cython.int + jpair_sc: cython.int + + # Variables to handle multiple productcoefficients + ncoeffs: cython.int = coeffs.shape[2] + icoeff: cython.int + + # Temporal storage to build values + cdef cython.floating[:] row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + cdef cython.floating[:] i_row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + + # Index to loop over axes of the grid. + iaxis: cython.int + + # Variables that will help managing orbital pairs that are not within the same cell. + coeffs_nsc: cython.int[3] + sc_diff: cython.int[3] + inv_sc_diff: cython.int[3] + force_same_cell: cython.bint + same_cell: cython.bint + + # Boolean to store whether we should reduce row indices + grid_reduce: cython.bint = reduce_factor > 1 + # Do we need to transpose while reducing? + need_transpose: cython.bint = new_axes.shape[0] > 1 + + # Calculate the number of cells in each direction that the supercell is built of. + # This will be useful just to convert negative supercell indices to positive ones. + # If the number of supercells is 1, we assume that even intercell overlaps + # (if any) have been stored in the unit cell. This is what SIESTA does for gamma point calculations + # with nsc <= 3. + force_same_cell = True + for iaxis in range(3): + coeffs_nsc[iaxis] = coeffs_isc_off.shape[iaxis] + if coeffs_nsc[iaxis] != 1: + force_same_cell = False + + # Loop over rows. + for row in range(nrows): + # Get the potentially reduced index of the output row where we should store the + # results for this row. + reduced_row = row + if grid_reduce: + + if need_transpose: + reduced_row = transpose_raveled_index(row, grid_shape, new_axes) + + reduced_row = reduced_row // reduce_factor + + # Get the limits of this row + row_start = ptr[row] + row_end = ptr[row + 1] + + # Initialize the row value. + row_value[:] = 0 + + # For each row, loop over pairs of columns (ij). + # We add both ij and ji contributions, therefore we only need to loop over j greater than i. + # We do this because it is very easy if orbitals i and j are in the same cell. We also save + # some computation if they are not. + for i in range(row_start, row_end): + icol = col[i] + # Initialize the value for all pairs that we found for i + i_row_value[:] = 0 + + # Precompute the supercell index of icol (will compare it to that of jcol) + icol_sc = icol // uc_ncol + # And also its unit cell index + uc_icol = icol % uc_ncol + + # Get also the + for j in range(i, row_end): + jcol = col[j] + + jcol_sc = jcol // uc_ncol + # Get the unit cell index of jcol + uc_jcol = jcol % uc_ncol + + same_cell = force_same_cell + # If same cell interactions are not forced, we need to discover if this pair + # of columns is within the same cell. + if not force_same_cell: + same_cell = icol_sc == jcol_sc + + # If the columns are not in the same cell, we need to + # (1) Calculate the supercell offset between icol and jcol + # (2) And then calculate the new index for jcol, moving icol to the unit cell + # (3) Do the same in the reverse direction (jcol -> icol) + if not same_cell: + # Calculate the sc offset between both orbitals. + for iaxis in range(3): + sc_diff[iaxis] = data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + # Calculate also the offset in the reverse direction + inv_sc_diff[iaxis] = - sc_diff[iaxis] + + # If the sc_difference is negative, convert it to positive so that we can + # use it to index the isc_off array (we switched off the handling of negative + # indices in cython with wraparound(False)) + if sc_diff[iaxis] < 0: + sc_diff[iaxis] = coeffs_nsc[iaxis] + sc_diff[iaxis] + elif inv_sc_diff[iaxis] < 0: + inv_sc_diff[iaxis] = coeffs_nsc[iaxis] + inv_sc_diff[iaxis] + + # Get the supercell offset index of jcol with respect to icol + jpair_sc = coeffs_isc_off[sc_diff[0], sc_diff[1], sc_diff[2]] + # And use it to calculate the supercell index of the j orbital in this ij pair + sc_jcol = jpair_sc * uc_ncol + uc_jcol + + # Do the same for the ji pair + ipair_sc = coeffs_isc_off[inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2]] + sc_icol = ipair_sc * uc_ncol + uc_icol + + # Add the contribution of this column pair to the row total value. Note that we only + # multiply the coefficients by data[j] here. This is because this loop is over all j + # that pair with a given i. data[i] is a common factor and therefore we can multiply + # after the loop to save operations. + + # Do it for all coefficients. + for icoeff in range(ncoeffs): + if same_cell: + if icol == jcol: + i_row_value[icoeff] += coeffs[uc_icol, uc_jcol, icoeff] * data[j] + else: + i_row_value[icoeff] += (coeffs[uc_icol, uc_jcol, icoeff] + coeffs[uc_jcol, uc_icol, icoeff]) * data[j] + else: + i_row_value[icoeff] += (coeffs[uc_icol, sc_jcol, icoeff] + coeffs[uc_jcol, sc_icol, icoeff]) * data[j] + + for icoeff in range(ncoeffs): + # Multiply all the contributions of ij pairs with this i by data[i], as explained inside the j loop. + i_row_value[icoeff] *= data[i] + + # Add the contribution of all ij pairs for this i to the row value. + row_value[icoeff] += i_row_value[icoeff] + + for icoeff in range(ncoeffs): + # Store the row value in the output + out[reduced_row, icoeff] = out[reduced_row, icoeff] + row_value[icoeff] + +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +def reduce_sc_products_multicoeffs_sparse_denseindex( + cython.floating[:] data, ptr: np.int32_t[:], col: np.int32_t[:], coeffs_csr: SparseCSR, + uc_ncol: cython.int, data_sc_off: np.int32_t[:, :], coeffs_isc_off: np.int32_t[:, :, :], + reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], + cython.floating[:, :] out, +): + """For each row, sums all possible products between column pairs. + + A coefficients array is accepted to weight each product. The with reduce_sc_products is that + this function accepts multiple + + This function is to be used when columns are some local property of the cell (orbital, atom...) and + there are interactions between neighbouring cells. + + For each pair of columns ij, the offset between columns is calculated. Then, the function looks + in the coeffs array (which potentially contains information for interactions between cells) for the ij pair + with that offset. + + Parameters + ---------- + data: + The data values of the sparse matrix + ptr: + The array with pointers to the start of each row for the sparse matrix. + col: + The array containing the column index for each value in the sparse matrix. + coeffs: + The dense matrix containing weights for all possible products. + uc_ncol: + Number of columns for each unit cell. + reduce_factor: + Each row index is integer divided (//) by this factor to get the row index where + the result should be stored. + grid_shape: + If you want to transpose the grid indices before reducing it, you need to specify + the (old) grid shape. If not, pass here an array of shape 0. + new_axes: + If you want to transpose the grid indices before reducing it, you need to specify + the nex axes. As an example, [1, 2, 0] will move the first axis to the last dimension. + + If you don't want to transpose, pass an array of shape 0. + out: + The array where the output should be stored. + """ + nrows: cython.int = ptr.shape[0] - 1 + + # Indices to handle rows + row: cython.int + reduced_row: cython.int + row_start: cython.int + row_end: cython.int + + # Indices to handle pairs of columns (ij) + i: cython.int + icol: cython.int + uc_icol: cython.int + sc_icol: cython.int + icol_sc: cython.int + ipair_sc: cython.int + + j: cython.int + jcol: cython.int + uc_jcol: cython.int + sc_jcol: cython.int + jcol_sc: cython.int + jpair_sc: cython.int + + # Extra variables to handle coefficients + cdef cython.floating[:, :] coeffs = coeffs_csr.data + # Get the array containing all the dense indices. + dense_idx: np.int32_t[:, :] = dense_index(coeffs_csr.shape[:2], coeffs_csr.ptr, coeffs_csr.col) + coeff_index: cython.int + coeff_index2: cython.int + + # Variables to handle multiple productcoefficients + ncoeffs: cython.int = coeffs.shape[1] + icoeff: cython.int + + # Temporal storage to build values + cdef cython.floating[:] row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + cdef cython.floating[:] i_row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + + # Index to loop over axes of the grid. + iaxis: cython.int + + # Variables that will help managing orbital pairs that are not within the same cell. + coeffs_nsc: cython.int[3] + sc_diff: cython.int[3] + inv_sc_diff: cython.int[3] + force_same_cell: cython.bint + same_cell: cython.bint + + # Boolean to store whether we should reduce row indices + grid_reduce: cython.bint = reduce_factor > 1 + # Do we need to transpose while reducing? + need_transpose: cython.bint = new_axes.shape[0] > 1 + + # Calculate the number of cells in each direction that the supercell is built of. + # This will be useful just to convert negative supercell indices to positive ones. + # If the number of supercells is 1, we assume that even intercell overlaps + # (if any) have been stored in the unit cell. This is what SIESTA does for gamma point calculations + # with nsc <= 3. + force_same_cell = True + for iaxis in range(3): + coeffs_nsc[iaxis] = coeffs_isc_off.shape[iaxis] + if coeffs_nsc[iaxis] != 1: + force_same_cell = False + + # Loop over rows. + for row in range(nrows): + # Get the potentially reduced index of the output row where we should store the + # results for this row. + reduced_row = row + if grid_reduce: + + if need_transpose: + reduced_row = transpose_raveled_index(row, grid_shape, new_axes) + + reduced_row = reduced_row // reduce_factor + + # Get the limits of this row + row_start = ptr[row] + row_end = ptr[row + 1] + + # Initialize the row value. + row_value[:] = 0 + + # For each row, loop over pairs of columns (ij). + # We add both ij and ji contributions, therefore we only need to loop over j greater than i. + # We do this because it is very easy if orbitals i and j are in the same cell. We also save + # some computation if they are not. + for i in range(row_start, row_end): + icol = col[i] + # Initialize the value for all pairs that we found for i + i_row_value[:] = 0 + + # Precompute the supercell index of icol (will compare it to that of jcol) + icol_sc = icol // uc_ncol + # And also its unit cell index + uc_icol = icol % uc_ncol + + # Get also the + for j in range(i, row_end): + jcol = col[j] + + jcol_sc = jcol // uc_ncol + # Get the unit cell index of jcol + uc_jcol = jcol % uc_ncol + + same_cell = force_same_cell + # If same cell interactions are not forced, we need to discover if this pair + # of columns is within the same cell. + if not force_same_cell: + same_cell = icol_sc == jcol_sc + + # If the columns are not in the same cell, we need to + # (1) Calculate the supercell offset between icol and jcol + # (2) And then calculate the new index for jcol, moving icol to the unit cell + # (3) Do the same in the reverse direction (jcol -> icol) + if not same_cell: + # Calculate the sc offset between both orbitals. + for iaxis in range(3): + sc_diff[iaxis] = data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + # Calculate also the offset in the reverse direction + inv_sc_diff[iaxis] = - sc_diff[iaxis] + + # If the sc_difference is negative, convert it to positive so that we can + # use it to index the isc_off array (we switched off the handling of negative + # indices in cython with wraparound(False)) + if sc_diff[iaxis] < 0: + sc_diff[iaxis] = coeffs_nsc[iaxis] + sc_diff[iaxis] + elif inv_sc_diff[iaxis] < 0: + inv_sc_diff[iaxis] = coeffs_nsc[iaxis] + inv_sc_diff[iaxis] + + # Get the supercell offset index of jcol with respect to icol + jpair_sc = coeffs_isc_off[sc_diff[0], sc_diff[1], sc_diff[2]] + # And use it to calculate the supercell index of the j orbital in this ij pair + sc_jcol = jpair_sc * uc_ncol + uc_jcol + + # Do the same for the ji pair + ipair_sc = coeffs_isc_off[inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2]] + sc_icol = ipair_sc * uc_ncol + uc_icol + + # Get the index needed to find the coefficients that we want from the coeffs array. + if same_cell: + if icol == jcol: + coeff_index = dense_idx[uc_icol, uc_jcol] + coeff_index2 = 0 + else: + coeff_index = dense_idx[uc_icol, uc_jcol] + coeff_index2 = dense_idx[uc_jcol, uc_icol] + else: + coeff_index = dense_idx[uc_icol, sc_jcol] + coeff_index2 = dense_idx[uc_jcol, sc_icol] + + # If the index for the needed (row, col) element is -1, it means that the element is 0. + # Just go to next iteration if all elements that we need are 0. Note that we assume here + # that if (row, col) is zero (col, row) is also zero. + if coeff_index < 0: + continue + + # Add the contribution of this column pair to the row total value. Note that we only + # multiply the coefficients by data[j] here. This is because this loop is over all j + # that pair with a given i. data[i] is a common factor and therefore we can multiply + # after the loop to save operations. + + # Do it for all coefficients. + for icoeff in range(ncoeffs): + if same_cell: + if icol == jcol: + i_row_value[icoeff] += coeffs[coeff_index, icoeff] * data[j] + else: + i_row_value[icoeff] += (coeffs[coeff_index, icoeff] + coeffs[coeff_index2, icoeff]) * data[j] + else: + i_row_value[icoeff] += (coeffs[coeff_index, icoeff] + coeffs[coeff_index2, icoeff]) * data[j] + + for icoeff in range(ncoeffs): + # Multiply all the contributions of ij pairs with this i by data[i], as explained inside the j loop. + i_row_value[icoeff] *= data[i] + + # Add the contribution of all ij pairs for this i to the row value. + row_value[icoeff] += i_row_value[icoeff] + + for icoeff in range(ncoeffs): + # Store the row value in the output + out[reduced_row, icoeff] = out[reduced_row, icoeff] + row_value[icoeff] diff --git a/src/sisl/physics/densitymatrix.py b/src/sisl/physics/densitymatrix.py index 5112653604..770479cbb9 100644 --- a/src/sisl/physics/densitymatrix.py +++ b/src/sisl/physics/densitymatrix.py @@ -5,7 +5,7 @@ import math as m from numbers import Integral -from typing import Optional +from typing import Literal, Optional import numpy as np from numpy import add, dot, logical_and, repeat, subtract, unique @@ -15,7 +15,7 @@ import sisl._array as _a from sisl import BoundaryCondition as BC -from sisl import Geometry, Lattice +from sisl import Geometry, Grid, Lattice from sisl._core.sparse import SparseCSR, _ncol_to_indptr, _to_coo from sisl._core.sparse_geometry import SparseAtom, SparseOrbital from sisl._indices import indices_fabs_le, indices_le @@ -688,7 +688,15 @@ def get_BO(geom, D, S, rows, cols): "0.15", "0.16", ) - def density(self, grid, spinor=None, atol: float = 1e-7, eta=None): + def density( + self, + grid: Grid, + spinor=None, + atol: float = 1e-7, + eta: Optional[bool] = False, + method: Literal["pre-compute", "direct"] = "pre-compute", + **kwargs, + ): r"""Expand the density matrix to the charge density on a grid This routine calculates the real-space density components on a specified grid. @@ -727,29 +735,23 @@ def density(self, grid, spinor=None, atol: float = 1e-7, eta=None): the tolerance, they will be treated as strictly zeros. eta : bool, optional show a progressbar on stdout - """ - geometry = self.geometry - # Check that the atomic coordinates, really are all within the intrinsic supercell. - # If not, it may mean that the DM does not conform to the primary unit-cell paradigm - # of matrix elements. It complicates things. - fxyz = geometry.fxyz - f_min = fxyz.min() - f_max = fxyz.max() - del fxyz, f_min, f_max + method: + It determines if the orbital values are computed on the fly (direct) or they are all pre-computed + on the grid at the beginning(pre-compute). + Pre computing orbitals results in a faster computation, but it requires more memory. - # Extract sub variables used throughout the loop - shape = _a.asarrayi(grid.shape) - dcell = grid.dcell - - # Sparse matrix data - csr = self._csr + Notes + ----- - # In the following we don't care about division - # So 1) save error state, 2) turn off divide by 0, 3) calculate, 4) turn on old error state - old_err = np.seterr(divide="ignore", invalid="ignore") + The `method` argument may change at will since this is an experimental feature. + """ + # Translate the density matrix to have all the unit cell atoms actually inside + # the unit cell, since this will facilitate things greatly and it gives the + # same result. + uc_dm = self.translate2uc() - # Placeholder for the resulting coefficients - DM = None + # Get the DM components with which we want to compute the density + csr = uc_dm._csr if self.spin.kind > Spin.POLARIZED: if spinor is None: # Default to the total density @@ -779,6 +781,15 @@ def density(self, grid, spinor=None, atol: float = 1e-7, eta=None): # Perform dot-product with spinor, and take out the diagonal real part DM = dot(DM, spinor.T)[:, [0, 1], [0, 1]].sum(1).real + # Create the DM csr matrix. + csrDM = csr_matrix( + (DM, csr.col[idx], _ncol_to_indptr(csr.ncol)), + shape=(self.shape[:2]), + dtype=DM.dtype, + ) + + del idx, DM + elif self.spin.kind == Spin.POLARIZED: if spinor is None: spinor = _a.onesd(2) @@ -797,22 +808,50 @@ def density(self, grid, spinor=None, atol: float = 1e-7, eta=None): "argument as an integer, or a vector of length 2" ) - idx = _a.array_arange(csr.ptr[:-1], n=csr.ncol) - DM = csr._D[idx, 0] * spinor[0] + csr._D[idx, 1] * spinor[1] + csrDM = csr.tocsr(dim=0) * spinor[0] + csr.tocsr(dim=1) * spinor[1] else: - idx = _a.array_arange(csr.ptr[:-1], n=csr.ncol) - DM = csr._D[idx, 0] + csrDM = csr.tocsr(dim=0) - # Create the DM csr matrix. - csrDM = csr_matrix( - (DM, csr.col[idx], _ncol_to_indptr(csr.ncol)), - shape=(self.shape[:2]), - dtype=DM.dtype, - ) + if method == "pre-compute": + # Compute orbital values on the grid + psi_values = uc_dm.geometry._orbital_values(grid.shape) + + psi_values.reduce_orbital_products( + csrDM, uc_dm.lattice, out=grid.grid, **kwargs + ) + elif method == "direct": + self._density_direct(grid, csrDM, atol=atol, eta=eta) + + def _density_direct( + self, grid: Grid, csrDM, atol: float = 1e-7, eta: Optional[bool] = None + ): + r"""Compute the density by calculating the orbital values on the fly. - # Clean-up - del idx, DM + Parameters + ---------- + grid : Grid + the grid on which to add the density (the density is in ``e/Ang^3``) + spinor : (2,) or (2, 2), optional + the spinor matrix to obtain the diagonal components of the density. For un-polarized density matrices + this keyword has no influence. For spin-polarized it *has* to be either 1 integer or a vector of + length 2 (defaults to total density). + For non-collinear/spin-orbit density matrices it has to be a 2x2 matrix (defaults to total density). + atol : float, optional + DM tolerance for accepted values. For all density matrix elements with absolute values below + the tolerance, they will be treated as strictly zeros. + eta : bool, optional + show a progressbar on stdout + """ + geometry = self.geometry + + # Extract sub variables used throughout the loop + shape = _a.asarrayi(grid.shape) + dcell = grid.dcell + + # In the following we don't care about division + # So 1) save error state, 2) turn off divide by 0, 3) calculate, 4) turn on old error state + old_err = np.seterr(divide="ignore", invalid="ignore") # To heavily speed up the construction of the density we can recreate # the sparse csrDM matrix by summing the lower and upper triangular part. diff --git a/src/sisl/physics/tests/test_density_matrix.py b/src/sisl/physics/tests/test_density_matrix.py index dc263a2936..a7fe7e160a 100644 --- a/src/sisl/physics/tests/test_density_matrix.py +++ b/src/sisl/physics/tests/test_density_matrix.py @@ -88,6 +88,11 @@ def func(D, ia, atoms, atoms_xyz): return t() +@pytest.fixture(scope="module", params=["direct", "pre-compute"]) +def density_method(request): + return request.param + + @pytest.mark.physics @pytest.mark.densitymatrix class TestDensityMatrix: @@ -183,14 +188,14 @@ def test_bond_order(self, setup, method, option, projection): assert isinstance(BO, SparseOrbital) assert BO.shape[:2] == (D.geometry.no, D.geometry.no_s) - def test_rho1(self, setup): + def test_rho1(self, setup, density_method): D = setup.D.copy() D.construct(setup.func) grid = Grid(0.2, geometry=setup.D.geometry) - D.density(grid) + D.density(grid, method=density_method) @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") - def test_rho2(self): + def test_rho2(self, density_method): bond = 1.42 sq3h = 3.0**0.5 * 0.5 lattice = Lattice( @@ -214,23 +219,23 @@ def test_rho2(self): D = DensityMatrix(g) D.construct([[0.1, bond + 0.01], [1.0, 0.1]]) grid = Grid(0.2, geometry=D.geometry) - D.density(grid) + D.density(grid, method=density_method) D = DensityMatrix(g, spin=Spin("P")) D.construct([[0.1, bond + 0.01], [(1.0, 0.5), (0.1, 0.1)]]) grid = Grid(0.2, geometry=D.geometry) - D.density(grid) - D.density(grid, [1.0, -1]) - D.density(grid, 0) - D.density(grid, 1) + D.density(grid, method=density_method) + D.density(grid, [1.0, -1], method=density_method) + D.density(grid, 0, method=density_method) + D.density(grid, 1, method=density_method) D = DensityMatrix(g, spin=Spin("NC")) D.construct( [[0.1, bond + 0.01], [(1.0, 0.5, 0.01, 0.01), (0.1, 0.1, 0.1, 0.1)]] ) grid = Grid(0.2, geometry=D.geometry) - D.density(grid) - D.density(grid, [[1.0, 0.0], [0.0, -1]]) + D.density(grid, method=density_method) + D.density(grid, [[1.0, 0.0], [0.0, -1]], method=density_method) D = DensityMatrix(g, spin=Spin("SO")) D.construct( @@ -243,11 +248,11 @@ def test_rho2(self): ] ) grid = Grid(0.2, geometry=D.geometry) - D.density(grid) - D.density(grid, [[1.0, 0.0], [0.0, -1]]) - D.density(grid, Spin.X) - D.density(grid, Spin.Y) - D.density(grid, Spin.Z) + D.density(grid, method=density_method) + D.density(grid, [[1.0, 0.0], [0.0, -1]], method=density_method) + D.density(grid, Spin.X, method=density_method) + D.density(grid, Spin.Y, method=density_method) + D.density(grid, Spin.Z, method=density_method) @pytest.mark.filterwarnings("ignore", message="*is NOT Hermitian for on-site") def test_orbital_momentum(self): @@ -474,20 +479,20 @@ def test_spin_rotate_so(self): assert not np.allclose(D_mull, d_mull) assert np.allclose(D_mull[0], d_mull[0]) - def test_rho_eta(self, setup): + def test_rho_eta(self, setup, density_method): D = setup.D.copy() D.construct(setup.func) grid = Grid(0.2, geometry=setup.D.geometry) - D.density(grid, eta=True) + D.density(grid, eta=True, method=density_method) - def test_rho_smaller_grid1(self, setup): + def test_rho_smaller_grid1(self, setup, density_method): D = setup.D.copy() D.construct(setup.func) lattice = setup.D.geometry.cell.copy() / 2 grid = Grid(0.2, geometry=setup.D.geometry.copy(), lattice=lattice) - D.density(grid) + D.density(grid, method=density_method) - def test_rho_fail_p(self): + def test_rho_fail_p(self, density_method): bond = 1.42 sq3h = 3.0**0.5 * 0.5 lattice = Lattice( @@ -513,9 +518,9 @@ def test_rho_fail_p(self): D.construct([[0.1, bond + 0.01], [(1.0, 0.5), (0.1, 0.1)]]) grid = Grid(0.2, geometry=D.geometry) with pytest.raises(ValueError): - D.density(grid, [1.0, -1, 0.0]) + D.density(grid, [1.0, -1, 0.0], method=density_method) - def test_rho_fail_nc(self): + def test_rho_fail_nc(self, density_method): bond = 1.42 sq3h = 3.0**0.5 * 0.5 lattice = Lattice( @@ -543,7 +548,7 @@ def test_rho_fail_nc(self): ) grid = Grid(0.2, geometry=D.geometry) with pytest.raises(ValueError): - D.density(grid, [1.0, 0.0]) + D.density(grid, [1.0, 0.0], method=density_method) def test_pickle(self, setup): import pickle as p diff --git a/src/sisl/tests/test_sparse_grid.py b/src/sisl/tests/test_sparse_grid.py new file mode 100644 index 0000000000..24cfff6c30 --- /dev/null +++ b/src/sisl/tests/test_sparse_grid.py @@ -0,0 +1,312 @@ +import numpy as np +import pytest + +import sisl +from sisl import Grid +from sisl._sparse_grid_ops import transpose_raveled_index + + +@pytest.fixture +def geometry(): + + r = np.linspace(0, 3.5, 50) + f = np.exp(-r) + + orb = sisl.AtomicOrbital("2pzZ", (r, f)) + geom = sisl.geom.graphene(orthogonal=True, atoms=sisl.Atom(6, orb)) + + return geom + + +@pytest.fixture +def grid_shape(): + return (8, 10, 12) + + +@pytest.fixture +def psi_values(geometry, grid_shape): + return geometry._orbital_values(grid_shape) + + +@pytest.fixture +def H(geometry): + H = sisl.Hamiltonian(geometry) + H.construct( + [(0.1, 1.44), (0, -2.7)], + ) + return H + + +def test_transpose_raveled(): + """Tests the cython implemented function that transposes + raveled indices.""" + + # Define the reference function, which calls numpy + def transpose_raveled_numpy(index, grid_shape, new_order): + grid_shape = np.array(grid_shape) + + unraveled = np.unravel_index(index, grid_shape) + unraveled = np.array(unraveled) + new_grid_shape = grid_shape[new_order] + new_unraveled = unraveled[new_order] + + return np.ravel_multi_index(new_unraveled, new_grid_shape) + + grid_shape = np.array([3, 4, 5], dtype=np.int32) + grid_size = np.prod(grid_shape) + # Move the first axis to the last dimension. + new_order = np.array([1, 2, 0], dtype=np.int32) + + for i in range(grid_size): + transposed = transpose_raveled_index(i, grid_shape, new_order) + transposed_np = transpose_raveled_numpy(i, grid_shape, new_order) + + assert transposed == transposed_np + + # Move the last axis to the first dimension. + new_order = np.array([2, 0, 1], dtype=np.int32) + + for i in range(grid_size): + transposed = transpose_raveled_index(i, grid_shape, new_order) + transposed_np = transpose_raveled_numpy(i, grid_shape, new_order) + + assert transposed == transposed_np + + +@pytest.mark.parametrize("k", [(0, 0, 0), (0.5, 0, 0)]) +def test_wavefunction_correct(H, grid_shape, psi_values, k): + """Checks that the wavefunction computed with the precalculated + psi values is the same as the wavefunction computed directly.""" + + eig = H.eigenstate(k=k)[0] + + wf_grid = sisl.Grid(grid_shape, geometry=H.geometry, dtype=np.complex128) + + eig.wavefunction(wf_grid) + + from_psi_grid = psi_values.reduce_orbitals(eig.state.T, k=k) + + assert np.allclose(from_psi_grid.grid, wf_grid.grid) + + +@pytest.mark.parametrize( + ["k", "ncoeffs"], + [[(0, 0, 0), 1], [(0, 0, 0), 2], [(0.25, 0, 0), 1], [(0.25, 0, 0), 2]], +) +def test_onthefly_reduction(geometry, psi_values, k, ncoeffs): + """Checks that the on the fly reduction produces the same + results as computing the whole grid and then reducing.""" + + coeffs = np.random.random((geometry.no, ncoeffs)) + + not_reduced = psi_values.reduce_orbitals(coeffs, k=k) + + # Reducing the last axis + reduced = psi_values.reduce_orbitals(coeffs, k=k, reduce_grid=(2,)) + + post_reduced = not_reduced.sum(2) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) + + # Reducing the two last axes + reduced = psi_values.reduce_orbitals(coeffs, k=k, reduce_grid=(2, 1)) + + post_reduced = not_reduced.sum(2).sum(1) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) + + # Now we are going to reduce the leading axes. This is more involved, so + # it is more likely to fail. + + # Reducing the first axis + reduced = psi_values.reduce_orbitals(coeffs, k=k, reduce_grid=(0,)) + + post_reduced = not_reduced.sum(0) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) + + # Reducing the first and second axis + reduced = psi_values.reduce_orbitals(coeffs, k=k, reduce_grid=(0, 1)) + + post_reduced = not_reduced.sum(1).sum(0) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) + + +def test_orbital_products(geometry): + """Very simple tests to see if the orbital products are computed correctly""" + geometry = geometry.copy() + geometry.set_nsc([1, 1, 1]) + geometry.lattice.set_boundary_condition("open") + + # Don't get periodic contributions for orbital values, then predicting + # grid values will be much easier. + psi_values = geometry._orbital_values((10, 10, 10)) + + orb_csr = psi_values._csr.tocsr() + orb_0 = orb_csr[:, 0].toarray().ravel() + orb_1 = orb_csr[:, 1].toarray().ravel() + + # Compute the orbital products with one coefficient + DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) + DM[0, 0] = 1.0 + + dens = psi_values.reduce_orbital_products(DM.tocsr(), geometry.sc) + assert isinstance(dens, Grid) + assert dens.shape == psi_values.grid_shape + assert np.any(dens.grid != 0) + + predicted = orb_0**2 + + assert np.allclose(dens.grid.ravel(), predicted) + + # Compute the orbital products with two diagonal coefficients + DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) + DM[0, 0] = 1.0 + DM[1, 1] = 2.0 + + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + predicted = (orb_0**2) * 1.0 + (orb_1**2) * 2.0 + + assert np.allclose(dens.grid.ravel(), predicted) + + # Compute the orbital products with an off-diagonal coefficient + DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) + DM[0, 0] = 1.0 + DM[0, 1] = 2.0 + + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + predicted = (orb_0**2) * 1.0 + (orb_0 * orb_1) * 2.0 + + assert np.allclose(dens.grid.ravel(), predicted) + + # Compute the orbital products with both opposite off-diagonal coefficients + DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) + DM[0, 0] = 1.0 + DM[0, 1] = 1.0 + DM[1, 0] = 0.5 + + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + predicted = (orb_0**2) * 1.0 + (orb_0 * orb_1) * 1.5 + + assert np.allclose(dens.grid.ravel(), predicted) + + +def test_orbital_products_periodic(geometry): + """Very simple tests to see if the orbital products are computed correctly + when there are periodic conditions. + """ + geometry = geometry.copy() + geometry.set_nsc([3, 1, 1]) + geometry.lattice.set_boundary_condition(a="periodic", b="open", c="open") + + # Don't get periodic contributions for orbital values, then predicting + # grid values will be much easier. + psi_values = geometry._orbital_values((10, 10, 10)) + + orb_csr = psi_values._csr.tocsr() + orb_0 = orb_csr[:, [0, geometry.no, 2 * geometry.no]].toarray() + orb_1 = orb_csr[:, [1, geometry.no + 1, 2 * geometry.no + 1]].toarray() + + # Compute the orbital products with one coefficient + DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) + DM[0, 0] = 1.0 + + dens = psi_values.reduce_orbital_products(DM.tocsr(), geometry.sc) + + predicted = (orb_0**2).sum(axis=1) + + assert np.allclose(dens.grid.ravel(), predicted) + + # Compute the orbital products with two diagonal coefficients + DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) + DM[0, 0] = 1.0 + DM[1, 1] = 2.0 + + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + predicted = (orb_0**2).sum(axis=1) + (orb_1**2).sum(axis=1) * 2.0 + + assert np.allclose(dens.grid.ravel(), predicted) + + # Predicting the result with periodic non-diagonal coefficients is a bit more involved. + # We don't check for now. + + +@pytest.mark.parametrize("ncoeffs", [1, 2]) +def test_orbital_products_onthefly_reduction(geometry, psi_values, ncoeffs): + """Checks that the on the fly reduction produces the same + results as computing the whole grid and then reducing.""" + + # Build a test DM + DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) + DM[0, 0] = 1.0 + DM[1, 1] = 2.0 + DM[2, 2] = 0.75 + DM[3, 3] = 0.92 + DM[0, 1] = 0.5 + DM[1, 0] = 0.5 + + csr = DM.tocsr() + # If this run is to be done with multiple coefficients, build the csr + # matrix with dim > 1. + if ncoeffs > 1: + csr = sisl.SparseCSR(csr, dim=ncoeffs) + csr.data[:, :] = csr.data[:, 0].reshape(-1, 1) + + # Compute the values on the full grid + not_reduced = psi_values.reduce_orbital_products(csr, DM.sc) + + # Reducing the last axis + reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(2,)) + + post_reduced = not_reduced.sum(2) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) + + # Reducing the two last axes + reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(2, 1)) + + post_reduced = not_reduced.sum(2).sum(1) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) + + # Now we are going to reduce the leading axes. This is more involved, so + # it is more likely to fail. + + # Reducing the first axis + reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(0,)) + + post_reduced = not_reduced.sum(0) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) + + # Reducing the first and second axis + reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(0, 1)) + + post_reduced = not_reduced.sum(1).sum(0) + assert reduced.size == post_reduced.size + if ncoeffs == 1: + assert np.allclose(reduced.grid, post_reduced.grid) + else: + assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) From 289b685f0b20a8a384a1e4d8694a8427b48fc178 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Fri, 15 Mar 2024 22:22:19 +0100 Subject: [PATCH 2/5] fix tests --- src/sisl/_sparse_grid.py | 8 ++++++-- src/sisl/tests/test_sparse_grid.py | 22 +++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/sisl/_sparse_grid.py b/src/sisl/_sparse_grid.py index bf349f569b..56a531c848 100644 --- a/src/sisl/_sparse_grid.py +++ b/src/sisl/_sparse_grid.py @@ -412,7 +412,6 @@ def reduce_orbital_products( if sparse_coeffs: if multi_coeffs: reduce_func = reduce_sc_products_multicoeffs_sparse_denseindex - weights.astype = weights.copy else: reduce_func = reduce_sc_products weights = weights.tocsr().toarray(order="C") @@ -438,11 +437,16 @@ def reduce_orbital_products( out = out.ravel() + if isinstance(weights, SparseCSR): + weights = weights.copy(dtype=dtype) + else: + weights = weights.astype(dtype) + reduce_func( csr.data[:, 0].astype(dtype), csr.ptr, csr.col, - weights.astype(dtype=dtype), + weights, self.geometry.no, self.lattice.sc_off, weights_lattice.isc_off, diff --git a/src/sisl/tests/test_sparse_grid.py b/src/sisl/tests/test_sparse_grid.py index 24cfff6c30..17800fbc97 100644 --- a/src/sisl/tests/test_sparse_grid.py +++ b/src/sisl/tests/test_sparse_grid.py @@ -163,7 +163,7 @@ def test_orbital_products(geometry): DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) DM[0, 0] = 1.0 - dens = psi_values.reduce_orbital_products(DM.tocsr(), geometry.sc) + dens = psi_values.reduce_orbital_products(DM.tocsr(), geometry.lattice) assert isinstance(dens, Grid) assert dens.shape == psi_values.grid_shape assert np.any(dens.grid != 0) @@ -177,7 +177,7 @@ def test_orbital_products(geometry): DM[0, 0] = 1.0 DM[1, 1] = 2.0 - dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.lattice) predicted = (orb_0**2) * 1.0 + (orb_1**2) * 2.0 assert np.allclose(dens.grid.ravel(), predicted) @@ -187,7 +187,7 @@ def test_orbital_products(geometry): DM[0, 0] = 1.0 DM[0, 1] = 2.0 - dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.lattice) predicted = (orb_0**2) * 1.0 + (orb_0 * orb_1) * 2.0 assert np.allclose(dens.grid.ravel(), predicted) @@ -198,7 +198,7 @@ def test_orbital_products(geometry): DM[0, 1] = 1.0 DM[1, 0] = 0.5 - dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.lattice) predicted = (orb_0**2) * 1.0 + (orb_0 * orb_1) * 1.5 assert np.allclose(dens.grid.ravel(), predicted) @@ -224,7 +224,7 @@ def test_orbital_products_periodic(geometry): DM = sisl.DensityMatrix(geometry, dim=1, dtype=np.float64) DM[0, 0] = 1.0 - dens = psi_values.reduce_orbital_products(DM.tocsr(), geometry.sc) + dens = psi_values.reduce_orbital_products(DM.tocsr(), geometry.lattice) predicted = (orb_0**2).sum(axis=1) @@ -235,7 +235,7 @@ def test_orbital_products_periodic(geometry): DM[0, 0] = 1.0 DM[1, 1] = 2.0 - dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.sc) + dens = psi_values.reduce_orbital_products(DM.tocsr(), DM.lattice) predicted = (orb_0**2).sum(axis=1) + (orb_1**2).sum(axis=1) * 2.0 assert np.allclose(dens.grid.ravel(), predicted) @@ -266,10 +266,10 @@ def test_orbital_products_onthefly_reduction(geometry, psi_values, ncoeffs): csr.data[:, :] = csr.data[:, 0].reshape(-1, 1) # Compute the values on the full grid - not_reduced = psi_values.reduce_orbital_products(csr, DM.sc) + not_reduced = psi_values.reduce_orbital_products(csr, DM.lattice) # Reducing the last axis - reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(2,)) + reduced = psi_values.reduce_orbital_products(csr, DM.lattice, reduce_grid=(2,)) post_reduced = not_reduced.sum(2) assert reduced.size == post_reduced.size @@ -279,7 +279,7 @@ def test_orbital_products_onthefly_reduction(geometry, psi_values, ncoeffs): assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) # Reducing the two last axes - reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(2, 1)) + reduced = psi_values.reduce_orbital_products(csr, DM.lattice, reduce_grid=(2, 1)) post_reduced = not_reduced.sum(2).sum(1) assert reduced.size == post_reduced.size @@ -292,7 +292,7 @@ def test_orbital_products_onthefly_reduction(geometry, psi_values, ncoeffs): # it is more likely to fail. # Reducing the first axis - reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(0,)) + reduced = psi_values.reduce_orbital_products(csr, DM.lattice, reduce_grid=(0,)) post_reduced = not_reduced.sum(0) assert reduced.size == post_reduced.size @@ -302,7 +302,7 @@ def test_orbital_products_onthefly_reduction(geometry, psi_values, ncoeffs): assert np.allclose(reduced.reshape(post_reduced.shape), post_reduced) # Reducing the first and second axis - reduced = psi_values.reduce_orbital_products(csr, DM.sc, reduce_grid=(0, 1)) + reduced = psi_values.reduce_orbital_products(csr, DM.lattice, reduce_grid=(0, 1)) post_reduced = not_reduced.sum(1).sum(0) assert reduced.size == post_reduced.size From 220b2ec172291abe0c028ae8595619379a5db313 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Wed, 20 Mar 2024 13:33:00 +0100 Subject: [PATCH 3/5] changed to pure python syntax --- src/sisl/CMakeLists.txt | 13 +- ...parse_grid_ops.pyx => _sparse_grid_ops.py} | 239 +++++++++++------- 2 files changed, 165 insertions(+), 87 deletions(-) rename src/sisl/{_sparse_grid_ops.pyx => _sparse_grid_ops.py} (83%) diff --git a/src/sisl/CMakeLists.txt b/src/sisl/CMakeLists.txt index 55f75ca404..4fff724b44 100644 --- a/src/sisl/CMakeLists.txt +++ b/src/sisl/CMakeLists.txt @@ -1,4 +1,4 @@ -foreach(source _indices _math_small _sparse_grid_ops) +foreach(source _indices _math_small) add_cython_library( SOURCE ${source}.pyx LIBRARY ${source} @@ -8,6 +8,17 @@ foreach(source _indices _math_small _sparse_grid_ops) DESTINATION ${SKBUILD_PROJECT_NAME}) endforeach() +# Python files that can be compiled with cython (Pure Python syntax) +foreach(source _sparse_grid_ops) + add_cython_library( + SOURCE ${source}.py + LIBRARY ${source} + OUTPUT ${source}_C + ) + install(TARGETS ${source} LIBRARY + DESTINATION ${SKBUILD_PROJECT_NAME}) +endforeach() + # Add other sub-directories add_subdirectory("_core") add_subdirectory("io") diff --git a/src/sisl/_sparse_grid_ops.pyx b/src/sisl/_sparse_grid_ops.py similarity index 83% rename from src/sisl/_sparse_grid_ops.pyx rename to src/sisl/_sparse_grid_ops.py index 8608ea8cb3..ec37686589 100644 --- a/src/sisl/_sparse_grid_ops.pyx +++ b/src/sisl/_sparse_grid_ops.py @@ -1,17 +1,18 @@ import cython +import cython.cimports.numpy as cnp import numpy as np -cimport numpy as np - -import math - from sisl import SparseCSR @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) -cpdef cython.int transpose_raveled_index(index: cython.int, grid_shape: np.int32_t[:], new_order: np.int32_t[:]): +@cython.ccall +@cython.exceptval(check=False) +def transpose_raveled_index( + index: cython.int, grid_shape: cnp.int32_t[:], new_order: cnp.int32_t[:] +) -> cython.int: """Transposes a raveled index, given the shape that raveled it and the new axis order. Currently it only works for 3D indices. @@ -44,14 +45,19 @@ new_grid_shape[iaxis] = grid_shape[new_order[iaxis]] new_unraveled[iaxis] = unraveled[new_order[iaxis]] - new_raveled = new_unraveled[2] + new_unraveled[1] * new_grid_shape[2] + new_unraveled[0] * new_grid_shape[1] * new_grid_shape[2] + new_raveled = ( + new_unraveled[2] + + new_unraveled[1] * new_grid_shape[2] + + new_unraveled[0] * new_grid_shape[1] * new_grid_shape[2] + ) return new_raveled + # This function should be in sisl._sparse, but I don't know how to import it from there @cython.boundscheck(False) @cython.wraparound(False) -def dense_index(shape, ptr: np.int32_t[:], col: np.int32_t[:]): +def dense_index(shape, ptr: cnp.int32_t[:], col: cnp.int32_t[:]) -> cnp.int32_t[:, :]: """Returns a dense array containing the value index for each nonzero (row, col) element. Zero elements are asigned an index of -1, so routines using this dense index should @@ -69,15 +75,12 @@ def dense_index(shape, ptr: np.int32_t[:], col: np.int32_t[:]): np.ndarray: Numpy array of shape = matrix shape and data type np.int32. """ - nrows: cython.int = ptr.shape[0] - 1 + nrows = ptr.shape[0] - 1 row: cython.int - row_start: cython.int - row_end: cython.int ival: cython.int - val_col: cython.int # Initialize the dense index - dense_index: np.int32_t[:, :] = np.full(shape, -1, dtype=np.int32) + dense_index: cnp.int32_t[:, :] = np.full(shape, -1, dtype=np.int32) for row in range(nrows): row_start = ptr[row] @@ -89,13 +92,18 @@ def dense_index(shape, ptr: np.int32_t[:], col: np.int32_t[:]): return np.asarray(dense_index) + @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def reduce_grid_sum( - cython.numeric[:] data, ptr: np.int32_t[:], col: np.int32_t[:], - reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], - cython.numeric[:] out + data: cython.numeric[:], + ptr: cnp.int32_t[:], + col: cnp.int32_t[:], + reduce_factor: cython.int, + grid_shape: cnp.int32_t[:], + new_axes: cnp.int32_t[:], + out: cython.numeric[:], ): """Performs sum over the extra dimension while reducing other dimensions of the grid. @@ -121,38 +129,43 @@ def reduce_grid_sum( out: The array where the output should be stored. """ - nrows: cython.int = ptr.shape[0] - 1 + nrows = ptr.shape[0] - 1 i: cython.int - reduced_i: cython.int j: cython.int - jcol: cython.int need_transpose: cython.bint = new_axes.shape[0] > 1 - cdef cython.numeric row_value + row_value: cython.numeric for i in range(nrows): if need_transpose: - reduced_i = transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + reduced_i = ( + transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + ) else: reduced_i = i // reduce_factor row_value = 0 for j in range(ptr[i], ptr[i + 1]): - jcol = col[j] row_value += data[j] out[reduced_i] = out[reduced_i] + row_value + @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def reduce_grid_matvec_multiply( - cython.numeric[:] data, ptr: np.int32_t[:], col: np.int32_t[:], cython.numeric[:] V, - reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], - cython.numeric[:] out + data: cython.numeric[:], + ptr: cnp.int32_t[:], + col: cnp.int32_t[:], + V: cython.numeric[:], + reduce_factor: cython.int, + grid_shape: cnp.int32_t[:], + new_axes: cnp.int32_t[:], + out: cython.numeric[:], ): """Performs a matrix-vector multiplication while reducing other dimensions of the grid. @@ -190,13 +203,15 @@ def reduce_grid_matvec_multiply( need_transpose: cython.bint = new_axes.shape[0] > 1 - cdef cython.numeric row_value + row_value: cython.numeric for i in range(nrows): reduced_i = i // reduce_factor if need_transpose: - reduced_i = transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + reduced_i = ( + transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + ) else: reduced_i = i // reduce_factor @@ -207,13 +222,19 @@ def reduce_grid_matvec_multiply( out[reduced_i] = out[reduced_i] + row_value + @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def reduce_grid_matvecs_multiply( - cython.numeric[:] data, np.int32_t[:] ptr, np.int32_t[:] col, cython.numeric[:, :] V, - reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], - cython.numeric[:, :] out + data: cython.numeric[:], + ptr: cnp.int32_t[:], + col: cnp.int32_t[:], + V: cython.numeric[:, :], + reduce_factor: cython.int, + grid_shape: cnp.int32_t[:], + new_axes: cnp.int32_t[:], + out: cython.numeric[:, :], ): """Performs a matrix-matrix multiplication while reducing other dimensions of the grid. @@ -242,22 +263,22 @@ def reduce_grid_matvecs_multiply( The array where the output should be stored. """ - nrows: cython.int = ptr.shape[0] - 1 - nvecs: cython.int = V.shape[1] + nrows = ptr.shape[0] - 1 + nvecs = V.shape[1] i: cython.int - reduced_i: cython.int j: cython.int - jcol: cython.int ivec: cython.int need_transpose: cython.bint = new_axes.shape[0] > 1 - cdef cython.numeric[:] row_value = np.zeros(nvecs, np.asarray(data).dtype) + row_value: cython.numeric[:] = np.zeros(nvecs, np.asarray(data).dtype) for i in range(nrows): if need_transpose: - reduced_i = transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + reduced_i = ( + transpose_raveled_index(i, grid_shape, new_axes) // reduce_factor + ) else: reduced_i = i // reduce_factor @@ -271,14 +292,22 @@ def reduce_grid_matvecs_multiply( for ivec in range(nvecs): out[reduced_i, ivec] = out[reduced_i, ivec] + row_value[ivec] + @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def reduce_sc_products( - cython.floating[:] data, ptr: np.int32_t[:], col: np.int32_t[:], cython.floating[:, :] coeffs, - uc_ncol: cython.int, data_sc_off: np.int32_t[:, :], coeffs_isc_off: np.int32_t[:, :, :], - reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], - cython.floating[:] out + data: cython.floating[:], + ptr: cnp.int32_t[:], + col: cnp.int32_t[:], + coeffs: cython.floating[:, :], + uc_ncol: cython.int, + data_sc_off: cnp.int32_t[:, :], + coeffs_isc_off: cnp.int32_t[:, :, :], + reduce_factor: cython.int, + grid_shape: cnp.int32_t[:], + new_axes: cnp.int32_t[:], + out: cython.floating[:], ): """For each row, sums all possible products between column pairs. @@ -319,13 +348,11 @@ def reduce_sc_products( out: The array where the output should be stored. """ - nrows: cython.int = ptr.shape[0] - 1 + nrows = ptr.shape[0] - 1 # Indices to handle rows row: cython.int reduced_row: cython.int - row_start: cython.int - row_end: cython.int # Indices to handle pairs of columns (ij) i: cython.int @@ -343,14 +370,13 @@ def reduce_sc_products( jpair_sc: cython.int # Temporal storage to build values - cdef cython.floating row_value - cdef cython.floating i_row_value + row_value: cython.floating + i_row_value: cython.floating # Index to loop over axes of the grid. iaxis: cython.int # Variables that will help managing orbital pairs that are not within the same cell. - coeffs_nsc: cython.int[3] sc_diff: cython.int[3] inv_sc_diff: cython.int[3] force_same_cell: cython.bint @@ -367,8 +393,8 @@ def reduce_sc_products( # (if any) have been stored in the unit cell. This is what SIESTA does for gamma point calculations # with nsc <= 3. force_same_cell = True + coeffs_nsc = coeffs_isc_off.shape for iaxis in range(3): - coeffs_nsc[iaxis] = coeffs_isc_off.shape[iaxis] if coeffs_nsc[iaxis] != 1: force_same_cell = False @@ -426,9 +452,11 @@ def reduce_sc_products( if not same_cell: # Calculate the sc offset between both orbitals. for iaxis in range(3): - sc_diff[iaxis] = data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + sc_diff[iaxis] = ( + data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + ) # Calculate also the offset in the reverse direction - inv_sc_diff[iaxis] = - sc_diff[iaxis] + inv_sc_diff[iaxis] = -sc_diff[iaxis] # If the sc_difference is negative, convert it to positive so that we can # use it to index the isc_off array (we switched off the handling of negative @@ -444,7 +472,9 @@ def reduce_sc_products( sc_jcol = jpair_sc * uc_ncol + uc_jcol # Do the same for the ji pair - ipair_sc = coeffs_isc_off[inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2]] + ipair_sc = coeffs_isc_off[ + inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2] + ] sc_icol = ipair_sc * uc_ncol + uc_icol # Add the contribution of this column pair to the row total value. Note that we only @@ -455,9 +485,13 @@ def reduce_sc_products( if icol == jcol: i_row_value += coeffs[uc_icol, uc_jcol] * data[j] else: - i_row_value += (coeffs[uc_icol, uc_jcol] + coeffs[uc_jcol, uc_icol]) * data[j] + i_row_value += ( + coeffs[uc_icol, uc_jcol] + coeffs[uc_jcol, uc_icol] + ) * data[j] else: - i_row_value += (coeffs[uc_icol, sc_jcol] + coeffs[uc_jcol, sc_icol]) * data[j] + i_row_value += ( + coeffs[uc_icol, sc_jcol] + coeffs[uc_jcol, sc_icol] + ) * data[j] # Multiply all the contributions of ij pairs with this i by data[i], as explained inside the j loop. i_row_value *= data[i] @@ -468,14 +502,22 @@ def reduce_sc_products( # Store the row value in the output out[reduced_row] = out[reduced_row] + row_value + @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def reduce_sc_products_multicoeffs( - cython.floating[:] data, ptr: np.int32_t[:], col: np.int32_t[:], cython.floating[:, :, :] coeffs, - uc_ncol: cython.int, data_sc_off: np.int32_t[:, :], coeffs_isc_off: np.int32_t[:, :, :], - reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], - cython.floating[:, :] out, + data: cython.floating[:], + ptr: cnp.int32_t[:], + col: cnp.int32_t[:], + coeffs: cython.floating[:, :, :], + uc_ncol: cython.int, + data_sc_off: cnp.int32_t[:, :], + coeffs_isc_off: cnp.int32_t[:, :, :], + reduce_factor: cython.int, + grid_shape: cnp.int32_t[:], + new_axes: cnp.int32_t[:], + out: cython.floating[:, :], ): """For each row, sums all possible products between column pairs. @@ -515,13 +557,11 @@ def reduce_sc_products_multicoeffs( out: The array where the output should be stored. """ - nrows: cython.int = ptr.shape[0] - 1 + nrows = ptr.shape[0] - 1 # Indices to handle rows row: cython.int reduced_row: cython.int - row_start: cython.int - row_end: cython.int # Indices to handle pairs of columns (ij) i: cython.int @@ -543,14 +583,13 @@ def reduce_sc_products_multicoeffs( icoeff: cython.int # Temporal storage to build values - cdef cython.floating[:] row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) - cdef cython.floating[:] i_row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + row_value: cython.floating[:] = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + i_row_value: cython.floating[:] = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) # Index to loop over axes of the grid. iaxis: cython.int # Variables that will help managing orbital pairs that are not within the same cell. - coeffs_nsc: cython.int[3] sc_diff: cython.int[3] inv_sc_diff: cython.int[3] force_same_cell: cython.bint @@ -567,8 +606,8 @@ def reduce_sc_products_multicoeffs( # (if any) have been stored in the unit cell. This is what SIESTA does for gamma point calculations # with nsc <= 3. force_same_cell = True + coeffs_nsc = coeffs_isc_off.shape for iaxis in range(3): - coeffs_nsc[iaxis] = coeffs_isc_off.shape[iaxis] if coeffs_nsc[iaxis] != 1: force_same_cell = False @@ -626,9 +665,11 @@ def reduce_sc_products_multicoeffs( if not same_cell: # Calculate the sc offset between both orbitals. for iaxis in range(3): - sc_diff[iaxis] = data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + sc_diff[iaxis] = ( + data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + ) # Calculate also the offset in the reverse direction - inv_sc_diff[iaxis] = - sc_diff[iaxis] + inv_sc_diff[iaxis] = -sc_diff[iaxis] # If the sc_difference is negative, convert it to positive so that we can # use it to index the isc_off array (we switched off the handling of negative @@ -644,7 +685,9 @@ def reduce_sc_products_multicoeffs( sc_jcol = jpair_sc * uc_ncol + uc_jcol # Do the same for the ji pair - ipair_sc = coeffs_isc_off[inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2]] + ipair_sc = coeffs_isc_off[ + inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2] + ] sc_icol = ipair_sc * uc_ncol + uc_icol # Add the contribution of this column pair to the row total value. Note that we only @@ -656,11 +699,19 @@ def reduce_sc_products_multicoeffs( for icoeff in range(ncoeffs): if same_cell: if icol == jcol: - i_row_value[icoeff] += coeffs[uc_icol, uc_jcol, icoeff] * data[j] + i_row_value[icoeff] += ( + coeffs[uc_icol, uc_jcol, icoeff] * data[j] + ) else: - i_row_value[icoeff] += (coeffs[uc_icol, uc_jcol, icoeff] + coeffs[uc_jcol, uc_icol, icoeff]) * data[j] + i_row_value[icoeff] += ( + coeffs[uc_icol, uc_jcol, icoeff] + + coeffs[uc_jcol, uc_icol, icoeff] + ) * data[j] else: - i_row_value[icoeff] += (coeffs[uc_icol, sc_jcol, icoeff] + coeffs[uc_jcol, sc_icol, icoeff]) * data[j] + i_row_value[icoeff] += ( + coeffs[uc_icol, sc_jcol, icoeff] + + coeffs[uc_jcol, sc_icol, icoeff] + ) * data[j] for icoeff in range(ncoeffs): # Multiply all the contributions of ij pairs with this i by data[i], as explained inside the j loop. @@ -673,14 +724,22 @@ def reduce_sc_products_multicoeffs( # Store the row value in the output out[reduced_row, icoeff] = out[reduced_row, icoeff] + row_value[icoeff] + @cython.boundscheck(False) @cython.wraparound(False) @cython.cdivision(True) def reduce_sc_products_multicoeffs_sparse_denseindex( - cython.floating[:] data, ptr: np.int32_t[:], col: np.int32_t[:], coeffs_csr: SparseCSR, - uc_ncol: cython.int, data_sc_off: np.int32_t[:, :], coeffs_isc_off: np.int32_t[:, :, :], - reduce_factor: cython.int, grid_shape: np.int32_t[:], new_axes: np.int32_t[:], - cython.floating[:, :] out, + data: cython.floating[:], + ptr: cnp.int32_t[:], + col: cnp.int32_t[:], + coeffs_csr: SparseCSR, + uc_ncol: cython.int, + data_sc_off: cnp.int32_t[:, :], + coeffs_isc_off: cnp.int32_t[:, :, :], + reduce_factor: cython.int, + grid_shape: cnp.int32_t[:], + new_axes: cnp.int32_t[:], + out: cython.floating[:, :], ): """For each row, sums all possible products between column pairs. @@ -720,13 +779,11 @@ def reduce_sc_products_multicoeffs_sparse_denseindex( out: The array where the output should be stored. """ - nrows: cython.int = ptr.shape[0] - 1 + nrows = ptr.shape[0] - 1 # Indices to handle rows row: cython.int reduced_row: cython.int - row_start: cython.int - row_end: cython.int # Indices to handle pairs of columns (ij) i: cython.int @@ -744,9 +801,11 @@ def reduce_sc_products_multicoeffs_sparse_denseindex( jpair_sc: cython.int # Extra variables to handle coefficients - cdef cython.floating[:, :] coeffs = coeffs_csr.data + coeffs: cython.floating[:, :] = coeffs_csr.data # Get the array containing all the dense indices. - dense_idx: np.int32_t[:, :] = dense_index(coeffs_csr.shape[:2], coeffs_csr.ptr, coeffs_csr.col) + dense_idx: cnp.int32_t[:, :] = dense_index( + coeffs_csr.shape[:2], coeffs_csr.ptr, coeffs_csr.col + ) coeff_index: cython.int coeff_index2: cython.int @@ -755,14 +814,13 @@ def reduce_sc_products_multicoeffs_sparse_denseindex( icoeff: cython.int # Temporal storage to build values - cdef cython.floating[:] row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) - cdef cython.floating[:] i_row_value = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + row_value: cython.floating[:] = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) + i_row_value: cython.floating[:] = np.zeros(ncoeffs, dtype=np.asarray(data).dtype) # Index to loop over axes of the grid. iaxis: cython.int # Variables that will help managing orbital pairs that are not within the same cell. - coeffs_nsc: cython.int[3] sc_diff: cython.int[3] inv_sc_diff: cython.int[3] force_same_cell: cython.bint @@ -779,8 +837,8 @@ def reduce_sc_products_multicoeffs_sparse_denseindex( # (if any) have been stored in the unit cell. This is what SIESTA does for gamma point calculations # with nsc <= 3. force_same_cell = True + coeffs_nsc = coeffs_isc_off.shape for iaxis in range(3): - coeffs_nsc[iaxis] = coeffs_isc_off.shape[iaxis] if coeffs_nsc[iaxis] != 1: force_same_cell = False @@ -838,9 +896,11 @@ def reduce_sc_products_multicoeffs_sparse_denseindex( if not same_cell: # Calculate the sc offset between both orbitals. for iaxis in range(3): - sc_diff[iaxis] = data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + sc_diff[iaxis] = ( + data_sc_off[jcol_sc, iaxis] - data_sc_off[icol_sc, iaxis] + ) # Calculate also the offset in the reverse direction - inv_sc_diff[iaxis] = - sc_diff[iaxis] + inv_sc_diff[iaxis] = -sc_diff[iaxis] # If the sc_difference is negative, convert it to positive so that we can # use it to index the isc_off array (we switched off the handling of negative @@ -856,7 +916,9 @@ def reduce_sc_products_multicoeffs_sparse_denseindex( sc_jcol = jpair_sc * uc_ncol + uc_jcol # Do the same for the ji pair - ipair_sc = coeffs_isc_off[inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2]] + ipair_sc = coeffs_isc_off[ + inv_sc_diff[0], inv_sc_diff[1], inv_sc_diff[2] + ] sc_icol = ipair_sc * uc_ncol + uc_icol # Get the index needed to find the coefficients that we want from the coeffs array. @@ -888,9 +950,14 @@ def reduce_sc_products_multicoeffs_sparse_denseindex( if icol == jcol: i_row_value[icoeff] += coeffs[coeff_index, icoeff] * data[j] else: - i_row_value[icoeff] += (coeffs[coeff_index, icoeff] + coeffs[coeff_index2, icoeff]) * data[j] + i_row_value[icoeff] += ( + coeffs[coeff_index, icoeff] + + coeffs[coeff_index2, icoeff] + ) * data[j] else: - i_row_value[icoeff] += (coeffs[coeff_index, icoeff] + coeffs[coeff_index2, icoeff]) * data[j] + i_row_value[icoeff] += ( + coeffs[coeff_index, icoeff] + coeffs[coeff_index2, icoeff] + ) * data[j] for icoeff in range(ncoeffs): # Multiply all the contributions of ij pairs with this i by data[i], as explained inside the j loop. From c0986ad1be941d45eac06586b74910d8f15b875c Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Wed, 19 Jun 2024 12:58:10 +0200 Subject: [PATCH 4/5] further improved speed Signed-off-by: Nick Papior --- src/sisl/_core/geometry.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/src/sisl/_core/geometry.py b/src/sisl/_core/geometry.py index 9d76d3c831..df526c5074 100644 --- a/src/sisl/_core/geometry.py +++ b/src/sisl/_core/geometry.py @@ -3662,21 +3662,14 @@ def sphere_grid_index(grid, center, R): corners_i = grid.index(corners) - ix = np.arange( - max(corners_i[:, 0].min(), 0), - min(corners_i[:, 0].max() + 1, grid.shape[0]), - ) - iy = np.arange( - max(corners_i[:, 1].min(), 0), - min(corners_i[:, 1].max() + 1, grid.shape[1]), - ) - iz = np.arange( - max(corners_i[:, 2].min(), 0), - min(corners_i[:, 2].max() + 1, grid.shape[2]), - ) + cmin = np.maximum(corners_i.min(axis=0), 0) + cmax = np.maximum(corners_i.max(axis=0) + 1, 0) + + rx = slice(cmin[0], min(cmax[0], grid.shape[0])) + ry = slice(cmin[1], min(cmax[1], grid.shape[1])) + rz = slice(cmin[2], min(cmax[2], grid.shape[2])) - indices = np.meshgrid(ix, iy, iz) - indices = np.array(indices).T.reshape(-1, 3) + indices = np.mgrid[rx, ry, rz].reshape(3, -1).T return indices From 718b7ab1827458fe5481416f3b221c4b83b9596e Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Wed, 19 Jun 2024 15:12:02 +0200 Subject: [PATCH 5/5] added doc note to _orbital_values Signed-off-by: Nick Papior --- src/sisl/_core/geometry.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/sisl/_core/geometry.py b/src/sisl/_core/geometry.py index df526c5074..0f3b3d3faa 100644 --- a/src/sisl/_core/geometry.py +++ b/src/sisl/_core/geometry.py @@ -3618,6 +3618,10 @@ def _orbital_values(self, grid_shape: Tuple[int, int, int]): ---------- grid_shape: the grid shape (i.e. resolution) in which to calculate the orbital values. + + Notes + ----- + This method does not belong on this geometry. It will be removed eventually. """ # We need to import these here to avoid circular imports. from sisl import Grid