From 7a55e36805133999b2f51d5b8b112577b00a68ec Mon Sep 17 00:00:00 2001 From: corochann Date: Mon, 18 Oct 2021 13:58:18 +0900 Subject: [PATCH 01/14] torch.jit.script support --- torch_dftd/functions/dftd2.py | 4 +- torch_dftd/functions/dftd3.py | 35 +++--- torch_dftd/functions/distance.py | 34 +++++- torch_dftd/functions/smoothing.py | 29 ++++- torch_dftd/functions/triplets.py | 152 +++++++++++++----------- torch_dftd/functions/triplets_kernel.py | 1 + torch_dftd/nn/dftd2_module.py | 17 ++- torch_dftd/nn/dftd3_module.py | 23 ++-- 8 files changed, 190 insertions(+), 105 deletions(-) diff --git a/torch_dftd/functions/dftd2.py b/torch_dftd/functions/dftd2.py index 13e9347..513bc3a 100644 --- a/torch_dftd/functions/dftd2.py +++ b/torch_dftd/functions/dftd2.py @@ -6,6 +6,7 @@ from torch_dftd.functions.smoothing import poly_smoothing +@torch.jit.script def edisp_d2( Z: Tensor, r: Tensor, @@ -44,7 +45,7 @@ def edisp_d2( r2 = r ** 2 r6 = r2 ** 3 - idx_i, idx_j = edge_index + idx_i, idx_j = edge_index[0], edge_index[1] # compute all necessary quantities Zi = Z[idx_i] # (n_edges,) Zj = Z[idx_j] @@ -71,6 +72,7 @@ def edisp_d2( # (1,) g = e6.sum()[None] else: + assert batch is not None # (n_graphs,) if batch.size()[0] == 0: n_graphs = 1 diff --git a/torch_dftd/functions/dftd3.py b/torch_dftd/functions/dftd3.py index dc5d61d..ef1c9fb 100644 --- a/torch_dftd/functions/dftd3.py +++ b/torch_dftd/functions/dftd3.py @@ -9,8 +9,8 @@ # conversion factors used in grimme d3 code -d3_autoang = 0.52917726 # for converting distance from bohr to angstrom -d3_autoev = 27.21138505 # for converting a.u. to eV +d3_autoang: float = 0.52917726 # for converting distance from bohr to angstrom +d3_autoev: float = 27.21138505 # for converting a.u. to eV d3_k1 = 16.000 d3_k2 = 4 / 3 @@ -18,6 +18,7 @@ d3_maxc = 5 # maximum number of coordination complexes +@torch.jit.script def _ncoord( Z: Tensor, r: Tensor, @@ -66,6 +67,7 @@ def _ncoord( return g # (n_atoms,) +@torch.jit.script def _getc6( Zi: Tensor, Zj: Tensor, nci: Tensor, ncj: Tensor, c6ab: Tensor, k3: float = d3_k3 ) -> Tensor: @@ -104,6 +106,7 @@ def _getc6( return c6 +@torch.jit.script def edisp( Z: Tensor, r: Tensor, @@ -120,12 +123,12 @@ def edisp( shift_pos: Optional[Tensor] = None, pos: Optional[Tensor] = None, cell: Optional[Tensor] = None, - r2=None, - r6=None, - r8=None, - k1=d3_k1, - k2=d3_k2, - k3=d3_k3, + r2: Optional[Tensor] = None, + r6: Optional[Tensor] = None, + r8: Optional[Tensor] = None, + k1: float = d3_k1, + k2: float = d3_k2, + k3: float = d3_k3, cutoff_smoothing: str = "none", damping: str = "zero", bidirectional: bool = False, @@ -171,7 +174,7 @@ def edisp( if r8 is None: r8 = r6 * r2 - idx_i, idx_j = edge_index + idx_i, idx_j = edge_index[0], edge_index[1] # compute all necessary quantities Zi = Z[idx_i] # (n_edges,) Zj = Z[idx_j] @@ -250,6 +253,7 @@ def edisp( g = e68.to(torch.float64).sum()[None] else: # (n_graphs,) + assert batch is not None if batch.size()[0] == 0: n_graphs = 1 else: @@ -261,6 +265,7 @@ def edisp( g *= 2.0 if abc: + assert cnthr is not None within_cutoff = r <= cnthr # r_abc = r[within_cutoff] # r2_abc = r2[within_cutoff] @@ -284,10 +289,11 @@ def edisp( with torch.no_grad(): # triplet_node_index, triplet_edge_index = calc_triplets_cycle(edge_index_abc, n_atoms, shift=shift_abc) # Type hinting - triplet_node_index: Tensor - multiplicity: Tensor - edge_jk: Tensor - batch_triplets: Optional[Tensor] + # triplet_node_index: Tensor + # multiplicity: Tensor + # edge_jk: Tensor + # batch_triplets: Optional[Tensor] + assert pos is not 
None triplet_node_index, multiplicity, edge_jk, batch_triplets = calc_triplets( edge_index_abc, shift_pos=shift_abc, @@ -303,7 +309,7 @@ def edisp( ) r_jk = calc_distances(pos, torch.stack([idx_j, idx_k], dim=0), cell, shift_jk) kj_within_cutoff = r_jk <= cnthr - del shift_jk + # del shift_jk triplet_node_index = triplet_node_index[kj_within_cutoff] multiplicity, edge_jk, batch_triplets = ( @@ -355,5 +361,6 @@ def edisp( e6abc = e3.to(torch.float64).sum() g += e6abc else: + assert batch_triplets is not None g.scatter_add_(0, batch_triplets, e3.to(torch.float64)) return g # (n_graphs,) diff --git a/torch_dftd/functions/distance.py b/torch_dftd/functions/distance.py index e177bdb..847cf26 100644 --- a/torch_dftd/functions/distance.py +++ b/torch_dftd/functions/distance.py @@ -4,12 +4,13 @@ from torch import Tensor +@torch.jit.script def calc_distances( pos: Tensor, edge_index: Tensor, cell: Optional[Tensor] = None, shift_pos: Optional[Tensor] = None, - eps=1e-20, + eps: float = 1e-20, ) -> Tensor: """Distance calculation function. @@ -17,6 +18,7 @@ def calc_distances( pos (Tensor): (n_atoms, 3) atom positions. edge_index (Tensor): (2, n_edges) edge_index for graph. cell (Tensor): cell size, None for non periodic system. + This it NOT USED now, it is left for backward compatibility. shift_pos (Tensor): (n_edges, 3) position shift vectors of edges owing to the periodic boundary. It should be length unit. eps (float): Small float value to avoid NaN in backward when the distance is 0. @@ -25,12 +27,38 @@ def calc_distances( """ - idx_i, idx_j = edge_index + idx_i, idx_j = edge_index[0], edge_index[1] # calculate interatomic distances Ri = pos[idx_i] Rj = pos[idx_j] - if cell is not None: + if shift_pos is not None: Rj += shift_pos # eps is to avoid Nan in backward when Dij = 0 with sqrt. 
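     # d(sqrt(x))/dx = 1 / (2 * sqrt(x)) diverges as x -> 0, so a zero-length
     # edge (e.g. i == j with no cell shift) would otherwise yield NaN gradients.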
Dij = torch.sqrt(torch.sum((Ri - Rj) ** 2, dim=-1) + eps) return Dij + + +if __name__ == '__main__': + num_runs = 50 + torch._C._jit_set_num_profiled_runs(num_runs) + print("profiled runs: ", torch._C._jit_get_num_profiled_runs()) + + device = "cuda:0" + # device = "cpu" + n_atoms = 1000 + pos = torch.randn(n_atoms, 3, device=device) * n_atoms + edge_index = torch.stack([torch.arange(n_atoms), torch.arange(n_atoms - 1, -1, -1)], dim=0).to(device) + print("edge_index", edge_index.shape) + calc_distances(pos, edge_index) + from time import perf_counter + + time_list = [] + for i in range(100): + torch.cuda.synchronize() + s0 = perf_counter() + calc_distances(pos, edge_index) + torch.cuda.synchronize() + e0 = perf_counter() + time_list.append(e0 - s0) + print("Time: ", time_list) + import IPython; IPython.embed() diff --git a/torch_dftd/functions/smoothing.py b/torch_dftd/functions/smoothing.py index 1f8595f..fe5f0ba 100644 --- a/torch_dftd/functions/smoothing.py +++ b/torch_dftd/functions/smoothing.py @@ -2,12 +2,13 @@ from torch import Tensor +@torch.jit.script def poly_smoothing(r: Tensor, cutoff: float) -> Tensor: """Computes a smooth step from 1 to 0 starting at 1 bohr before the cutoff Args: r (Tensor): (n_edges,) - cutoff (float): () + cutoff (float): cutoff length Returns: r (Tensor): Smoothed `r` @@ -23,3 +24,29 @@ def poly_smoothing(r: Tensor, cutoff: float) -> Tensor: torch.ones_like(x), torch.where(r >= cutoff, torch.zeros_like(x), 6 * x5 - 15 * x4 + 10 * x3), ) + + +if __name__ == '__main__': + from time import perf_counter + device = "cpu" + n_edges = 10000 + r = torch.rand(n_edges).to(device) * 10.0 + + time_list = [] + for i in range(200): + torch.cuda.synchronize() + s0 = perf_counter() + r2 = poly_smoothing(r, 5.0) + torch.cuda.synchronize() + e0 = perf_counter() + time_list.append(e0 - s0) + print("Time: ", time_list) + import numpy as np + + time_array = np.array(time_list) + print("time:", + np.mean(time_array), + np.mean(time_array[10:]), + np.mean(time_array[50:]), + np.mean(time_array[100:])) + import IPython; IPython.embed() diff --git a/torch_dftd/functions/triplets.py b/torch_dftd/functions/triplets.py index a21be85..61d537a 100644 --- a/torch_dftd/functions/triplets.py +++ b/torch_dftd/functions/triplets.py @@ -1,70 +1,13 @@ -from typing import Optional, Tuple +from typing import Optional, Tuple, List import torch from torch import Tensor from torch_dftd.functions.triplets_kernel import _calc_triplets_core_gpu -def calc_triplets( - edge_index: Tensor, - shift_pos: Optional[Tensor] = None, - dtype=torch.float32, - batch_edge: Optional[Tensor] = None, -) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - """Calculate triplet edge index. - - Args: - edge_index (Tensor): (2, n_edges) edge_index for graph. It must be bidirectional edge. - shift_pos (Tensor or None): (n_edges, 3) used to calculate unique atoms when pbc=True. - dtype: dtype for `multiplicity` - batch_edge (Tensor or None): Specify batch indices for `edge_index`. - - Returns: - triplet_node_index (Tensor): (3, n_triplets) index for node `i`, `j`, `k` respectively. - i.e.: idx_i, idx_j, idx_k = triplet_node_index - multiplicity (Tensor): (n_triplets,) multiplicity indicates duplication of same triplet pair. - It only takes 1 in non-pbc, but it takes 2 or 3 in pbc case. dtype is specified in the argument. - edge_jk (Tensor): (n_triplet_edges, 2=(j, k)) edge indices for j and k. - i.e.: idx_j, idx_k = edge_jk[:, 0], edge_jk[:, 1] - batch_triplets (Tensor): (n_triplets,) batch indices for each triplets. 
- """ - dst, src = edge_index - is_larger = dst >= src - dst = dst[is_larger] - src = src[is_larger] - # sort `src` - sort_inds = torch.argsort(src) - src = src[sort_inds] - dst = dst[sort_inds] - - if shift_pos is None: - edge_indices = torch.arange(src.shape[0], dtype=torch.long, device=edge_index.device) - else: - edge_indices = torch.arange(shift_pos.shape[0], dtype=torch.long, device=edge_index.device) - edge_indices = edge_indices[is_larger][sort_inds] - - if batch_edge is None: - batch_edge = torch.zeros(src.shape[0], dtype=torch.long, device=edge_index.device) - else: - batch_edge = batch_edge[is_larger][sort_inds] - - unique, counts = torch.unique_consecutive(src, return_counts=True) - counts_cumsum = torch.cumsum(counts, dim=0) - counts_cumsum = torch.cat( - [torch.zeros((1,), device=counts.device, dtype=torch.long), counts_cumsum], dim=0 - ) - - if str(unique.device) == "cpu": - return _calc_triplets_core( - counts, unique, dst, edge_indices, batch_edge, counts_cumsum, dtype=dtype - ) - else: - return _calc_triplets_core_gpu( - counts, unique, dst, edge_indices, batch_edge, counts_cumsum, dtype=dtype - ) - - -def _calc_triplets_core(counts, unique, dst, edge_indices, batch_edge, counts_cumsum, dtype): +@torch.jit.script +def _calc_triplets_core(counts: Tensor, unique: Tensor, dst: Tensor, edge_indices: Tensor, batch_edge: Tensor, counts_cumsum: Tensor, + dtype: torch.dtype): device = unique.device n_triplets = torch.sum(counts * (counts - 1) / 2) if n_triplets == 0: @@ -78,20 +21,25 @@ def _calc_triplets_core(counts, unique, dst, edge_indices, batch_edge, counts_cu batch_triplets = torch.zeros((0,), dtype=torch.long, device=device) return triplet_node_index, multiplicity, edge_jk, batch_triplets - triplet_node_index_list = [] # (n_triplet_edges, 3) - edge_jk_list = [] # (n_triplet_edges, 2) represents j and k indices - multiplicity_list = [] # (n_triplet_edges) represents multiplicity - batch_triplets_list = [] # (n_triplet_edges) represents batch index for triplets + triplet_node_index_list: List[List[int]] = [] # (n_triplet_edges, 3) + edge_jk_list: List[List[int]] = [] # (n_triplet_edges, 2) represents j and k indices + multiplicity_list: List[float] = [] # (n_triplet_edges) represents multiplicity + batch_triplets_list: List[int] = [] # (n_triplet_edges) represents batch index for triplets + + unique_list: List[int] = unique.tolist() + dst_list: List[int] = dst.tolist() + counts_cumsum_list: List[int] = counts_cumsum.tolist() + batch_edge_list: List[int] = batch_edge.tolist() for i in range(len(unique)): - _src = unique[i].item() - _n_edges = counts[i].item() - _dst = dst[counts_cumsum[i] : counts_cumsum[i + 1]] - _offset = counts_cumsum[i].item() - _batch_index = batch_edge[counts_cumsum[i]].item() + _src: int = unique_list[i] + _n_edges = counts_cumsum_list[i] + _dst: List[int] = dst_list[counts_cumsum_list[i] : counts_cumsum_list[i + 1]] + _offset = counts_cumsum_list[i] + _batch_index = batch_edge_list[counts_cumsum_list[i]] for j in range(_n_edges - 1): for k in range(j + 1, _n_edges): - _dst0 = _dst[j].item() # _dst0 maybe swapped with _dst1, need to reset here. - _dst1 = _dst[k].item() + _dst0: int = _dst[j] # _dst0 maybe swapped with _dst1, need to reset here. + _dst1: int = _dst[k] batch_triplets_list.append(_batch_index) # --- triplet_node_index_list & shift_list in sorted way... 
--- # sort order to be _src <= _dst0 <= _dst1, and i <= _j <= _k @@ -135,3 +83,63 @@ def _calc_triplets_core(counts, unique, dst, edge_indices, batch_edge, counts_cu # (n_triplet_edges, 3=(ij, ik, jk), 3=(xyz) ) batch_triplets = torch.as_tensor(batch_triplets_list, dtype=torch.long, device=device) return triplet_node_index, multiplicity, edge_jk, batch_triplets + + +@torch.jit.script +def calc_triplets( + edge_index: Tensor, + shift_pos: Optional[Tensor] = None, + dtype: torch.dtype = torch.float32, + batch_edge: Optional[Tensor] = None, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + """Calculate triplet edge index. + + Args: + edge_index (Tensor): (2, n_edges) edge_index for graph. It must be bidirectional edge. + shift_pos (Tensor or None): (n_edges, 3) used to calculate unique atoms when pbc=True. + dtype: dtype for `multiplicity` + batch_edge (Tensor or None): Specify batch indices for `edge_index`. + + Returns: + triplet_node_index (Tensor): (3, n_triplets) index for node `i`, `j`, `k` respectively. + i.e.: idx_i, idx_j, idx_k = triplet_node_index + multiplicity (Tensor): (n_triplets,) multiplicity indicates duplication of same triplet pair. + It only takes 1 in non-pbc, but it takes 2 or 3 in pbc case. dtype is specified in the argument. + edge_jk (Tensor): (n_triplet_edges, 2=(j, k)) edge indices for j and k. + i.e.: idx_j, idx_k = edge_jk[:, 0], edge_jk[:, 1] + batch_triplets (Tensor): (n_triplets,) batch indices for each triplets. + """ + dst, src = edge_index[0], edge_index[1] + is_larger = dst >= src + dst = dst[is_larger] + src = src[is_larger] + # sort `src` + sort_inds = torch.argsort(src) + src = src[sort_inds] + dst = dst[sort_inds] + + if shift_pos is None: + edge_indices = torch.arange(src.shape[0], dtype=torch.long, device=edge_index.device) + else: + edge_indices = torch.arange(shift_pos.shape[0], dtype=torch.long, device=edge_index.device) + edge_indices = edge_indices[is_larger][sort_inds] + + if batch_edge is None: + batch_edge = torch.zeros(src.shape[0], dtype=torch.long, device=edge_index.device) + else: + batch_edge = batch_edge[is_larger][sort_inds] + + unique, counts = torch.unique_consecutive(src, return_counts=True) + counts_cumsum = torch.cumsum(counts, dim=0) + counts_cumsum = torch.cat( + [torch.zeros((1,), device=counts.device, dtype=torch.long), counts_cumsum], dim=0 + ) + + if str(unique.device) == "cpu": + return _calc_triplets_core( + counts, unique, dst, edge_indices, batch_edge, counts_cumsum, dtype=dtype + ) + else: + return _calc_triplets_core_gpu( + counts, unique, dst, edge_indices, batch_edge, counts_cumsum, dtype=dtype + ) diff --git a/torch_dftd/functions/triplets_kernel.py b/torch_dftd/functions/triplets_kernel.py index cf4d96a..34e24a2 100644 --- a/torch_dftd/functions/triplets_kernel.py +++ b/torch_dftd/functions/triplets_kernel.py @@ -113,6 +113,7 @@ def _cupy2torch(array: cp.ndarray) -> Tensor: _calc_triplets_core_gpu_kernel = None +@torch.jit.unused def _calc_triplets_core_gpu( counts: Tensor, unique: Tensor, diff --git a/torch_dftd/nn/dftd2_module.py b/torch_dftd/nn/dftd2_module.py index 57fd76d..8a70a07 100644 --- a/torch_dftd/nn/dftd2_module.py +++ b/torch_dftd/nn/dftd2_module.py @@ -20,6 +20,9 @@ class DFTD2Module(BaseDFTDModule): bidirectional (bool): calculated `edge_index` is bidirectional or not. 
""" + c6ab: Tensor + r0ab: Tensor + def __init__( self, params: Dict[str, float], @@ -52,24 +55,26 @@ def calc_energy_batch( batch: Optional[Tensor] = None, batch_edge: Optional[Tensor] = None, damping: str = "zero", + autoang: float = d3_autoang, + autoev: float = d3_autoev ) -> Tensor: """Forward computation to calculate atomic wise dispersion energy""" shift_pos = pos.new_zeros((edge_index.size()[1], 3, 3)) if shift_pos is None else shift_pos - pos_bohr = pos / d3_autoang # angstrom -> bohr + pos_bohr = pos / autoang # angstrom -> bohr if cell is None: cell_bohr: Optional[Tensor] = None else: - cell_bohr = cell / d3_autoang # angstrom -> bohr - shift_bohr = shift_pos / d3_autoang # angstrom -> bohr + cell_bohr = cell / autoang # angstrom -> bohr + shift_bohr = shift_pos / autoang # angstrom -> bohr r = calc_distances(pos_bohr, edge_index, cell_bohr, shift_bohr) # E_disp (n_graphs,): Energy in eV unit - E_disp = d3_autoev * edisp_d2( + E_disp = autoev * edisp_d2( Z, r, edge_index, - c6ab=self.c6ab, # type:ignore - r0ab=self.r0ab, # type:ignore + c6ab=self.c6ab, + r0ab=self.r0ab, params=self.params, damping=damping, bidirectional=self.bidirectional, diff --git a/torch_dftd/nn/dftd3_module.py b/torch_dftd/nn/dftd3_module.py index 6fe0a84..a40c0f6 100644 --- a/torch_dftd/nn/dftd3_module.py +++ b/torch_dftd/nn/dftd3_module.py @@ -24,6 +24,11 @@ class DFTD3Module(BaseDFTDModule): bidirectional (bool): calculated `edge_index` is bidirectional or not. """ + c6ab: Tensor + r0ab: Tensor + rcov: Tensor + r2r4: Tensor + def __init__( self, params: Dict[str, float], @@ -74,25 +79,27 @@ def calc_energy_batch( batch: Optional[Tensor] = None, batch_edge: Optional[Tensor] = None, damping: str = "zero", + autoang: float = d3_autoang, + autoev: float = d3_autoev, ) -> Tensor: """Forward computation to calculate atomic wise dispersion energy""" shift_pos = pos.new_zeros((edge_index.size()[1], 3, 3)) if shift_pos is None else shift_pos - pos_bohr = pos / d3_autoang # angstrom -> bohr + pos_bohr = pos / autoang # angstrom -> bohr if cell is None: cell_bohr: Optional[Tensor] = None else: - cell_bohr = cell / d3_autoang # angstrom -> bohr - shift_bohr = shift_pos / d3_autoang # angstrom -> bohr + cell_bohr = cell / autoang # angstrom -> bohr + shift_bohr = shift_pos / autoang # angstrom -> bohr r = calc_distances(pos_bohr, edge_index, cell_bohr, shift_bohr) # E_disp (n_graphs,): Energy in eV unit - E_disp = d3_autoev * edisp( + E_disp = autoev * edisp( Z, r, edge_index, - c6ab=self.c6ab, # type:ignore - r0ab=self.r0ab, # type:ignore - rcov=self.rcov, # type:ignore - r2r4=self.r2r4, # type:ignore + c6ab=self.c6ab, + r0ab=self.r0ab, + rcov=self.rcov, + r2r4=self.r2r4, params=self.params, cutoff=self.cutoff / Bohr, cnthr=self.cnthr / Bohr, From 5890892516b7a6918e25257f6cc73b251337eab8 Mon Sep 17 00:00:00 2001 From: corochann Date: Mon, 18 Oct 2021 14:01:09 +0900 Subject: [PATCH 02/14] black, isort --- torch_dftd/functions/distance.py | 10 +++++++--- torch_dftd/functions/smoothing.py | 19 ++++++++++++------- torch_dftd/functions/triplets.py | 13 ++++++++++--- torch_dftd/nn/dftd2_module.py | 2 +- 4 files changed, 30 insertions(+), 14 deletions(-) diff --git a/torch_dftd/functions/distance.py b/torch_dftd/functions/distance.py index 847cf26..614ad4c 100644 --- a/torch_dftd/functions/distance.py +++ b/torch_dftd/functions/distance.py @@ -38,7 +38,7 @@ def calc_distances( return Dij -if __name__ == '__main__': +if __name__ == "__main__": num_runs = 50 torch._C._jit_set_num_profiled_runs(num_runs) print("profiled runs: 
", torch._C._jit_get_num_profiled_runs()) @@ -47,7 +47,9 @@ def calc_distances( # device = "cpu" n_atoms = 1000 pos = torch.randn(n_atoms, 3, device=device) * n_atoms - edge_index = torch.stack([torch.arange(n_atoms), torch.arange(n_atoms - 1, -1, -1)], dim=0).to(device) + edge_index = torch.stack([torch.arange(n_atoms), torch.arange(n_atoms - 1, -1, -1)], dim=0).to( + device + ) print("edge_index", edge_index.shape) calc_distances(pos, edge_index) from time import perf_counter @@ -61,4 +63,6 @@ def calc_distances( e0 = perf_counter() time_list.append(e0 - s0) print("Time: ", time_list) - import IPython; IPython.embed() + import IPython + + IPython.embed() diff --git a/torch_dftd/functions/smoothing.py b/torch_dftd/functions/smoothing.py index fe5f0ba..2d431b1 100644 --- a/torch_dftd/functions/smoothing.py +++ b/torch_dftd/functions/smoothing.py @@ -26,8 +26,9 @@ def poly_smoothing(r: Tensor, cutoff: float) -> Tensor: ) -if __name__ == '__main__': +if __name__ == "__main__": from time import perf_counter + device = "cpu" n_edges = 10000 r = torch.rand(n_edges).to(device) * 10.0 @@ -44,9 +45,13 @@ def poly_smoothing(r: Tensor, cutoff: float) -> Tensor: import numpy as np time_array = np.array(time_list) - print("time:", - np.mean(time_array), - np.mean(time_array[10:]), - np.mean(time_array[50:]), - np.mean(time_array[100:])) - import IPython; IPython.embed() + print( + "time:", + np.mean(time_array), + np.mean(time_array[10:]), + np.mean(time_array[50:]), + np.mean(time_array[100:]), + ) + import IPython + + IPython.embed() diff --git a/torch_dftd/functions/triplets.py b/torch_dftd/functions/triplets.py index 61d537a..37b6dfd 100644 --- a/torch_dftd/functions/triplets.py +++ b/torch_dftd/functions/triplets.py @@ -1,4 +1,4 @@ -from typing import Optional, Tuple, List +from typing import List, Optional, Tuple import torch from torch import Tensor @@ -6,8 +6,15 @@ @torch.jit.script -def _calc_triplets_core(counts: Tensor, unique: Tensor, dst: Tensor, edge_indices: Tensor, batch_edge: Tensor, counts_cumsum: Tensor, - dtype: torch.dtype): +def _calc_triplets_core( + counts: Tensor, + unique: Tensor, + dst: Tensor, + edge_indices: Tensor, + batch_edge: Tensor, + counts_cumsum: Tensor, + dtype: torch.dtype, +): device = unique.device n_triplets = torch.sum(counts * (counts - 1) / 2) if n_triplets == 0: diff --git a/torch_dftd/nn/dftd2_module.py b/torch_dftd/nn/dftd2_module.py index 8a70a07..cc08a03 100644 --- a/torch_dftd/nn/dftd2_module.py +++ b/torch_dftd/nn/dftd2_module.py @@ -56,7 +56,7 @@ def calc_energy_batch( batch_edge: Optional[Tensor] = None, damping: str = "zero", autoang: float = d3_autoang, - autoev: float = d3_autoev + autoev: float = d3_autoev, ) -> Tensor: """Forward computation to calculate atomic wise dispersion energy""" shift_pos = pos.new_zeros((edge_index.size()[1], 3, 3)) if shift_pos is None else shift_pos From c45e19ea952fd962b87a490f79bac065c5651bd5 Mon Sep 17 00:00:00 2001 From: corochann Date: Mon, 18 Oct 2021 17:44:07 +0900 Subject: [PATCH 03/14] fix --- torch_dftd/functions/dftd3.py | 6 +++--- torch_dftd/functions/triplets_kernel.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/torch_dftd/functions/dftd3.py b/torch_dftd/functions/dftd3.py index ef1c9fb..85d80c1 100644 --- a/torch_dftd/functions/dftd3.py +++ b/torch_dftd/functions/dftd3.py @@ -54,7 +54,7 @@ def _ncoord( Zi = Z[idx_i] Zj = Z[idx_j] rco = rcov[Zi] + rcov[Zj] # (n_edges,) - rr = rco.type(r.dtype) / r + rr = rco.to(r.dtype) / r damp = 1.0 / (1.0 + torch.exp(-k1 * (rr - 1.0))) if 
cutoff is not None and cutoff_smoothing == "poly": damp *= poly_smoothing(r, cutoff) @@ -86,7 +86,7 @@ def _getc6( """ # gather the relevant entries from the table # c6ab (95, 95, 5, 5, 3) --> c6ab_ (n_edges, 5, 5, 3) - c6ab_ = c6ab[Zi, Zj].type(nci.dtype) + c6ab_ = c6ab[Zi, Zj].to(nci.dtype) # calculate c6 coefficients # cn0, cn1, cn2 (n_edges, 5, 5) @@ -195,7 +195,7 @@ def edisp( ncj = nc[idx_j] c6 = _getc6(Zi, Zj, nci, ncj, c6ab=c6ab, k3=k3) # c6 coefficients - c8 = 3 * c6 * r2r4[Zi].type(c6.dtype) * r2r4[Zj].type(c6.dtype) # c8 coefficient + c8 = 3 * c6 * r2r4[Zi].to(c6.dtype) * r2r4[Zj].to(c6.dtype) # c8 coefficient s6 = params["s6"] s8 = params["s18"] diff --git a/torch_dftd/functions/triplets_kernel.py b/torch_dftd/functions/triplets_kernel.py index 34e24a2..edf5f75 100644 --- a/torch_dftd/functions/triplets_kernel.py +++ b/torch_dftd/functions/triplets_kernel.py @@ -113,7 +113,7 @@ def _cupy2torch(array: cp.ndarray) -> Tensor: _calc_triplets_core_gpu_kernel = None -@torch.jit.unused +@torch.jit.ignore def _calc_triplets_core_gpu( counts: Tensor, unique: Tensor, From e2f94e812758da75836110ab714faa850b1c6c1d Mon Sep 17 00:00:00 2001 From: corochann Date: Mon, 18 Oct 2021 18:07:58 +0900 Subject: [PATCH 04/14] tmp --- torch_dftd/functions/distance.py | 12 +++++++++++- torch_dftd/functions/smoothing.py | 9 +++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/torch_dftd/functions/distance.py b/torch_dftd/functions/distance.py index 614ad4c..25a85f1 100644 --- a/torch_dftd/functions/distance.py +++ b/torch_dftd/functions/distance.py @@ -40,7 +40,13 @@ def calc_distances( if __name__ == "__main__": num_runs = 50 - torch._C._jit_set_num_profiled_runs(num_runs) + + old_prof_exec_state = torch._C._jit_set_profiling_executor(False) + old_prof_mode_state = torch._C._jit_set_profiling_mode(False) + old_num_prof_runs = torch._C._jit_set_num_profiled_runs(num_runs) + print("old_prof_exec_state", old_prof_exec_state, + "old_prof_mode_state", old_prof_mode_state, + "old_num_prof_runs", old_num_prof_runs) print("profiled runs: ", torch._C._jit_get_num_profiled_runs()) device = "cuda:0" @@ -59,6 +65,10 @@ def calc_distances( torch.cuda.synchronize() s0 = perf_counter() calc_distances(pos, edge_index) + # if i < 10: + # print(f"----- {i} ------------------------") + # print(torch.jit.last_executed_optimized_graph()) + torch.cuda.synchronize() e0 = perf_counter() time_list.append(e0 - s0) diff --git a/torch_dftd/functions/smoothing.py b/torch_dftd/functions/smoothing.py index 2d431b1..9f230f3 100644 --- a/torch_dftd/functions/smoothing.py +++ b/torch_dftd/functions/smoothing.py @@ -29,6 +29,15 @@ def poly_smoothing(r: Tensor, cutoff: float) -> Tensor: if __name__ == "__main__": from time import perf_counter + num_runs = 50 + old_prof_exec_state = torch._C._jit_set_profiling_executor(False) + old_prof_mode_state = torch._C._jit_set_profiling_mode(False) + old_num_prof_runs = torch._C._jit_set_num_profiled_runs(num_runs) + print("old_prof_exec_state", old_prof_exec_state, + "old_prof_mode_state", old_prof_mode_state, + "old_num_prof_runs", old_num_prof_runs) + print("profiled runs: ", torch._C._jit_get_num_profiled_runs()) + device = "cpu" n_edges = 10000 r = torch.rand(n_edges).to(device) * 10.0 From c9c3a3490dfba049bc29155600f2998c58ad249b Mon Sep 17 00:00:00 2001 From: corochann Date: Mon, 18 Oct 2021 21:44:45 +0900 Subject: [PATCH 05/14] update --- torch_dftd/dftd3_xc_params.py | 2 +- torch_dftd/functions/distance.py | 11 +- torch_dftd/functions/smoothing.py | 11 +- 
torch_dftd/nn/base_dftd_module.py | 152 ++++++++++++++++++++------- torch_dftd/nn/dftd2_module.py | 3 +- torch_dftd/nn/dftd3_module.py | 5 +- torch_dftd/torch_dftd3_calculator.py | 6 +- 7 files changed, 138 insertions(+), 52 deletions(-) diff --git a/torch_dftd/dftd3_xc_params.py b/torch_dftd/dftd3_xc_params.py index 00c806c..6789deb 100644 --- a/torch_dftd/dftd3_xc_params.py +++ b/torch_dftd/dftd3_xc_params.py @@ -535,7 +535,7 @@ def get_dftd3_default_params( rs6 = 1.1 s18 = 0.0 alp = 20.0 - rs18 = None # Not used. + rs18 = 0.0 # It is DUMMY value. Not used. if xc == "b-lyp": s6 = 1.2 elif xc == "b-p": diff --git a/torch_dftd/functions/distance.py b/torch_dftd/functions/distance.py index 25a85f1..033b2ca 100644 --- a/torch_dftd/functions/distance.py +++ b/torch_dftd/functions/distance.py @@ -44,9 +44,14 @@ def calc_distances( old_prof_exec_state = torch._C._jit_set_profiling_executor(False) old_prof_mode_state = torch._C._jit_set_profiling_mode(False) old_num_prof_runs = torch._C._jit_set_num_profiled_runs(num_runs) - print("old_prof_exec_state", old_prof_exec_state, - "old_prof_mode_state", old_prof_mode_state, - "old_num_prof_runs", old_num_prof_runs) + print( + "old_prof_exec_state", + old_prof_exec_state, + "old_prof_mode_state", + old_prof_mode_state, + "old_num_prof_runs", + old_num_prof_runs, + ) print("profiled runs: ", torch._C._jit_get_num_profiled_runs()) device = "cuda:0" diff --git a/torch_dftd/functions/smoothing.py b/torch_dftd/functions/smoothing.py index 9f230f3..b006dab 100644 --- a/torch_dftd/functions/smoothing.py +++ b/torch_dftd/functions/smoothing.py @@ -33,9 +33,14 @@ def poly_smoothing(r: Tensor, cutoff: float) -> Tensor: old_prof_exec_state = torch._C._jit_set_profiling_executor(False) old_prof_mode_state = torch._C._jit_set_profiling_mode(False) old_num_prof_runs = torch._C._jit_set_num_profiled_runs(num_runs) - print("old_prof_exec_state", old_prof_exec_state, - "old_prof_mode_state", old_prof_mode_state, - "old_num_prof_runs", old_num_prof_runs) + print( + "old_prof_exec_state", + old_prof_exec_state, + "old_prof_mode_state", + old_prof_mode_state, + "old_num_prof_runs", + old_num_prof_runs, + ) print("profiled runs: ", torch._C._jit_get_num_profiled_runs()) device = "cpu" diff --git a/torch_dftd/nn/base_dftd_module.py b/torch_dftd/nn/base_dftd_module.py index 48248ff..57f4c40 100644 --- a/torch_dftd/nn/base_dftd_module.py +++ b/torch_dftd/nn/base_dftd_module.py @@ -1,15 +1,14 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union -import numpy as np import torch -from ase.neighborlist import primitive_neighbor_list -from ase.units import Bohr from torch import Tensor, nn +from torch_dftd.functions.dftd3 import d3_autoang, d3_autoev class BaseDFTDModule(nn.Module): """BaseDFTDModule""" + @torch.jit.ignore def calc_energy_batch( self, Z: Tensor, @@ -21,6 +20,8 @@ def calc_energy_batch( batch: Optional[Tensor] = None, batch_edge: Optional[Tensor] = None, damping: str = "zero", + autoang: float = d3_autoang, + autoev: float = d3_autoev, ) -> Tensor: """Forward computation to calculate atomic wise dispersion energy. 
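
A note on the decorators this file now leans on: `@torch.jit.ignore` tells the
compiler to leave a method as plain Python, while `@torch.jit.export` (added to
`calc_energy` below) forces TorchScript to compile a method even though
`forward` never reaches it. A self-contained sketch of the pattern; the class
and values here are made up for illustration:

    import torch
    from torch import Tensor, nn

    class Sketch(nn.Module):
        @torch.jit.export
        def doubled(self, x: Tensor) -> Tensor:
            # Compiled and callable on the scripted module even though
            # forward() never reaches it.
            return x * 2.0

        @torch.jit.ignore
        def debug_repr(self, x: Tensor) -> str:
            # Left as Python; may use features TorchScript cannot compile.
            return f"mean={x.mean().item():.3f}"

        def forward(self, x: Tensor) -> Tensor:
            return self.doubled(x) + 1.0

    scripted = torch.jit.script(Sketch())
    print(scripted(torch.ones(3)))          # tensor([3., 3., 3.])
    print(scripted.doubled(torch.ones(3)))  # exported entry point
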
@@ -36,12 +37,15 @@ def calc_energy_batch( batch (Tensor): (n_atoms,) Specify which graph this atom belongs to batch_edge (Tensor): (n_edges, 3) Specify which graph this edge belongs to damping (str): + autoang (float): + autoev (float): Returns: energy (Tensor): (n_atoms,) """ raise NotImplementedError() + @torch.jit.export def calc_energy( self, Z: Tensor, @@ -53,7 +57,9 @@ def calc_energy( batch: Optional[Tensor] = None, batch_edge: Optional[Tensor] = None, damping: str = "zero", - ) -> List[Dict[str, Any]]: + autoang: float = d3_autoang, + autoev: float = d3_autoev, + ) -> List[Dict[str, float]]: """Forward computation of dispersion energy Backward computation is skipped for fast computation of only energy. @@ -68,24 +74,38 @@ def calc_energy( batch (Tensor): batch_edge (Tensor): damping (str): damping method. "zero", "bj", "zerom", "bjm" + autoang (float): + autoev (float): Returns: results_list (list): calculated result. It contains calculate energy in "energy" key. """ with torch.no_grad(): E_disp = self.calc_energy_batch( - Z, pos, edge_index, cell, pbc, shift_pos, batch, batch_edge, damping=damping + Z, + pos, + edge_index, + cell, + pbc, + shift_pos, + batch, + batch_edge, + damping=damping, + autoang=autoang, + autoev=autoev, ) + E_disp_list: List[float] = E_disp.tolist() if batch is None: - return [{"energy": E_disp.item()}] + return [{"energy": E_disp_list[0]}] else: if batch.size()[0] == 0: n_graphs = 1 else: n_graphs = int(batch[-1]) + 1 - return [{"energy": E_disp[i].item()} for i in range(n_graphs)] + return [{"energy": E_disp_list[i]} for i in range(n_graphs)] - def calc_energy_and_forces( + @torch.jit.export + def _calc_energy_and_forces_core( self, Z: Tensor, pos: Tensor, @@ -96,23 +116,9 @@ def calc_energy_and_forces( batch: Optional[Tensor] = None, batch_edge: Optional[Tensor] = None, damping: str = "zero", - ) -> List[Dict[str, Any]]: - """Forward computation of dispersion energy, force and stress - - Args: - Z (Tensor): (n_atoms,) atomic numbers. - pos (Tensor): atom positions in angstrom - cell (Tensor): cell size in angstrom, None for non periodic system. - pbc (Tensor): pbc condition, None for non periodic system. - shift_pos (Tensor): (n_atoms, 3) shift vector (length unit). - damping (str): damping method. "zero", "bj", "zerom", "bjm" - - Returns: - results (list): calculated results. 
Contains following: - "energy": () - "forces": (n_atoms, 3) - "stress": (6,) - """ + autoang: float = d3_autoang, + autoev: float = d3_autoev, + ) -> Tuple[Tensor, Tensor, Optional[Tensor]]: pos.requires_grad_(True) if cell is not None: # pos is depending on `cell` size @@ -123,21 +129,21 @@ def calc_energy_and_forces( shift_pos.requires_grad_(True) E_disp = self.calc_energy_batch( - Z, pos, edge_index, cell, pbc, shift_pos, batch, batch_edge, damping=damping + Z, + pos, + edge_index, + cell, + pbc, + shift_pos, + batch, + batch_edge, + damping=damping, + autoang=autoang, + autoev=autoev, ) E_disp.sum().backward() - forces = -pos.grad # [eV/angstrom] - if batch is None: - results_list = [{"energy": E_disp.item(), "forces": forces.cpu().numpy()}] - else: - if batch.size()[0] == 0: - n_graphs = 1 - else: - n_graphs = int(batch[-1]) + 1 - results_list = [{"energy": E_disp[i].item()} for i in range(n_graphs)] - for i in range(n_graphs): - results_list[i]["forces"] = forces[batch == i].cpu().numpy() + pos_grad = pos.grad if cell is not None: # stress = torch.mm(cell_grad, cell.T) / cell_volume @@ -155,9 +161,15 @@ def calc_energy_and_forces( dim=0, ) stress = cell_grad.to(cell.dtype) / cell_volume - results_list[0]["stress"] = stress.detach().cpu().numpy() + # results_list[0]["stress"] = stress.detach().cpu().numpy() else: + assert isinstance(batch, Tensor) assert isinstance(batch_edge, Tensor) + if batch.size()[0] == 0: + n_graphs = 1 + else: + n_graphs = int(batch[-1]) + 1 + # cell (bs, 3, 3) cell_volume = torch.det(cell).abs() cell_grad = pos.new_zeros((n_graphs, 6), dtype=torch.float64) @@ -172,6 +184,68 @@ def calc_energy_and_forces( (shift_pos[:, voigt_left] * shift_pos.grad[:, voigt_right]).to(torch.float64), ) stress = cell_grad.to(cell.dtype) / cell_volume[:, None] + # stress = stress.detach().cpu().numpy() + stress = stress.detach().cpu() + else: + stress = None + return E_disp, pos_grad, stress + + @torch.jit.ignore + def calc_energy_and_forces( + self, + Z: Tensor, + pos: Tensor, + edge_index: Tensor, + cell: Optional[Tensor] = None, + pbc: Optional[Tensor] = None, + shift_pos: Optional[Tensor] = None, + batch: Optional[Tensor] = None, + batch_edge: Optional[Tensor] = None, + damping: str = "zero", + autoang: float = d3_autoang, + autoev: float = d3_autoev, + ) -> List[Dict[str, Any]]: + """Forward computation of dispersion energy, force and stress + + Args: + Z (Tensor): (n_atoms,) atomic numbers. + pos (Tensor): atom positions in angstrom + cell (Tensor): cell size in angstrom, None for non periodic system. + pbc (Tensor): pbc condition, None for non periodic system. + shift_pos (Tensor): (n_atoms, 3) shift vector (length unit). + damping (str): damping method. "zero", "bj", "zerom", "bjm" + autoang (float): + autoev (float): + + Returns: + results (list): calculated results. Contains following: + "energy": () + "forces": (n_atoms, 3) + "stress": (6,) + """ + E_disp, pos_grad, stress = self._calc_energy_and_forces_core( + Z, pos, edge_index, cell, pbc, shift_pos, batch, batch_edge, damping, autoang, autoev + ) + + forces = pos_grad + n_graphs = 0 # Just to declare for torch.jit.script. 
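+        # (TorchScript requires a variable to keep one static type on every
+        # code path, so `n_graphs` is bound to an int ahead of the branch.)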
+ if batch is None: + results_list = [{"energy": E_disp.item(), "forces": forces.cpu().numpy()}] + else: + if batch.size()[0] == 0: + n_graphs = 1 + else: + n_graphs = int(batch[-1]) + 1 + results_list = [{"energy": E_disp[i].item()} for i in range(n_graphs)] + for i in range(n_graphs): + results_list[i]["forces"] = forces[batch == i].cpu().numpy() + + if stress is not None: + # stress = torch.mm(cell_grad, cell.T) / cell_volume + # Get stress in Voigt notation (xx, yy, zz, yz, xz, xy) + if batch is None: + results_list[0]["stress"] = stress.detach().cpu().numpy() + else: stress = stress.detach().cpu().numpy() for i in range(n_graphs): results_list[i]["stress"] = stress[i] diff --git a/torch_dftd/nn/dftd2_module.py b/torch_dftd/nn/dftd2_module.py index cc08a03..5940af2 100644 --- a/torch_dftd/nn/dftd2_module.py +++ b/torch_dftd/nn/dftd2_module.py @@ -44,6 +44,7 @@ def __init__( # atom pair distance (95, 95) self.register_buffer("r0ab", r0ab) + @torch.jit.export def calc_energy_batch( self, Z: Tensor, @@ -78,7 +79,7 @@ def calc_energy_batch( params=self.params, damping=damping, bidirectional=self.bidirectional, - cutoff=self.cutoff / Bohr, + cutoff=self.cutoff / autoang, batch=batch, batch_edge=batch_edge, cutoff_smoothing=self.cutoff_smoothing, diff --git a/torch_dftd/nn/dftd3_module.py b/torch_dftd/nn/dftd3_module.py index a40c0f6..5259086 100644 --- a/torch_dftd/nn/dftd3_module.py +++ b/torch_dftd/nn/dftd3_module.py @@ -68,6 +68,7 @@ def __init__( self.bidirectional = bidirectional self.cutoff_smoothing = cutoff_smoothing + @torch.jit.export def calc_energy_batch( self, Z: Tensor, @@ -101,8 +102,8 @@ def calc_energy_batch( rcov=self.rcov, r2r4=self.r2r4, params=self.params, - cutoff=self.cutoff / Bohr, - cnthr=self.cnthr / Bohr, + cutoff=self.cutoff / autoang, + cnthr=self.cnthr / autoang, batch=batch, batch_edge=batch_edge, shift_pos=shift_bohr, diff --git a/torch_dftd/torch_dftd3_calculator.py b/torch_dftd/torch_dftd3_calculator.py index 5c050ad..fa68456 100644 --- a/torch_dftd/torch_dftd3_calculator.py +++ b/torch_dftd/torch_dftd3_calculator.py @@ -58,7 +58,7 @@ def __init__( self.old = old self.device = torch.device(device) if old: - self.dftd_module: torch.nn.Module = DFTD2Module( + dftd_module: torch.nn.Module = DFTD2Module( self.params, cutoff=cutoff, dtype=dtype, @@ -66,7 +66,7 @@ def __init__( cutoff_smoothing=cutoff_smoothing, ) else: - self.dftd_module = DFTD3Module( + dftd_module = DFTD3Module( self.params, cutoff=cutoff, cnthr=cnthr, @@ -75,7 +75,7 @@ def __init__( bidirectional=bidirectional, cutoff_smoothing=cutoff_smoothing, ) - self.dftd_module.to(device) + self.dftd_module = torch.jit.script(dftd_module.to(device)) self.dtype = dtype self.cutoff = cutoff self.bidirectional = bidirectional From f3d0f54fff8807fd605711aa0f5b480a2f187a5b Mon Sep 17 00:00:00 2001 From: corochann Date: Tue, 19 Oct 2021 13:32:57 +0900 Subject: [PATCH 06/14] Use original conversion. --- torch_dftd/nn/dftd2_module.py | 3 ++- torch_dftd/nn/dftd3_module.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/torch_dftd/nn/dftd2_module.py b/torch_dftd/nn/dftd2_module.py index 5940af2..d6269a9 100644 --- a/torch_dftd/nn/dftd2_module.py +++ b/torch_dftd/nn/dftd2_module.py @@ -38,6 +38,7 @@ def __init__( self.dtype = dtype self.bidirectional = bidirectional self.cutoff_smoothing = cutoff_smoothing + self._bohr = Bohr # For torch.jit, `Bohr` must be local variable. 
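+        # (i.e. an instance attribute: the scripted methods then read
+        # self._bohr instead of resolving the ase.units.Bohr global.)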
r0ab, c6ab = get_dftd2_params() # atom pair coefficient (87, 87) self.register_buffer("c6ab", c6ab) @@ -79,7 +80,7 @@ def calc_energy_batch( params=self.params, damping=damping, bidirectional=self.bidirectional, - cutoff=self.cutoff / autoang, + cutoff=self.cutoff / self._bohr, batch=batch, batch_edge=batch_edge, cutoff_smoothing=self.cutoff_smoothing, diff --git a/torch_dftd/nn/dftd3_module.py b/torch_dftd/nn/dftd3_module.py index 5259086..0003dd4 100644 --- a/torch_dftd/nn/dftd3_module.py +++ b/torch_dftd/nn/dftd3_module.py @@ -67,6 +67,7 @@ def __init__( self.dtype = dtype self.bidirectional = bidirectional self.cutoff_smoothing = cutoff_smoothing + self._bohr = Bohr # For torch.jit, `Bohr` must be local variable. @torch.jit.export def calc_energy_batch( @@ -102,8 +103,8 @@ def calc_energy_batch( rcov=self.rcov, r2r4=self.r2r4, params=self.params, - cutoff=self.cutoff / autoang, - cnthr=self.cnthr / autoang, + cutoff=self.cutoff / self._bohr, + cnthr=self.cnthr / self._bohr, batch=batch, batch_edge=batch_edge, shift_pos=shift_bohr, From ab17f589d8f7802d854980b2ff55ea924f68bc3c Mon Sep 17 00:00:00 2001 From: corochann Date: Tue, 19 Oct 2021 13:44:49 +0900 Subject: [PATCH 07/14] refactor --- torch_dftd/functions/dftd3.py | 7 ----- torch_dftd/functions/distance.py | 45 ------------------------------- torch_dftd/functions/smoothing.py | 45 ------------------------------- torch_dftd/nn/base_dftd_module.py | 13 +++++---- 4 files changed, 6 insertions(+), 104 deletions(-) diff --git a/torch_dftd/functions/dftd3.py b/torch_dftd/functions/dftd3.py index 85d80c1..f13a47d 100644 --- a/torch_dftd/functions/dftd3.py +++ b/torch_dftd/functions/dftd3.py @@ -287,12 +287,6 @@ def edisp( # (n_edges, ) -> (n_edges * 2, ) shift_abc = None if shift_abc is None else torch.cat([shift_abc, -shift_abc], dim=0) with torch.no_grad(): - # triplet_node_index, triplet_edge_index = calc_triplets_cycle(edge_index_abc, n_atoms, shift=shift_abc) - # Type hinting - # triplet_node_index: Tensor - # multiplicity: Tensor - # edge_jk: Tensor - # batch_triplets: Optional[Tensor] assert pos is not None triplet_node_index, multiplicity, edge_jk, batch_triplets = calc_triplets( edge_index_abc, @@ -309,7 +303,6 @@ def edisp( ) r_jk = calc_distances(pos, torch.stack([idx_j, idx_k], dim=0), cell, shift_jk) kj_within_cutoff = r_jk <= cnthr - # del shift_jk triplet_node_index = triplet_node_index[kj_within_cutoff] multiplicity, edge_jk, batch_triplets = ( diff --git a/torch_dftd/functions/distance.py b/torch_dftd/functions/distance.py index 033b2ca..90a4f0b 100644 --- a/torch_dftd/functions/distance.py +++ b/torch_dftd/functions/distance.py @@ -36,48 +36,3 @@ def calc_distances( # eps is to avoid Nan in backward when Dij = 0 with sqrt. 
Dij = torch.sqrt(torch.sum((Ri - Rj) ** 2, dim=-1) + eps) return Dij - - -if __name__ == "__main__": - num_runs = 50 - - old_prof_exec_state = torch._C._jit_set_profiling_executor(False) - old_prof_mode_state = torch._C._jit_set_profiling_mode(False) - old_num_prof_runs = torch._C._jit_set_num_profiled_runs(num_runs) - print( - "old_prof_exec_state", - old_prof_exec_state, - "old_prof_mode_state", - old_prof_mode_state, - "old_num_prof_runs", - old_num_prof_runs, - ) - print("profiled runs: ", torch._C._jit_get_num_profiled_runs()) - - device = "cuda:0" - # device = "cpu" - n_atoms = 1000 - pos = torch.randn(n_atoms, 3, device=device) * n_atoms - edge_index = torch.stack([torch.arange(n_atoms), torch.arange(n_atoms - 1, -1, -1)], dim=0).to( - device - ) - print("edge_index", edge_index.shape) - calc_distances(pos, edge_index) - from time import perf_counter - - time_list = [] - for i in range(100): - torch.cuda.synchronize() - s0 = perf_counter() - calc_distances(pos, edge_index) - # if i < 10: - # print(f"----- {i} ------------------------") - # print(torch.jit.last_executed_optimized_graph()) - - torch.cuda.synchronize() - e0 = perf_counter() - time_list.append(e0 - s0) - print("Time: ", time_list) - import IPython - - IPython.embed() diff --git a/torch_dftd/functions/smoothing.py b/torch_dftd/functions/smoothing.py index b006dab..e993e39 100644 --- a/torch_dftd/functions/smoothing.py +++ b/torch_dftd/functions/smoothing.py @@ -24,48 +24,3 @@ def poly_smoothing(r: Tensor, cutoff: float) -> Tensor: torch.ones_like(x), torch.where(r >= cutoff, torch.zeros_like(x), 6 * x5 - 15 * x4 + 10 * x3), ) - - -if __name__ == "__main__": - from time import perf_counter - - num_runs = 50 - old_prof_exec_state = torch._C._jit_set_profiling_executor(False) - old_prof_mode_state = torch._C._jit_set_profiling_mode(False) - old_num_prof_runs = torch._C._jit_set_num_profiled_runs(num_runs) - print( - "old_prof_exec_state", - old_prof_exec_state, - "old_prof_mode_state", - old_prof_mode_state, - "old_num_prof_runs", - old_num_prof_runs, - ) - print("profiled runs: ", torch._C._jit_get_num_profiled_runs()) - - device = "cpu" - n_edges = 10000 - r = torch.rand(n_edges).to(device) * 10.0 - - time_list = [] - for i in range(200): - torch.cuda.synchronize() - s0 = perf_counter() - r2 = poly_smoothing(r, 5.0) - torch.cuda.synchronize() - e0 = perf_counter() - time_list.append(e0 - s0) - print("Time: ", time_list) - import numpy as np - - time_array = np.array(time_list) - print( - "time:", - np.mean(time_array), - np.mean(time_array[10:]), - np.mean(time_array[50:]), - np.mean(time_array[100:]), - ) - import IPython - - IPython.embed() diff --git a/torch_dftd/nn/base_dftd_module.py b/torch_dftd/nn/base_dftd_module.py index 57f4c40..590418e 100644 --- a/torch_dftd/nn/base_dftd_module.py +++ b/torch_dftd/nn/base_dftd_module.py @@ -161,7 +161,6 @@ def _calc_energy_and_forces_core( dim=0, ) stress = cell_grad.to(cell.dtype) / cell_volume - # results_list[0]["stress"] = stress.detach().cpu().numpy() else: assert isinstance(batch, Tensor) assert isinstance(batch_edge, Tensor) @@ -184,8 +183,7 @@ def _calc_energy_and_forces_core( (shift_pos[:, voigt_left] * shift_pos.grad[:, voigt_right]).to(torch.float64), ) stress = cell_grad.to(cell.dtype) / cell_volume[:, None] - # stress = stress.detach().cpu().numpy() - stress = stress.detach().cpu() + stress = stress else: stress = None return E_disp, pos_grad, stress @@ -227,18 +225,19 @@ def calc_energy_and_forces( Z, pos, edge_index, cell, pbc, shift_pos, batch, batch_edge, 
damping, autoang, autoev ) - forces = pos_grad + forces = (-pos_grad).cpu().numpy() n_graphs = 0 # Just to declare for torch.jit.script. if batch is None: - results_list = [{"energy": E_disp.item(), "forces": forces.cpu().numpy()}] + results_list = [{"energy": E_disp.item(), "forces": forces}] else: if batch.size()[0] == 0: n_graphs = 1 else: n_graphs = int(batch[-1]) + 1 - results_list = [{"energy": E_disp[i].item()} for i in range(n_graphs)] + E_disp_list = E_disp.tolist() + results_list = [{"energy": E_disp_list[i]} for i in range(n_graphs)] for i in range(n_graphs): - results_list[i]["forces"] = forces[batch == i].cpu().numpy() + results_list[i]["forces"] = forces[batch == i] if stress is not None: # stress = torch.mm(cell_grad, cell.T) / cell_volume From 0ae6e75a02db754e272818e45c54ca8bed2a901f Mon Sep 17 00:00:00 2001 From: corochann Date: Wed, 20 Oct 2021 05:14:23 +0900 Subject: [PATCH 08/14] update --- .flexci/pytest_script.sh | 2 +- torch_dftd/functions/triplets.py | 9 +++++---- torch_dftd/functions/triplets_kernel.py | 22 ++++++++++++++++++++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/.flexci/pytest_script.sh b/.flexci/pytest_script.sh index 0a0960b..fa395e4 100644 --- a/.flexci/pytest_script.sh +++ b/.flexci/pytest_script.sh @@ -20,7 +20,7 @@ main() { docker run --runtime=nvidia --rm --volume="$(pwd)":/workspace -w /workspace \ ${IMAGE} \ bash -x -c "pip install flake8 pytest pytest-cov pytest-xdist pytest-benchmark && \ - pip install cupy-cuda102 pytorch-pfn-extras!=0.5.0 && \ + pip install cupy-cuda102 pytorch-pfn-extras<0.5.0 && \ pip install -e .[develop] && \ pysen run lint && \ pytest --cov=torch_dftd -n $(nproc) -m 'not slow' tests && diff --git a/torch_dftd/functions/triplets.py b/torch_dftd/functions/triplets.py index 37b6dfd..50c3297 100644 --- a/torch_dftd/functions/triplets.py +++ b/torch_dftd/functions/triplets.py @@ -13,10 +13,10 @@ def _calc_triplets_core( edge_indices: Tensor, batch_edge: Tensor, counts_cumsum: Tensor, - dtype: torch.dtype, -): + dtype: torch.dtype = torch.float32, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: device = unique.device - n_triplets = torch.sum(counts * (counts - 1) / 2) + n_triplets = int(torch.sum(counts * (counts - 1) / 2)) if n_triplets == 0: # (n_triplet_edges, 3) triplet_node_index = torch.zeros((0, 3), dtype=torch.long, device=device) @@ -35,11 +35,12 @@ def _calc_triplets_core( unique_list: List[int] = unique.tolist() dst_list: List[int] = dst.tolist() + counts_list: List[int] = counts.tolist() counts_cumsum_list: List[int] = counts_cumsum.tolist() batch_edge_list: List[int] = batch_edge.tolist() for i in range(len(unique)): _src: int = unique_list[i] - _n_edges = counts_cumsum_list[i] + _n_edges: int = counts_list[i] _dst: List[int] = dst_list[counts_cumsum_list[i] : counts_cumsum_list[i + 1]] _offset = counts_cumsum_list[i] _batch_index = batch_edge_list[counts_cumsum_list[i]] diff --git a/torch_dftd/functions/triplets_kernel.py b/torch_dftd/functions/triplets_kernel.py index edf5f75..52c5edc 100644 --- a/torch_dftd/functions/triplets_kernel.py +++ b/torch_dftd/functions/triplets_kernel.py @@ -114,15 +114,15 @@ def _cupy2torch(array: cp.ndarray) -> Tensor: @torch.jit.ignore -def _calc_triplets_core_gpu( +def _calc_triplets_core_gpu_run_kernel( counts: Tensor, unique: Tensor, dst: Tensor, edge_indices: Tensor, batch_edge: Tensor, counts_cumsum: Tensor, - dtype: torch.dtype = torch.float32, ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + dtype: torch.dtype = torch.float32 if not _ppe_available: raise 
ImportError("Please install pytorch_pfn_extras to use `_calc_triplets_core_gpu`!") if not _cupy_available: @@ -157,3 +157,21 @@ def _calc_triplets_core_gpu( ) # torch tensor buffer is already modified in above cupy functions. return triplet_node_index, multiplicity, edge_jk, batch_triplets + + +@torch.jit.script +def _calc_triplets_core_gpu( + counts: Tensor, + unique: Tensor, + dst: Tensor, + edge_indices: Tensor, + batch_edge: Tensor, + counts_cumsum: Tensor, + dtype: torch.dtype = torch.float32, +) -> Tuple[Tensor, Tensor, Tensor, Tensor]: + # dtype cannot be used inside @torch.jit.ignore function... + # https://github.com/pytorch/pytorch/issues/51941 + triplet_node_index, multiplicity, edge_jk, batch_triplets = _calc_triplets_core_gpu_run_kernel( + counts, unique, dst, edge_indices, batch_edge, counts_cumsum + ) + return triplet_node_index, multiplicity.to(dtype), edge_jk, batch_triplets From 5e2c452f80ae7c4954c6e2258337353e5baad979 Mon Sep 17 00:00:00 2001 From: corochann Date: Wed, 20 Oct 2021 05:21:41 +0900 Subject: [PATCH 09/14] update --- .flexci/pytest_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.flexci/pytest_script.sh b/.flexci/pytest_script.sh index fa395e4..cae4544 100644 --- a/.flexci/pytest_script.sh +++ b/.flexci/pytest_script.sh @@ -20,7 +20,7 @@ main() { docker run --runtime=nvidia --rm --volume="$(pwd)":/workspace -w /workspace \ ${IMAGE} \ bash -x -c "pip install flake8 pytest pytest-cov pytest-xdist pytest-benchmark && \ - pip install cupy-cuda102 pytorch-pfn-extras<0.5.0 && \ + pip install cupy-cuda102 'pytorch-pfn-extras<0.5.0' && \ pip install -e .[develop] && \ pysen run lint && \ pytest --cov=torch_dftd -n $(nproc) -m 'not slow' tests && From d80c8231c3bc4d8f24c366c6606af9f2c1f84ca4 Mon Sep 17 00:00:00 2001 From: corochann Date: Wed, 20 Oct 2021 05:43:24 +0900 Subject: [PATCH 10/14] update test env --- .flexci/build_and_push.sh | 6 +++--- .flexci/pytest_script.sh | 2 +- README.md | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.flexci/build_and_push.sh b/.flexci/build_and_push.sh index 2e8a018..75a00a5 100644 --- a/.flexci/build_and_push.sh +++ b/.flexci/build_and_push.sh @@ -32,10 +32,10 @@ docker_build_and_push() { WAIT_PIDS="" # PyTorch 1.5 + Python 3.6 -docker_build_and_push torch15 \ +docker_build_and_push torch19 \ --build-arg base_image="nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04" \ - --build-arg python_version="3.6.12" \ - --build-arg pip_packages="torch==1.5.* torchvision==0.6.* ${TEST_PIP_PACKAGES}" & + --build-arg python_version="3.7.9" \ + --build-arg pip_packages="torch==1.9.1 ${TEST_PIP_PACKAGES}" & WAIT_PIDS="$! ${WAIT_PIDS}" # Wait until the build complete. diff --git a/.flexci/pytest_script.sh b/.flexci/pytest_script.sh index cae4544..25deb85 100644 --- a/.flexci/pytest_script.sh +++ b/.flexci/pytest_script.sh @@ -20,7 +20,7 @@ main() { docker run --runtime=nvidia --rm --volume="$(pwd)":/workspace -w /workspace \ ${IMAGE} \ bash -x -c "pip install flake8 pytest pytest-cov pytest-xdist pytest-benchmark && \ - pip install cupy-cuda102 'pytorch-pfn-extras<0.5.0' && \ + pip install cupy-cuda102 pytorch-pfn-extras==0.4.2 && \ pip install -e .[develop] && \ pysen run lint && \ pytest --cov=torch_dftd -n $(nproc) -m 'not slow' tests && diff --git a/README.md b/README.md index 50ecbea..8a5ba44 100644 --- a/README.md +++ b/README.md @@ -34,14 +34,14 @@ print(f"forces {forces}") ## Dependency The library is tested under following environment. 
- - python: 3.6 + - python: 3.7 - CUDA: 10.2 ```bash -torch==1.5.1 +torch==1.9.1 ase==3.21.1 # Below is only for 3-body term -cupy-cuda102==8.6.0 -pytorch-pfn-extras==0.3.2 +cupy-cuda102==9.5.0 +pytorch-pfn-extras==0.4.2 ``` ## Development tips From ba0b172fd06829dc2ba5776d83d3c1142ae58134 Mon Sep 17 00:00:00 2001 From: corochann Date: Wed, 20 Oct 2021 09:45:21 +0900 Subject: [PATCH 11/14] update --- .flexci/pytest_script.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.flexci/pytest_script.sh b/.flexci/pytest_script.sh index 25deb85..8ffba7f 100644 --- a/.flexci/pytest_script.sh +++ b/.flexci/pytest_script.sh @@ -2,7 +2,7 @@ set -eu #IMAGE=pytorch/pytorch:1.5.1-cuda10.1-cudnn7-devel -IMAGE=asia.gcr.io/pfn-public-ci/torch-dftd-ci:torch15 +IMAGE=asia.gcr.io/pfn-public-ci/torch-dftd-ci:torch19 main() { From ca6d235381185e274359712b3c2687d745a674c3 Mon Sep 17 00:00:00 2001 From: corochann Date: Wed, 20 Oct 2021 11:41:59 +0900 Subject: [PATCH 12/14] Use numpy array --- torch_dftd/nn/base_dftd_module.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch_dftd/nn/base_dftd_module.py b/torch_dftd/nn/base_dftd_module.py index 590418e..dee3679 100644 --- a/torch_dftd/nn/base_dftd_module.py +++ b/torch_dftd/nn/base_dftd_module.py @@ -236,8 +236,9 @@ def calc_energy_and_forces( n_graphs = int(batch[-1]) + 1 E_disp_list = E_disp.tolist() results_list = [{"energy": E_disp_list[i]} for i in range(n_graphs)] + batch_array = batch.cpu().numpy() for i in range(n_graphs): - results_list[i]["forces"] = forces[batch == i] + results_list[i]["forces"] = forces[batch_array == i] if stress is not None: # stress = torch.mm(cell_grad, cell.T) / cell_volume From 75b5a9e729d420d0e27d2d93009e25c3a02e6eb9 Mon Sep 17 00:00:00 2001 From: corochann Date: Wed, 20 Oct 2021 12:21:57 +0900 Subject: [PATCH 13/14] bug fix? 
--- torch_dftd/nn/dftd2_module.py | 2 +- torch_dftd/nn/dftd3_module.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torch_dftd/nn/dftd2_module.py b/torch_dftd/nn/dftd2_module.py index d6269a9..af9862b 100644 --- a/torch_dftd/nn/dftd2_module.py +++ b/torch_dftd/nn/dftd2_module.py @@ -61,7 +61,7 @@ def calc_energy_batch( autoev: float = d3_autoev, ) -> Tensor: """Forward computation to calculate atomic wise dispersion energy""" - shift_pos = pos.new_zeros((edge_index.size()[1], 3, 3)) if shift_pos is None else shift_pos + shift_pos = pos.new_zeros((edge_index.size()[1], 3)) if shift_pos is None else shift_pos pos_bohr = pos / autoang # angstrom -> bohr if cell is None: cell_bohr: Optional[Tensor] = None diff --git a/torch_dftd/nn/dftd3_module.py b/torch_dftd/nn/dftd3_module.py index 0003dd4..e5fbaac 100644 --- a/torch_dftd/nn/dftd3_module.py +++ b/torch_dftd/nn/dftd3_module.py @@ -85,7 +85,7 @@ def calc_energy_batch( autoev: float = d3_autoev, ) -> Tensor: """Forward computation to calculate atomic wise dispersion energy""" - shift_pos = pos.new_zeros((edge_index.size()[1], 3, 3)) if shift_pos is None else shift_pos + shift_pos = pos.new_zeros((edge_index.size()[1], 3)) if shift_pos is None else shift_pos pos_bohr = pos / autoang # angstrom -> bohr if cell is None: cell_bohr: Optional[Tensor] = None From 14f6dfcdd717c15446cccb4a873c0e8240cdec6c Mon Sep 17 00:00:00 2001 From: corochann Date: Wed, 20 Oct 2021 15:31:08 +0900 Subject: [PATCH 14/14] fix docstring --- torch_dftd/functions/dftd3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch_dftd/functions/dftd3.py b/torch_dftd/functions/dftd3.py index f13a47d..f69667f 100644 --- a/torch_dftd/functions/dftd3.py +++ b/torch_dftd/functions/dftd3.py @@ -149,7 +149,7 @@ def edisp( cnthr (float or None): cutoff distance for coordination number calculation in **bohr** batch (Tensor or None): (n_atoms,) batch_edge (Tensor or None): (n_edges,) - shift_pos (Tensor or None): (n_atoms,) used to calculate 3-body term when abc=True + shift_pos (Tensor or None): (n_edges, 3) used to calculate 3-body term when abc=True pos (Tensor): (n_atoms, 3) position in **bohr** cell (Tensor): (3, 3) cell size in **bohr** r2 (Tensor or None):
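
Across the series, the calculator ends up driving its DFTD module through
TorchScript (patch 05 wraps it in `torch.jit.script`), with the JIT profiling
executor tuned in the benchmark snippets. A minimal end-to-end sketch of the
scripted path, following the usage already shown in the README; the molecule
name and device are illustrative:

    import torch
    from ase.build import molecule
    from torch_dftd.torch_dftd3_calculator import TorchDFTD3Calculator

    # Optional: patches 04-05 switch off the JIT profiling executor before
    # timing; the same internal toggles can be applied here.
    torch._C._jit_set_profiling_executor(False)
    torch._C._jit_set_profiling_mode(False)

    atoms = molecule("CH3CH2OCH3")
    calculator = TorchDFTD3Calculator(atoms=atoms, device="cpu", damping="bj")
    energy = atoms.get_potential_energy()  # dispersion energy in eV
    forces = atoms.get_forces()            # eV/angstrom
    print(f"energy {energy}")
    print(f"forces {forces.shape}")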