From 9f20564da8ebe00c8ef7434eba97898fddac12da Mon Sep 17 00:00:00 2001 From: anyangml Date: Mon, 8 Apr 2024 01:57:10 +0000 Subject: [PATCH 01/27] chore: change name to atom --- deepmd/dpmodel/fitting/dipole_fitting.py | 6 ++---- deepmd/dpmodel/fitting/polarizability_fitting.py | 6 ++---- deepmd/entrypoints/test.py | 12 ++++++------ deepmd/pt/loss/tensor.py | 8 ++++---- deepmd/pt/model/task/dipole.py | 5 ++--- deepmd/pt/model/task/polarizability.py | 9 ++++----- deepmd/tf/fit/dipole.py | 1 - deepmd/tf/fit/polar.py | 1 - source/tests/consistent/fitting/test_dipole.py | 1 - source/tests/consistent/fitting/test_polar.py | 1 - source/tests/pt/model/test_polar_stat.py | 14 ++++++++++---- source/tests/tf/test_dp_test.py | 4 ++-- 12 files changed, 32 insertions(+), 36 deletions(-) diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index 6d6324770c..53ebad0a3b 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -36,8 +36,6 @@ class DipoleFitting(GeneralFitting): Parameters ---------- - var_name - The name of the output variable. ntypes The number of atom types. dim_descrpt @@ -86,7 +84,6 @@ class DipoleFitting(GeneralFitting): def __init__( self, - var_name: str, ntypes: int, dim_descrpt: int, embedding_width: int, @@ -124,7 +121,7 @@ def __init__( self.r_differentiable = r_differentiable self.c_differentiable = c_differentiable super().__init__( - var_name=var_name, + var_name="dipole", ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, @@ -161,6 +158,7 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("var_name", None) return super().deserialize(data) def output_def(self): diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 5d75037137..0ca2a489fc 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -39,8 +39,6 @@ class PolarFitting(GeneralFitting): Parameters ---------- - var_name - The name of the output variable. ntypes The number of atom types. dim_descrpt @@ -88,7 +86,6 @@ class PolarFitting(GeneralFitting): def __init__( self, - var_name: str, ntypes: int, dim_descrpt: int, embedding_width: int, @@ -145,7 +142,7 @@ def __init__( self.shift_diag = shift_diag self.constant_matrix = np.zeros(ntypes, dtype=GLOBAL_NP_FLOAT_PRECISION) super().__init__( - var_name=var_name, + var_name="polar", ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, @@ -201,6 +198,7 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("var_name", None) return super().deserialize(data) def output_def(self): diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index cad6e12d2b..7b8c227ead 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -870,7 +870,7 @@ def test_polar( arrays with results and their shapes """ data.add( - "polarizability" if not atomic else "atomic_polarizability", + "polarizability" if not atomic else "atom_polarizability", 9, atomic=atomic, must=True, @@ -897,7 +897,7 @@ def test_polar( polar = polar.reshape((polar.shape[0], -1, 9))[:, sel_mask, :].reshape( (polar.shape[0], -1) ) - rmse_f = rmse(polar - test_data["atomic_polarizability"][:numb_test]) + rmse_f = rmse(polar - test_data["atom_polarizability"][:numb_test]) log.info(f"# number of test data : {numb_test:d} ") log.info(f"Polarizability RMSE : {rmse_f:e}") @@ -926,7 +926,7 @@ def test_polar( pe = np.concatenate( ( np.reshape( - test_data["atomic_polarizability"][:numb_test], + test_data["atom_polarizability"][:numb_test], [-1, 9 * sel_natoms], ), np.reshape(polar, [-1, 9 * sel_natoms]), @@ -1011,7 +1011,7 @@ def test_dipole( arrays with results and their shapes """ data.add( - "dipole" if not atomic else "atomic_dipole", + "dipole" if not atomic else "atom_dipole", 3, atomic=atomic, must=True, @@ -1037,7 +1037,7 @@ def test_dipole( dipole = dipole.reshape((dipole.shape[0], -1, 3))[:, sel_mask, :].reshape( (dipole.shape[0], -1) ) - rmse_f = rmse(dipole - test_data["atomic_dipole"][:numb_test]) + rmse_f = rmse(dipole - test_data["atom_dipole"][:numb_test]) log.info(f"# number of test data : {numb_test:d}") log.info(f"Dipole RMSE : {rmse_f:e}") @@ -1061,7 +1061,7 @@ def test_dipole( pe = np.concatenate( ( np.reshape( - test_data["atomic_dipole"][:numb_test], [-1, 3 * sel_natoms] + test_data["atom_dipole"][:numb_test], [-1, 3 * sel_natoms] ), np.reshape(dipole, [-1, 3 * sel_natoms]), ), diff --git a/deepmd/pt/loss/tensor.py b/deepmd/pt/loss/tensor.py index 3dd91d203e..34957815b5 100644 --- a/deepmd/pt/loss/tensor.py +++ b/deepmd/pt/loss/tensor.py @@ -93,14 +93,14 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False if ( self.has_local_weight and self.tensor_name in model_pred - and "atomic_" + self.label_name in label + and "atom_" + self.label_name in label ): - find_local = label.get("find_" + "atomic_" + self.label_name, 0.0) + find_local = label.get("find_" + "atom_" + self.label_name, 0.0) local_weight = self.local_weight * find_local local_tensor_pred = model_pred[self.tensor_name].reshape( [-1, natoms, self.tensor_size] ) - local_tensor_label = label["atomic_" + self.label_name].reshape( + local_tensor_label = label["atom_" + self.label_name].reshape( [-1, natoms, self.tensor_size] ) diff = (local_tensor_pred - local_tensor_label).reshape( @@ -157,7 +157,7 @@ def label_requirement(self) -> List[DataRequirementItem]: if self.has_local_weight: label_requirement.append( DataRequirementItem( - "atomic_" + self.label_name, + "atom_" + self.label_name, ndof=self.tensor_size, atomic=True, must=False, diff --git a/deepmd/pt/model/task/dipole.py b/deepmd/pt/model/task/dipole.py index ca445c8588..cddbbf5291 100644 --- a/deepmd/pt/model/task/dipole.py +++ b/deepmd/pt/model/task/dipole.py @@ -39,8 +39,6 @@ class DipoleFittingNet(GeneralFitting): Parameters ---------- - var_name : str - The atomic property to fit, 'dipole'. ntypes : int Element count. dim_descrpt : int @@ -97,7 +95,7 @@ def __init__( self.r_differentiable = r_differentiable self.c_differentiable = c_differentiable super().__init__( - var_name=kwargs.pop("var_name", "dipole"), + var_name="dipole", ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, @@ -131,6 +129,7 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 1, 1) + data.pop("var_name", None) return super().deserialize(data) def output_def(self) -> FittingOutputDef: diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 544d23555c..7bf82f8a09 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -47,8 +47,6 @@ class PolarFittingNet(GeneralFitting): Parameters ---------- - var_name : str - The atomic property to fit, 'polar'. ntypes : int Element count. dim_descrpt : int @@ -127,7 +125,7 @@ def __init__( ntypes, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE ) super().__init__( - var_name=kwargs.pop("var_name", "polar"), + var_name="polar", ntypes=ntypes, dim_descrpt=dim_descrpt, neuron=neuron, @@ -180,6 +178,7 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 2, 1) + data.pop("var_name", None) return super().deserialize(data) def output_def(self) -> FittingOutputDef: @@ -232,9 +231,9 @@ def compute_output_stats( for sys in range(len(sampled)): nframs = sampled[sys]["atype"].shape[0] - if sampled[sys]["find_atomic_polarizability"] > 0.0: + if sampled[sys]["find_atom_polarizability"] > 0.0: sys_atom_polar = compute_stats_from_atomic( - sampled[sys]["atomic_polarizability"].numpy(force=True), + sampled[sys]["atom_polarizability"].numpy(force=True), sampled[sys]["atype"].numpy(force=True), )[0] else: diff --git a/deepmd/tf/fit/dipole.py b/deepmd/tf/fit/dipole.py index f98d52c7bd..d99c793415 100644 --- a/deepmd/tf/fit/dipole.py +++ b/deepmd/tf/fit/dipole.py @@ -362,7 +362,6 @@ def serialize(self, suffix: str) -> dict: "@class": "Fitting", "type": "dipole", "@version": 1, - "var_name": "dipole", "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, "embedding_width": self.dim_rot_mat_1, diff --git a/deepmd/tf/fit/polar.py b/deepmd/tf/fit/polar.py index 473b57ff54..c124bd3ef4 100644 --- a/deepmd/tf/fit/polar.py +++ b/deepmd/tf/fit/polar.py @@ -555,7 +555,6 @@ def serialize(self, suffix: str) -> dict: "@class": "Fitting", "type": "polar", "@version": 1, - "var_name": "polar", "ntypes": self.ntypes, "dim_descrpt": self.dim_descrpt, "embedding_width": self.dim_rot_mat_1, diff --git a/source/tests/consistent/fitting/test_dipole.py b/source/tests/consistent/fitting/test_dipole.py index 18a29934ca..4f33d58c10 100644 --- a/source/tests/consistent/fitting/test_dipole.py +++ b/source/tests/consistent/fitting/test_dipole.py @@ -94,7 +94,6 @@ def addtional_data(self) -> dict: "ntypes": self.ntypes, "dim_descrpt": self.inputs.shape[-1], "mixed_types": mixed_types, - "var_name": "dipole", "embedding_width": 30, } diff --git a/source/tests/consistent/fitting/test_polar.py b/source/tests/consistent/fitting/test_polar.py index 5b55c6d333..a6e0e07784 100644 --- a/source/tests/consistent/fitting/test_polar.py +++ b/source/tests/consistent/fitting/test_polar.py @@ -94,7 +94,6 @@ def addtional_data(self) -> dict: "ntypes": self.ntypes, "dim_descrpt": self.inputs.shape[-1], "mixed_types": mixed_types, - "var_name": "polar", "embedding_width": 30, } diff --git a/source/tests/pt/model/test_polar_stat.py b/source/tests/pt/model/test_polar_stat.py index 3d72c6e8fa..6826c33b39 100644 --- a/source/tests/pt/model/test_polar_stat.py +++ b/source/tests/pt/model/test_polar_stat.py @@ -31,8 +31,8 @@ def setUp(self) -> None: self.sampled = [ { "atype": types, - "find_atomic_polarizability": find_atomic_polarizability, - "atomic_polarizability": atomic_polarizability, + "find_atom_polarizability": find_atomic_polarizability, + "atom_polarizability": atomic_polarizability, "polarizability": polarizability, "find_polarizability": find_polarizability, } @@ -41,6 +41,12 @@ def setUp(self) -> None: k: [v.numpy(force=True)] for d in self.sampled for k, v in d.items() } self.all_stat["type"] = self.all_stat.pop("atype") + self.all_stat["find_atomic_polarizability"] = self.all_stat.pop( + "find_atom_polarizability" + ) + self.all_stat["atomic_polarizability"] = self.all_stat.pop( + "atom_polarizability" + ) self.tfpolar = PolarFittingSeA( ntypes=ntypes, dim_descrpt=1, @@ -61,9 +67,9 @@ def test_atomic_consistency(self): np.testing.assert_allclose(tfbias, to_numpy_array(ptbias)) def test_global_consistency(self): - self.sampled[0]["find_atomic_polarizability"] = -1 + self.sampled[0]["find_atom_polarizability"] = -1 self.sampled[0]["polarizability"] = self.sampled[0][ - "atomic_polarizability" + "atom_polarizability" ].sum(dim=1) self.all_stat["find_atomic_polarizability"] = [-1] self.all_stat["polarizability"] = [ diff --git a/source/tests/tf/test_dp_test.py b/source/tests/tf/test_dp_test.py index 9a3dde3da0..b9a9706da2 100644 --- a/source/tests/tf/test_dp_test.py +++ b/source/tests/tf/test_dp_test.py @@ -224,7 +224,7 @@ def setUp(self): ] ) self.expected_global_d = np.sum(self.expected_d.reshape(1, -1, 3), axis=1) - np.save(Path(self.test_data) / "set.000" / "atomic_dipole.npy", self.expected_d) + np.save(Path(self.test_data) / "set.000" / "atom_dipole.npy", self.expected_d) np.save(Path(self.test_data) / "set.000" / "dipole.npy", self.expected_global_d) def test_1frame(self): @@ -296,7 +296,7 @@ def setUp(self): ) self.expected_global_d = np.sum(self.expected_d.reshape(1, -1, 9), axis=1) np.save( - Path(self.test_data) / "set.000" / "atomic_polarizability.npy", + Path(self.test_data) / "set.000" / "atom_polarizability.npy", self.expected_d, ) np.save( From 71659247ee9a1d421dbf878f8100c23aff72a339 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 01:58:57 +0000 Subject: [PATCH 02/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/tests/pt/model/test_polar_stat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tests/pt/model/test_polar_stat.py b/source/tests/pt/model/test_polar_stat.py index 6826c33b39..b61455d524 100644 --- a/source/tests/pt/model/test_polar_stat.py +++ b/source/tests/pt/model/test_polar_stat.py @@ -68,9 +68,9 @@ def test_atomic_consistency(self): def test_global_consistency(self): self.sampled[0]["find_atom_polarizability"] = -1 - self.sampled[0]["polarizability"] = self.sampled[0][ - "atom_polarizability" - ].sum(dim=1) + self.sampled[0]["polarizability"] = self.sampled[0]["atom_polarizability"].sum( + dim=1 + ) self.all_stat["find_atomic_polarizability"] = [-1] self.all_stat["polarizability"] = [ self.all_stat["atomic_polarizability"][0].sum(axis=1) From 6c299fa6e41789c50518727f5887855e3598336f Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:46:01 +0800 Subject: [PATCH 03/27] chore: refactor global stat --- deepmd/pt/utils/stat.py | 229 ++++++++++++------ .../tests/pt/model/test_atomic_model_stat.py | 2 + 2 files changed, 153 insertions(+), 78 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index d85741b231..d083eecd75 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -171,7 +171,7 @@ def model_forward_auto_batch_size(*args, **kwargs): for kk in keys: model_predict[kk].append( to_numpy_array( - torch.sum(sample_predict[kk], dim=1) # nf x nloc x odims + sample_predict[kk] # nf x nloc x odims ) ) model_predict = {kk: np.concatenate(model_predict[kk]) for kk in keys} @@ -246,87 +246,160 @@ def compute_output_stats( # failed to restore the bias from stat file. compute if bias_atom_e is None: - # only get data for once - sampled = merged() if callable(merged) else merged - # remove the keys that are not in the sample - keys = [keys] if isinstance(keys, str) else keys - assert isinstance(keys, list) - new_keys = [ii for ii in keys if ii in sampled[0].keys()] - del keys - keys = new_keys - # get label dict from sample - outputs = {kk: [item[kk] for item in sampled] for kk in keys} - data_mixed_type = "real_natoms_vec" in sampled[0] - natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" - for system in sampled: - if "atom_exclude_types" in system: - type_mask = AtomExcludeMask( - ntypes, system["atom_exclude_types"] - ).get_type_mask() - system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) - input_natoms = [item[natoms_key] for item in sampled] - # shape: (nframes, ndim) - merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys} - # shape: (nframes, ntypes) - merged_natoms = to_numpy_array(torch.cat(input_natoms)[:, 2:]) - nf = merged_natoms.shape[0] - if preset_bias is not None: - assigned_atom_ener = { - kk: _make_preset_out_bias(ntypes, preset_bias[kk]) - if kk in preset_bias.keys() - else None - for kk in keys - } + + + # only get data once, sampled is a list of dict[str, torch.Tensor] + sampled = merged() if callable(merged) else merged + if model_forward is not None: + model_pred = _compute_model_predict(sampled, keys, model_forward) else: - assigned_atom_ener = {kk: None for kk in keys} - - if model_forward is None: - stats_input = merged_output - else: - # subtract the model bias and output the delta bias - model_predict = _compute_model_predict(sampled, keys, model_forward) - stats_input = {kk: merged_output[kk] - model_predict[kk] for kk in keys} - - bias_atom_e = {} - std_atom_e = {} - for kk in keys: - bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_redu( - stats_input[kk], - merged_natoms, - assigned_bias=assigned_atom_ener[kk], - rcond=rcond, + model_pred = None + + # split system based on label + atomic_sampled = {} + global_sampled = {} + """ + case1: system-1 global dipole and atomic polar, system-2 global dipole and global polar + dipole,sys1 --> add to global_sampled + dipole,sys2 --> add to global_sampled + polar, sys1 --> add to atomic_sampled + polar, sys2 --> do nothing + global_sampled : [sys1, sys2] + atomic_sampled : [sys1] + """ + for kk in keys: + for idx, system in enumerate(sampled): + if (("find_atom_" + kk) in system) and (system["find_atom_" + kk] > 0.0) and (idx not in atomic_sampled): + atomic_sampled[idx] = system + elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0) and (idx not in global_sampled): + global_sampled[idx] = system + else: + continue + + atomic_sampled = list(atomic_sampled.values()) + global_sampled = list(global_sampled.values()) + if len(global_sampled) > 0: + bias_atom_e, std_atom_e = compute_output_stats_global( + global_sampled, + ntypes, + keys, + rcond, + preset_bias, + model_pred, ) - bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) - - # unbias_e is only used for print rmse - if model_forward is None: - unbias_e = { - kk: merged_natoms @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys - } - else: - unbias_e = { - kk: model_predict[kk].reshape(nf, -1) - + merged_natoms @ bias_atom_e[kk].reshape(ntypes, -1) - for kk in keys - } - atom_numbs = merged_natoms.sum(-1) - - def rmse(x): - return np.sqrt(np.mean(np.square(x))) - - for kk in keys: - rmse_ae = rmse( - (unbias_e[kk].reshape(nf, -1) - merged_output[kk].reshape(nf, -1)) - / atom_numbs[:, None] - ) - log.info( - f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." + + if len(atomic_sampled) > 0: + bias_atom_e, std_atom_e = compute_output_stats_atomic( + global_sampled, + ntypes, + keys, + rcond, + preset_bias, + model_pred, ) - + + # need to merge dict if stat_file_path is not None: _save_to_file(stat_file_path, bias_atom_e, std_atom_e) - ret_bias = {kk: to_torch_tensor(vv) for kk, vv in bias_atom_e.items()} - ret_std = {kk: to_torch_tensor(vv) for kk, vv in std_atom_e.items()} + bias_atom_e = {kk: to_torch_tensor(vv) for kk, vv in bias_atom_e.items()} + std_atom_e = {kk: to_torch_tensor(vv) for kk, vv in std_atom_e.items()} + return bias_atom_e, std_atom_e - return ret_bias, ret_std +def compute_output_stats_global( + sampled: List[dict], + ntypes: int, + keys: List[str], + rcond: Optional[float] = None, + preset_bias: Optional[Dict[str, List[Optional[torch.Tensor]]]] = None, + model_pred: Optional[Dict[str, np.ndarray]] = None, +): + """This function only handle stat computation from reduced global labels.""" + + # remove the keys that are not in the sample + keys = [keys] if isinstance(keys, str) else keys + assert isinstance(keys, list) + new_keys = [ii for ii in keys if ii in sampled[0].keys()] + del keys + keys = new_keys + + # get label dict from sample; for each key, only picking the system with global labels. + outputs = {kk: [system[kk] for system in sampled if kk in system and system.get(f"find_{kk}", 0) > 0] for kk in keys} + + data_mixed_type = "real_natoms_vec" in sampled[0] + natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" + for system in sampled: + if "atom_exclude_types" in system: + type_mask = AtomExcludeMask( + ntypes, system["atom_exclude_types"] + ).get_type_mask() + system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) + input_natoms = [item[natoms_key] for item in sampled] + # shape: (nframes, ndim) + merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys} + # shape: (nframes, ntypes) + merged_natoms = to_numpy_array(torch.cat(input_natoms)[:, 2:]) + nf = merged_natoms.shape[0] + if preset_bias is not None: + assigned_atom_ener = { + kk: _make_preset_out_bias(ntypes, preset_bias[kk]) + if kk in preset_bias.keys() + else None + for kk in keys + } + else: + assigned_atom_ener = {kk: None for kk in keys} + + if model_pred is None: + stats_input = merged_output + else: + # subtract the model bias and output the delta bias + model_pred = {kk: np.sum(model_pred[kk], axis=1) for kk in keys} + stats_input = {kk: merged_output[kk] - model_pred[kk] for kk in keys} + + bias_atom_e = {} + std_atom_e = {} + for kk in keys: + bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_redu( + stats_input[kk], + merged_natoms, + assigned_bias=assigned_atom_ener[kk], + rcond=rcond, + ) + bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) + + # unbias_e is only used for print rmse + if model_pred is None: + unbias_e = { + kk: merged_natoms @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys + } + else: + unbias_e = { + kk: model_pred[kk].reshape(nf, -1) + + merged_natoms @ bias_atom_e[kk].reshape(ntypes, -1) + for kk in keys + } + atom_numbs = merged_natoms.sum(-1) + + def rmse(x): + return np.sqrt(np.mean(np.square(x))) + + for kk in keys: + rmse_ae = rmse( + (unbias_e[kk].reshape(nf, -1) - merged_output[kk].reshape(nf, -1)) + / atom_numbs[:, None] + ) + log.info( + f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." + ) + return bias_atom_e, std_atom_e + +def compute_output_stats_atomic( + sampled: List[dict], + ntypes: int, + keys: List[str], + rcond: Optional[float] = None, + preset_bias: Optional[Dict[str, List[Optional[torch.Tensor]]]] = None, + model_pred: Optional[Dict[str, np.ndarray]] = None, +): + pass \ No newline at end of file diff --git a/source/tests/pt/model/test_atomic_model_stat.py b/source/tests/pt/model/test_atomic_model_stat.py index e266cf215a..c5f149055e 100644 --- a/source/tests/pt/model/test_atomic_model_stat.py +++ b/source/tests/pt/model/test_atomic_model_stat.py @@ -150,6 +150,8 @@ def setUp(self): "bar": to_torch_tensor( np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) ), + "find_foo": np.float32(1.0), + "find_bar": np.float32(1.0) } ] self.tempdir = tempfile.TemporaryDirectory() From 80159e179957ded3d0a4f7e416a9931e2d151eed Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:51:03 +0800 Subject: [PATCH 04/27] chore: refactor global stat --- source/tests/pt/model/test_linear_atomic_model_stat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/tests/pt/model/test_linear_atomic_model_stat.py b/source/tests/pt/model/test_linear_atomic_model_stat.py index 010cecf9f8..f7feeda550 100644 --- a/source/tests/pt/model/test_linear_atomic_model_stat.py +++ b/source/tests/pt/model/test_linear_atomic_model_stat.py @@ -154,6 +154,7 @@ def setUp(self): ), # bias of foo: 1, 3 "energy": to_torch_tensor(np.array([5.0, 7.0]).reshape(2, 1)), + "find_energy": np.float32(1.0), } ] self.tempdir = tempfile.TemporaryDirectory() From a83e7573760573f71be1ffc167b083a8c689bc74 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 05:51:45 +0000 Subject: [PATCH 05/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/stat.py | 42 ++++++++++++------- .../tests/pt/model/test_atomic_model_stat.py | 2 +- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index d083eecd75..e3fc92c508 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -246,10 +246,8 @@ def compute_output_stats( # failed to restore the bias from stat file. compute if bias_atom_e is None: - - # only get data once, sampled is a list of dict[str, torch.Tensor] - sampled = merged() if callable(merged) else merged + sampled = merged() if callable(merged) else merged if model_forward is not None: model_pred = _compute_model_predict(sampled, keys, model_forward) else: @@ -267,11 +265,19 @@ def compute_output_stats( global_sampled : [sys1, sys2] atomic_sampled : [sys1] """ - for kk in keys: + for kk in keys: for idx, system in enumerate(sampled): - if (("find_atom_" + kk) in system) and (system["find_atom_" + kk] > 0.0) and (idx not in atomic_sampled): - atomic_sampled[idx] = system - elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0) and (idx not in global_sampled): + if ( + (("find_atom_" + kk) in system) + and (system["find_atom_" + kk] > 0.0) + and (idx not in atomic_sampled) + ): + atomic_sampled[idx] = system + elif ( + (("find_" + kk) in system) + and (system["find_" + kk] > 0.0) + and (idx not in global_sampled) + ): global_sampled[idx] = system else: continue @@ -287,7 +293,7 @@ def compute_output_stats( preset_bias, model_pred, ) - + if len(atomic_sampled) > 0: bias_atom_e, std_atom_e = compute_output_stats_atomic( global_sampled, @@ -297,14 +303,15 @@ def compute_output_stats( preset_bias, model_pred, ) - + # need to merge dict if stat_file_path is not None: _save_to_file(stat_file_path, bias_atom_e, std_atom_e) bias_atom_e = {kk: to_torch_tensor(vv) for kk, vv in bias_atom_e.items()} std_atom_e = {kk: to_torch_tensor(vv) for kk, vv in std_atom_e.items()} - return bias_atom_e, std_atom_e + return bias_atom_e, std_atom_e + def compute_output_stats_global( sampled: List[dict], @@ -315,7 +322,6 @@ def compute_output_stats_global( model_pred: Optional[Dict[str, np.ndarray]] = None, ): """This function only handle stat computation from reduced global labels.""" - # remove the keys that are not in the sample keys = [keys] if isinstance(keys, str) else keys assert isinstance(keys, list) @@ -324,7 +330,14 @@ def compute_output_stats_global( keys = new_keys # get label dict from sample; for each key, only picking the system with global labels. - outputs = {kk: [system[kk] for system in sampled if kk in system and system.get(f"find_{kk}", 0) > 0] for kk in keys} + outputs = { + kk: [ + system[kk] + for system in sampled + if kk in system and system.get(f"find_{kk}", 0) > 0 + ] + for kk in keys + } data_mixed_type = "real_natoms_vec" in sampled[0] natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" @@ -367,7 +380,7 @@ def compute_output_stats_global( rcond=rcond, ) bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) - + # unbias_e is only used for print rmse if model_pred is None: unbias_e = { @@ -394,6 +407,7 @@ def rmse(x): ) return bias_atom_e, std_atom_e + def compute_output_stats_atomic( sampled: List[dict], ntypes: int, @@ -402,4 +416,4 @@ def compute_output_stats_atomic( preset_bias: Optional[Dict[str, List[Optional[torch.Tensor]]]] = None, model_pred: Optional[Dict[str, np.ndarray]] = None, ): - pass \ No newline at end of file + pass diff --git a/source/tests/pt/model/test_atomic_model_stat.py b/source/tests/pt/model/test_atomic_model_stat.py index c5f149055e..6cffe573bc 100644 --- a/source/tests/pt/model/test_atomic_model_stat.py +++ b/source/tests/pt/model/test_atomic_model_stat.py @@ -151,7 +151,7 @@ def setUp(self): np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) ), "find_foo": np.float32(1.0), - "find_bar": np.float32(1.0) + "find_bar": np.float32(1.0), } ] self.tempdir = tempfile.TemporaryDirectory() From ef95a9cceec1de8a12b368e0970e3c534930acb9 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Mon, 8 Apr 2024 21:15:22 +0800 Subject: [PATCH 06/27] feat: add atomic bias --- deepmd/pt/utils/stat.py | 191 +++++++++------- deepmd/utils/out_stat.py | 1 + source/tests/pt/model/test_atomic_bias.py | 253 ++++++++++++++++++++++ 3 files changed, 364 insertions(+), 81 deletions(-) create mode 100644 source/tests/pt/model/test_atomic_bias.py diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index e3fc92c508..9595c73fc2 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -7,6 +7,7 @@ Optional, Union, ) +from collections import defaultdict import numpy as np import torch @@ -24,6 +25,7 @@ ) from deepmd.utils.out_stat import ( compute_stats_from_redu, + compute_stats_from_atomic, ) from deepmd.utils.path import ( DPPath, @@ -174,7 +176,6 @@ def model_forward_auto_batch_size(*args, **kwargs): sample_predict[kk] # nf x nloc x odims ) ) - model_predict = {kk: np.concatenate(model_predict[kk]) for kk in keys} return model_predict @@ -202,7 +203,6 @@ def _make_preset_out_bias( ] return np.array(nbias) - def compute_output_stats( merged: Union[Callable[[], List[dict]], List[dict]], ntypes: int, @@ -253,58 +253,64 @@ def compute_output_stats( else: model_pred = None + # remove the keys that are not in the sample + keys = [keys] if isinstance(keys, str) else keys + assert isinstance(keys, list) + new_keys = [ii for ii in keys if ii in sampled[0].keys()] + del keys + keys = new_keys + # split system based on label - atomic_sampled = {} - global_sampled = {} - """ - case1: system-1 global dipole and atomic polar, system-2 global dipole and global polar - dipole,sys1 --> add to global_sampled - dipole,sys2 --> add to global_sampled - polar, sys1 --> add to atomic_sampled - polar, sys2 --> do nothing - global_sampled : [sys1, sys2] - atomic_sampled : [sys1] - """ - for kk in keys: + atomic_sampled_idx = defaultdict(set) + global_sampled_idx = defaultdict(set) + + for kk in keys: for idx, system in enumerate(sampled): - if ( - (("find_atom_" + kk) in system) - and (system["find_atom_" + kk] > 0.0) - and (idx not in atomic_sampled) - ): - atomic_sampled[idx] = system - elif ( - (("find_" + kk) in system) - and (system["find_" + kk] > 0.0) - and (idx not in global_sampled) - ): - global_sampled[idx] = system + + if (("find_atom_" + kk) in system) and (system["find_atom_" + kk] > 0.0) and (idx not in atomic_sampled_idx[kk]): + atomic_sampled_idx[kk].add(idx) + elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0) and (idx not in global_sampled_idx[kk]): + global_sampled_idx[kk].add(idx) else: continue - atomic_sampled = list(atomic_sampled.values()) - global_sampled = list(global_sampled.values()) - if len(global_sampled) > 0: - bias_atom_e, std_atom_e = compute_output_stats_global( - global_sampled, - ntypes, - keys, - rcond, - preset_bias, - model_pred, - ) - if len(atomic_sampled) > 0: - bias_atom_e, std_atom_e = compute_output_stats_atomic( - global_sampled, - ntypes, - keys, - rcond, - preset_bias, - model_pred, - ) + # use index to gather model predictions for the corresponding systems. + model_pred_g = {kk: [vv[idx] for idx in sorted(list(global_sampled_idx[kk]))] for kk, vv in model_pred.items()} if model_pred else None + model_pred_a = {kk: [vv[idx] for idx in sorted(list(atomic_sampled_idx[kk]))] for kk, vv in model_pred.items()} if model_pred else None + # concat all frames within those systmes + model_pred_g = {kk: np.concatenate(model_pred_g[kk]) for kk in model_pred_g.keys() if len(model_pred_g[kk])>0} if model_pred else None + model_pred_a = {kk: np.concatenate(model_pred_a[kk]) for kk in model_pred_a.keys() if len(model_pred_a[kk])>0} if model_pred else None + + # compute stat + bias_atom_g, std_atom_g = compute_output_stats_global( + sampled, + ntypes, + keys, + rcond, + preset_bias, + model_pred_g, + ) + bias_atom_a, std_atom_a = compute_output_stats_atomic( + sampled, + ntypes, + keys, + model_pred_a, + ) + + # merge global/atomic bias + bias_atom_e, std_atom_e = {}, {} + for kk in keys: + if kk in bias_atom_a: + bias_atom_e[kk] = bias_atom_a[kk] + std_atom_e[kk] = std_atom_a[kk] + elif kk in bias_atom_g: + bias_atom_e[kk] = bias_atom_g[kk] + std_atom_e[kk] = std_atom_g[kk] + else: + bias_atom_e[kk] = None + std_atom_e[kk] = None - # need to merge dict if stat_file_path is not None: _save_to_file(stat_file_path, bias_atom_e, std_atom_e) @@ -322,23 +328,11 @@ def compute_output_stats_global( model_pred: Optional[Dict[str, np.ndarray]] = None, ): """This function only handle stat computation from reduced global labels.""" - # remove the keys that are not in the sample - keys = [keys] if isinstance(keys, str) else keys - assert isinstance(keys, list) - new_keys = [ii for ii in keys if ii in sampled[0].keys()] - del keys - keys = new_keys # get label dict from sample; for each key, only picking the system with global labels. - outputs = { - kk: [ - system[kk] - for system in sampled - if kk in system and system.get(f"find_{kk}", 0) > 0 - ] - for kk in keys - } - + outputs = {kk: [system[kk] for system in sampled if kk in system and system.get(f"find_{kk}", 0) > 0] for kk in keys} + + data_mixed_type = "real_natoms_vec" in sampled[0] natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" for system in sampled: @@ -347,12 +341,14 @@ def compute_output_stats_global( ntypes, system["atom_exclude_types"] ).get_type_mask() system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) - input_natoms = [item[natoms_key] for item in sampled] + # input_natoms = [item[natoms_key] for item in sampled] + + input_natoms = {kk: [item[natoms_key] for item in sampled if kk in item and item.get(f"find_{kk}", 0) > 0] for kk in keys} # shape: (nframes, ndim) merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys} # shape: (nframes, ntypes) - merged_natoms = to_numpy_array(torch.cat(input_natoms)[:, 2:]) - nf = merged_natoms.shape[0] + merged_natoms = {kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) for kk in keys} + nf = {kk: merged_natoms[kk].shape[0] for kk in keys} if preset_bias is not None: assigned_atom_ener = { kk: _make_preset_out_bias(ntypes, preset_bias[kk]) @@ -367,40 +363,46 @@ def compute_output_stats_global( stats_input = merged_output else: # subtract the model bias and output the delta bias + + # need to find the output of the corresponding system, may need idx. model_pred = {kk: np.sum(model_pred[kk], axis=1) for kk in keys} - stats_input = {kk: merged_output[kk] - model_pred[kk] for kk in keys} + stats_input = {kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output} bias_atom_e = {} std_atom_e = {} for kk in keys: - bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_redu( - stats_input[kk], - merged_natoms, - assigned_bias=assigned_atom_ener[kk], - rcond=rcond, - ) + if kk in stats_input: + bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_redu( + stats_input[kk], + merged_natoms[kk], + assigned_bias=assigned_atom_ener[kk], + rcond=rcond, + ) + else: + # this key does not have atomic labels, skip it. + continue bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) # unbias_e is only used for print rmse if model_pred is None: unbias_e = { - kk: merged_natoms @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys + kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys } else: unbias_e = { - kk: model_pred[kk].reshape(nf, -1) - + merged_natoms @ bias_atom_e[kk].reshape(ntypes, -1) + kk: model_pred[kk].reshape(nf[kk], -1) + + merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys } - atom_numbs = merged_natoms.sum(-1) + atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in keys} def rmse(x): return np.sqrt(np.mean(np.square(x))) for kk in keys: rmse_ae = rmse( - (unbias_e[kk].reshape(nf, -1) - merged_output[kk].reshape(nf, -1)) - / atom_numbs[:, None] + (unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)) + / atom_numbs[kk][:, None] ) log.info( f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." @@ -412,8 +414,35 @@ def compute_output_stats_atomic( sampled: List[dict], ntypes: int, keys: List[str], - rcond: Optional[float] = None, - preset_bias: Optional[Dict[str, List[Optional[torch.Tensor]]]] = None, model_pred: Optional[Dict[str, np.ndarray]] = None, ): - pass + + # get label dict from sample; for each key, only picking the system with atomic labels. + outputs = {kk: [system[kk] for system in sampled if kk in system and system.get(f"find_atom_{kk}", 0) > 0] for kk in keys} + natoms = {kk: [system["atype"] for system in sampled if kk in system and system.get(f"find_atom_{kk}", 0) > 0] for kk in keys} + # shape: (nframes, nloc, ndim) + merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys if len(outputs[kk]) > 0} + merged_natoms = {kk: to_numpy_array(torch.cat(natoms[kk])) for kk in keys if len(natoms[kk]) > 0} + + if model_pred is None: + stats_input = merged_output + else: + # subtract the model bias and output the delta bias + stats_input = {kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output} + + bias_atom_e = {} + std_atom_e = {} + + for kk in keys: + if kk in stats_input: + bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_atomic( + stats_input[kk], + merged_natoms[kk], + ) + else: + # this key does not have atomic labels, skip it. + continue + + bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) + return bias_atom_e, std_atom_e + diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py index 1dcbcb1280..bce4442468 100644 --- a/deepmd/utils/out_stat.py +++ b/deepmd/utils/out_stat.py @@ -112,6 +112,7 @@ def compute_stats_from_atomic( assert output.ndim == 3 assert atype.ndim == 2 assert output.shape[:2] == atype.shape + # compute output bias nframes, nloc, ndim = output.shape ntypes = atype.max() + 1 diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_bias.py new file mode 100644 index 0000000000..e2d39ddf69 --- /dev/null +++ b/source/tests/pt/model/test_atomic_bias.py @@ -0,0 +1,253 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import tempfile +import unittest +from pathlib import ( + Path, +) +from typing import ( + Optional, +) + +import h5py +import numpy as np +import torch + +from deepmd.dpmodel.output_def import ( + FittingOutputDef, + OutputVariableDef, +) +from deepmd.pt.model.atomic_model import ( + BaseAtomicModel, + DPAtomicModel, +) +from deepmd.pt.model.descriptor.dpa1 import ( + DescrptDPA1, +) +from deepmd.pt.model.task.base_fitting import ( + BaseFitting, +) +from deepmd.pt.utils import ( + env, +) +from deepmd.pt.utils.utils import ( + to_numpy_array, + to_torch_tensor, +) +from deepmd.utils.path import ( + DPPath, +) + +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) + +dtype = env.GLOBAL_PT_FLOAT_PRECISION + + +class FooFitting(torch.nn.Module, BaseFitting): + def output_def(self): + return FittingOutputDef( + [ + OutputVariableDef( + "foo", + [1], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ), + OutputVariableDef( + "bar", + [1, 2], + reduciable=True, + r_differentiable=True, + c_differentiable=True, + ), + ] + ) + + def serialize(self) -> dict: + raise NotImplementedError + + def forward( + self, + descriptor: torch.Tensor, + atype: torch.Tensor, + gr: Optional[torch.Tensor] = None, + g2: Optional[torch.Tensor] = None, + h2: Optional[torch.Tensor] = None, + fparam: Optional[torch.Tensor] = None, + aparam: Optional[torch.Tensor] = None, + ): + nf, nloc, _ = descriptor.shape + ret = {} + ret["foo"] = ( + torch.Tensor( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ) + .view([nf, nloc] + self.output_def()["foo"].shape) + .to(env.GLOBAL_PT_FLOAT_PRECISION) + .to(env.DEVICE) + ) + ret["bar"] = ( + torch.Tensor( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ) + .view([nf, nloc] + self.output_def()["bar"].shape) + .to(env.GLOBAL_PT_FLOAT_PRECISION) + .to(env.DEVICE) + ) + return ret + + +class TestAtomicModelStat(unittest.TestCase, TestCaseSingleFrameWithNlist): + def tearDown(self): + self.tempdir.cleanup() + + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + nf, nloc, nnei = self.nlist.shape + self.merged_output_stat = [ + { + "coord": to_torch_tensor(np.zeros([2, 3, 3])), + "atype": to_torch_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_torch_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_torch_tensor(np.zeros([2, 3, 3])), + "natoms": to_torch_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 1, 3 + "foo": to_torch_tensor(np.array([[5.0, 5.0, 5.0],[5.0, 6.0, 7.0]]).reshape(2, 3, 1)), + # bias of bar: [1, 5], [3, 2] + "bar": to_torch_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_atom_foo": np.float32(1.0), + "find_bar": np.float32(1.0) + }, + { + "coord": to_torch_tensor(np.zeros([2, 3, 3])), + "atype": to_torch_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_torch_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_torch_tensor(np.zeros([2, 3, 3])), + "natoms": to_torch_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5, 6 from atomic label. + "foo": to_torch_tensor(np.array([5.0, 7.0]).reshape(2, 1)), + # bias of bar: [1, 5], [3, 2] + "bar": to_torch_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_foo": np.float32(1.0), + "find_bar": np.float32(1.0) + } + ] + self.tempdir = tempfile.TemporaryDirectory() + h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) + with h5py.File(h5file, "w") as f: + pass + self.stat_file_path = DPPath(h5file, "a") + + def test_output_stat(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting().to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + args = [ + to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + expected_ret0 = {} + expected_ret0["foo"] = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["foo"].shape) + expected_ret0["bar"] = np.array( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ).reshape([nf, nloc] + md0.fitting_output_def()["bar"].shape) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + # nt x odim + foo_bias = np.array([5.0, 6.0]).reshape(2, 1) + bar_bias = np.array([1.0, 5.0, 3.0, 2.0]).reshape(2, 1, 2) + expected_ret1 = {} + expected_ret1["foo"] = ret0["foo"] + foo_bias[at] + expected_ret1["bar"] = ret0["bar"] + bar_bias[at] + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk]) + + # 3. test bias load from file + def raise_error(): + raise RuntimeError + + md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path) + ret2 = md0.forward_common_atomic(*args) + ret2 = cvt_ret(ret2) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], ret2[kk]) + + # 4. test change bias + BaseAtomicModel.change_out_bias( + md0, self.merged_output_stat, bias_adjust_mode="change-by-statistic" + ) + args = [ + to_torch_tensor(ii) + for ii in [ + self.coord_ext, + to_numpy_array(self.merged_output_stat[0]["atype_ext"]), + self.nlist, + ] + ] + ret3 = md0.forward_common_atomic(*args) + ret3 = cvt_ret(ret3) + + expected_ret3 = {} + # new bias [2.666, 1.333] + expected_ret3["foo"] = np.array([[3.6667, 4.6667, 4.3333], [6.6667, 6.3333, 7.3333]]).reshape(2, 3, 1) + for kk in ["foo"]: + np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk],decimal=4) From f9278ebd3d528868f4ffbb932b95a82eb0908f7d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 13:21:18 +0000 Subject: [PATCH 07/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/stat.py | 137 +++++++++++++++++----- deepmd/utils/out_stat.py | 2 +- source/tests/pt/model/test_atomic_bias.py | 18 +-- 3 files changed, 118 insertions(+), 39 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 9595c73fc2..70effcf23c 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -1,5 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging +from collections import ( + defaultdict, +) from typing import ( Callable, Dict, @@ -7,7 +10,6 @@ Optional, Union, ) -from collections import defaultdict import numpy as np import torch @@ -24,8 +26,8 @@ to_torch_tensor, ) from deepmd.utils.out_stat import ( - compute_stats_from_redu, compute_stats_from_atomic, + compute_stats_from_redu, ) from deepmd.utils.path import ( DPPath, @@ -203,6 +205,7 @@ def _make_preset_out_bias( ] return np.array(nbias) + def compute_output_stats( merged: Union[Callable[[], List[dict]], List[dict]], ntypes: int, @@ -264,24 +267,60 @@ def compute_output_stats( atomic_sampled_idx = defaultdict(set) global_sampled_idx = defaultdict(set) - for kk in keys: + for kk in keys: for idx, system in enumerate(sampled): - - if (("find_atom_" + kk) in system) and (system["find_atom_" + kk] > 0.0) and (idx not in atomic_sampled_idx[kk]): + if ( + (("find_atom_" + kk) in system) + and (system["find_atom_" + kk] > 0.0) + and (idx not in atomic_sampled_idx[kk]) + ): atomic_sampled_idx[kk].add(idx) - elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0) and (idx not in global_sampled_idx[kk]): + elif ( + (("find_" + kk) in system) + and (system["find_" + kk] > 0.0) + and (idx not in global_sampled_idx[kk]) + ): global_sampled_idx[kk].add(idx) else: continue - # use index to gather model predictions for the corresponding systems. - model_pred_g = {kk: [vv[idx] for idx in sorted(list(global_sampled_idx[kk]))] for kk, vv in model_pred.items()} if model_pred else None - model_pred_a = {kk: [vv[idx] for idx in sorted(list(atomic_sampled_idx[kk]))] for kk, vv in model_pred.items()} if model_pred else None + model_pred_g = ( + { + kk: [vv[idx] for idx in sorted(list(global_sampled_idx[kk]))] + for kk, vv in model_pred.items() + } + if model_pred + else None + ) + model_pred_a = ( + { + kk: [vv[idx] for idx in sorted(list(atomic_sampled_idx[kk]))] + for kk, vv in model_pred.items() + } + if model_pred + else None + ) # concat all frames within those systmes - model_pred_g = {kk: np.concatenate(model_pred_g[kk]) for kk in model_pred_g.keys() if len(model_pred_g[kk])>0} if model_pred else None - model_pred_a = {kk: np.concatenate(model_pred_a[kk]) for kk in model_pred_a.keys() if len(model_pred_a[kk])>0} if model_pred else None - + model_pred_g = ( + { + kk: np.concatenate(model_pred_g[kk]) + for kk in model_pred_g.keys() + if len(model_pred_g[kk]) > 0 + } + if model_pred + else None + ) + model_pred_a = ( + { + kk: np.concatenate(model_pred_a[kk]) + for kk in model_pred_a.keys() + if len(model_pred_a[kk]) > 0 + } + if model_pred + else None + ) + # compute stat bias_atom_g, std_atom_g = compute_output_stats_global( sampled, @@ -328,11 +367,16 @@ def compute_output_stats_global( model_pred: Optional[Dict[str, np.ndarray]] = None, ): """This function only handle stat computation from reduced global labels.""" - # get label dict from sample; for each key, only picking the system with global labels. - outputs = {kk: [system[kk] for system in sampled if kk in system and system.get(f"find_{kk}", 0) > 0] for kk in keys} - - + outputs = { + kk: [ + system[kk] + for system in sampled + if kk in system and system.get(f"find_{kk}", 0) > 0 + ] + for kk in keys + } + data_mixed_type = "real_natoms_vec" in sampled[0] natoms_key = "natoms" if not data_mixed_type else "real_natoms_vec" for system in sampled: @@ -342,12 +386,21 @@ def compute_output_stats_global( ).get_type_mask() system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) # input_natoms = [item[natoms_key] for item in sampled] - - input_natoms = {kk: [item[natoms_key] for item in sampled if kk in item and item.get(f"find_{kk}", 0) > 0] for kk in keys} + + input_natoms = { + kk: [ + item[natoms_key] + for item in sampled + if kk in item and item.get(f"find_{kk}", 0) > 0 + ] + for kk in keys + } # shape: (nframes, ndim) merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys} # shape: (nframes, ntypes) - merged_natoms = {kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) for kk in keys} + merged_natoms = { + kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) for kk in keys + } nf = {kk: merged_natoms[kk].shape[0] for kk in keys} if preset_bias is not None: assigned_atom_ener = { @@ -363,10 +416,12 @@ def compute_output_stats_global( stats_input = merged_output else: # subtract the model bias and output the delta bias - + # need to find the output of the corresponding system, may need idx. model_pred = {kk: np.sum(model_pred[kk], axis=1) for kk in keys} - stats_input = {kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output} + stats_input = { + kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output + } bias_atom_e = {} std_atom_e = {} @@ -379,7 +434,7 @@ def compute_output_stats_global( rcond=rcond, ) else: - # this key does not have atomic labels, skip it. + # this key does not have atomic labels, skip it. continue bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) @@ -416,20 +471,41 @@ def compute_output_stats_atomic( keys: List[str], model_pred: Optional[Dict[str, np.ndarray]] = None, ): - # get label dict from sample; for each key, only picking the system with atomic labels. - outputs = {kk: [system[kk] for system in sampled if kk in system and system.get(f"find_atom_{kk}", 0) > 0] for kk in keys} - natoms = {kk: [system["atype"] for system in sampled if kk in system and system.get(f"find_atom_{kk}", 0) > 0] for kk in keys} + outputs = { + kk: [ + system[kk] + for system in sampled + if kk in system and system.get(f"find_atom_{kk}", 0) > 0 + ] + for kk in keys + } + natoms = { + kk: [ + system["atype"] + for system in sampled + if kk in system and system.get(f"find_atom_{kk}", 0) > 0 + ] + for kk in keys + } # shape: (nframes, nloc, ndim) - merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys if len(outputs[kk]) > 0} - merged_natoms = {kk: to_numpy_array(torch.cat(natoms[kk])) for kk in keys if len(natoms[kk]) > 0} + merged_output = { + kk: to_numpy_array(torch.cat(outputs[kk])) + for kk in keys + if len(outputs[kk]) > 0 + } + merged_natoms = { + kk: to_numpy_array(torch.cat(natoms[kk])) for kk in keys if len(natoms[kk]) > 0 + } if model_pred is None: stats_input = merged_output else: # subtract the model bias and output the delta bias - stats_input = {kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output} - + stats_input = { + kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output + } + bias_atom_e = {} std_atom_e = {} @@ -440,9 +516,8 @@ def compute_output_stats_atomic( merged_natoms[kk], ) else: - # this key does not have atomic labels, skip it. + # this key does not have atomic labels, skip it. continue bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) return bias_atom_e, std_atom_e - diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py index bce4442468..9678f8ed72 100644 --- a/deepmd/utils/out_stat.py +++ b/deepmd/utils/out_stat.py @@ -112,7 +112,7 @@ def compute_stats_from_atomic( assert output.ndim == 3 assert atype.ndim == 2 assert output.shape[:2] == atype.shape - + # compute output bias nframes, nloc, ndim = output.shape ntypes = atype.max() + 1 diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_bias.py index e2d39ddf69..562832b429 100644 --- a/source/tests/pt/model/test_atomic_bias.py +++ b/source/tests/pt/model/test_atomic_bias.py @@ -126,13 +126,15 @@ def setUp(self): np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) ), # bias of foo: 1, 3 - "foo": to_torch_tensor(np.array([[5.0, 5.0, 5.0],[5.0, 6.0, 7.0]]).reshape(2, 3, 1)), + "foo": to_torch_tensor( + np.array([[5.0, 5.0, 5.0], [5.0, 6.0, 7.0]]).reshape(2, 3, 1) + ), # bias of bar: [1, 5], [3, 2] "bar": to_torch_tensor( np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) ), "find_atom_foo": np.float32(1.0), - "find_bar": np.float32(1.0) + "find_bar": np.float32(1.0), }, { "coord": to_torch_tensor(np.zeros([2, 3, 3])), @@ -153,8 +155,8 @@ def setUp(self): np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) ), "find_foo": np.float32(1.0), - "find_bar": np.float32(1.0) - } + "find_bar": np.float32(1.0), + }, ] self.tempdir = tempfile.TemporaryDirectory() h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) @@ -245,9 +247,11 @@ def raise_error(): ] ret3 = md0.forward_common_atomic(*args) ret3 = cvt_ret(ret3) - + expected_ret3 = {} # new bias [2.666, 1.333] - expected_ret3["foo"] = np.array([[3.6667, 4.6667, 4.3333], [6.6667, 6.3333, 7.3333]]).reshape(2, 3, 1) + expected_ret3["foo"] = np.array( + [[3.6667, 4.6667, 4.3333], [6.6667, 6.3333, 7.3333]] + ).reshape(2, 3, 1) for kk in ["foo"]: - np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk],decimal=4) + np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) From f6ebec1db405fc38b6ede126bccb8d33bf6509aa Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Mon, 8 Apr 2024 21:56:33 +0800 Subject: [PATCH 08/27] fix: precommit --- deepmd/pt/utils/stat.py | 51 ++++++++--------------- source/tests/pt/model/test_atomic_bias.py | 8 ++-- 2 files changed, 21 insertions(+), 38 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 70effcf23c..1da48bbb0c 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -264,43 +264,26 @@ def compute_output_stats( keys = new_keys # split system based on label - atomic_sampled_idx = defaultdict(set) - global_sampled_idx = defaultdict(set) + atomic_sampled_idx = defaultdict(list) + global_sampled_idx = defaultdict(list) for kk in keys: for idx, system in enumerate(sampled): - if ( - (("find_atom_" + kk) in system) - and (system["find_atom_" + kk] > 0.0) - and (idx not in atomic_sampled_idx[kk]) - ): - atomic_sampled_idx[kk].add(idx) - elif ( - (("find_" + kk) in system) - and (system["find_" + kk] > 0.0) - and (idx not in global_sampled_idx[kk]) - ): - global_sampled_idx[kk].add(idx) + + + if (("find_atom_" + kk) in system) and (system["find_atom_" + kk] > 0.0) and (len(atomic_sampled_idx[kk])==0 or idx > atomic_sampled_idx[kk][-1]): + atomic_sampled_idx[kk].append(idx) + elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0) and (len(global_sampled_idx[kk])==0 or idx > global_sampled_idx[kk][-1]): + global_sampled_idx[kk].append(idx) + else: continue # use index to gather model predictions for the corresponding systems. - model_pred_g = ( - { - kk: [vv[idx] for idx in sorted(list(global_sampled_idx[kk]))] - for kk, vv in model_pred.items() - } - if model_pred - else None - ) - model_pred_a = ( - { - kk: [vv[idx] for idx in sorted(list(atomic_sampled_idx[kk]))] - for kk, vv in model_pred.items() - } - if model_pred - else None - ) + + model_pred_g = {kk: [vv[idx] for idx in global_sampled_idx[kk]] for kk, vv in model_pred.items()} if model_pred else None + model_pred_a = {kk: [vv[idx] for idx in atomic_sampled_idx[kk]] for kk, vv in model_pred.items()} if model_pred else None + # concat all frames within those systmes model_pred_g = ( { @@ -396,11 +379,11 @@ def compute_output_stats_global( for kk in keys } # shape: (nframes, ndim) - merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys} + merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys if len(outputs[kk])>0} # shape: (nframes, ntypes) - merged_natoms = { - kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) for kk in keys - } + + merged_natoms = {kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) for kk in keys if len(input_natoms[kk])>0} + nf = {kk: merged_natoms[kk].shape[0] for kk in keys} if preset_bias is not None: assigned_atom_ener = { diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_bias.py index 562832b429..82f2bda8ec 100644 --- a/source/tests/pt/model/test_atomic_bias.py +++ b/source/tests/pt/model/test_atomic_bias.py @@ -87,7 +87,7 @@ def forward( [4.0, 5.0, 6.0], ] ) - .view([nf, nloc] + self.output_def()["foo"].shape) + .view([nf, nloc, *self.output_def()["foo"].shape]) .to(env.GLOBAL_PT_FLOAT_PRECISION) .to(env.DEVICE) ) @@ -98,7 +98,7 @@ def forward( [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], ] ) - .view([nf, nloc] + self.output_def()["bar"].shape) + .view([nf, nloc, *self.output_def()["bar"].shape]) .to(env.GLOBAL_PT_FLOAT_PRECISION) .to(env.DEVICE) ) @@ -198,13 +198,13 @@ def cvt_ret(x): [1.0, 2.0, 3.0], [4.0, 5.0, 6.0], ] - ).reshape([nf, nloc] + md0.fitting_output_def()["foo"].shape) + ).reshape([nf, nloc, *md0.fitting_output_def()["foo"].shape]) expected_ret0["bar"] = np.array( [ [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], ] - ).reshape([nf, nloc] + md0.fitting_output_def()["bar"].shape) + ).reshape([nf, nloc, *md0.fitting_output_def()["bar"].shape]) for kk in ["foo", "bar"]: np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) From f16fdb839e726a04468f10329505fdeffb7a9759 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 14:01:03 +0000 Subject: [PATCH 09/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/stat.py | 50 ++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 1da48bbb0c..279839c86c 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -269,11 +269,23 @@ def compute_output_stats( for kk in keys: for idx, system in enumerate(sampled): - - - if (("find_atom_" + kk) in system) and (system["find_atom_" + kk] > 0.0) and (len(atomic_sampled_idx[kk])==0 or idx > atomic_sampled_idx[kk][-1]): + if ( + (("find_atom_" + kk) in system) + and (system["find_atom_" + kk] > 0.0) + and ( + len(atomic_sampled_idx[kk]) == 0 + or idx > atomic_sampled_idx[kk][-1] + ) + ): atomic_sampled_idx[kk].append(idx) - elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0) and (len(global_sampled_idx[kk])==0 or idx > global_sampled_idx[kk][-1]): + elif ( + (("find_" + kk) in system) + and (system["find_" + kk] > 0.0) + and ( + len(global_sampled_idx[kk]) == 0 + or idx > global_sampled_idx[kk][-1] + ) + ): global_sampled_idx[kk].append(idx) else: @@ -281,8 +293,22 @@ def compute_output_stats( # use index to gather model predictions for the corresponding systems. - model_pred_g = {kk: [vv[idx] for idx in global_sampled_idx[kk]] for kk, vv in model_pred.items()} if model_pred else None - model_pred_a = {kk: [vv[idx] for idx in atomic_sampled_idx[kk]] for kk, vv in model_pred.items()} if model_pred else None + model_pred_g = ( + { + kk: [vv[idx] for idx in global_sampled_idx[kk]] + for kk, vv in model_pred.items() + } + if model_pred + else None + ) + model_pred_a = ( + { + kk: [vv[idx] for idx in atomic_sampled_idx[kk]] + for kk, vv in model_pred.items() + } + if model_pred + else None + ) # concat all frames within those systmes model_pred_g = ( @@ -379,10 +405,18 @@ def compute_output_stats_global( for kk in keys } # shape: (nframes, ndim) - merged_output = {kk: to_numpy_array(torch.cat(outputs[kk])) for kk in keys if len(outputs[kk])>0} + merged_output = { + kk: to_numpy_array(torch.cat(outputs[kk])) + for kk in keys + if len(outputs[kk]) > 0 + } # shape: (nframes, ntypes) - merged_natoms = {kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) for kk in keys if len(input_natoms[kk])>0} + merged_natoms = { + kk: to_numpy_array(torch.cat(input_natoms[kk])[:, 2:]) + for kk in keys + if len(input_natoms[kk]) > 0 + } nf = {kk: merged_natoms[kk].shape[0] for kk in keys} if preset_bias is not None: From 74b37957f4f5949103282b95c6227cd198b0a68c Mon Sep 17 00:00:00 2001 From: anyangml Date: Mon, 8 Apr 2024 15:27:31 +0000 Subject: [PATCH 10/27] fix: keys --- deepmd/pt/utils/stat.py | 61 +++++++++++------------ source/tests/pt/model/test_atomic_bias.py | 2 +- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 279839c86c..442d2a48b0 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -259,10 +259,9 @@ def compute_output_stats( # remove the keys that are not in the sample keys = [keys] if isinstance(keys, str) else keys assert isinstance(keys, list) - new_keys = [ii for ii in keys if ii in sampled[0].keys()] + new_keys = [ii for ii in keys if (ii in sampled[0].keys()) or ("atom_"+ii in sampled[0].keys())] del keys keys = new_keys - # split system based on label atomic_sampled_idx = defaultdict(list) global_sampled_idx = defaultdict(list) @@ -417,8 +416,7 @@ def compute_output_stats_global( for kk in keys if len(input_natoms[kk]) > 0 } - - nf = {kk: merged_natoms[kk].shape[0] for kk in keys} + nf = {kk: merged_natoms[kk].shape[0] for kk in keys if kk in merged_natoms} if preset_bias is not None: assigned_atom_ener = { kk: _make_preset_out_bias(ntypes, preset_bias[kk]) @@ -455,30 +453,30 @@ def compute_output_stats_global( continue bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) - # unbias_e is only used for print rmse - if model_pred is None: - unbias_e = { - kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys - } - else: - unbias_e = { - kk: model_pred[kk].reshape(nf[kk], -1) - + merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) - for kk in keys - } - atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in keys} - - def rmse(x): - return np.sqrt(np.mean(np.square(x))) - - for kk in keys: - rmse_ae = rmse( - (unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)) - / atom_numbs[kk][:, None] - ) - log.info( - f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." - ) + # # unbias_e is only used for print rmse + # if model_pred is None: + # unbias_e = { + # kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys + # } + # else: + # unbias_e = { + # kk: model_pred[kk].reshape(nf[kk], -1) + # + merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) + # for kk in keys + # } + # atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in keys} + + # def rmse(x): + # return np.sqrt(np.mean(np.square(x))) + + # for kk in keys: + # rmse_ae = rmse( + # (unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)) + # / atom_numbs[kk][:, None] + # ) + # log.info( + # f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." + # ) return bias_atom_e, std_atom_e @@ -491,9 +489,9 @@ def compute_output_stats_atomic( # get label dict from sample; for each key, only picking the system with atomic labels. outputs = { kk: [ - system[kk] + system["atom_" + kk] for system in sampled - if kk in system and system.get(f"find_atom_{kk}", 0) > 0 + if ("atom_"+kk) in system and system.get(f"find_atom_{kk}", 0) > 0 ] for kk in keys } @@ -501,7 +499,7 @@ def compute_output_stats_atomic( kk: [ system["atype"] for system in sampled - if kk in system and system.get(f"find_atom_{kk}", 0) > 0 + if ("atom_"+kk) in system and system.get(f"find_atom_{kk}", 0) > 0 ] for kk in keys } @@ -526,6 +524,7 @@ def compute_output_stats_atomic( bias_atom_e = {} std_atom_e = {} + # print(stats_input['dos'].shape, merged_natoms['dos'].shape) for kk in keys: if kk in stats_input: bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_atomic( diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_bias.py index 82f2bda8ec..6410ad2a84 100644 --- a/source/tests/pt/model/test_atomic_bias.py +++ b/source/tests/pt/model/test_atomic_bias.py @@ -126,7 +126,7 @@ def setUp(self): np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) ), # bias of foo: 1, 3 - "foo": to_torch_tensor( + "atom_foo": to_torch_tensor( np.array([[5.0, 5.0, 5.0], [5.0, 6.0, 7.0]]).reshape(2, 3, 1) ), # bias of bar: [1, 5], [3, 2] From 1de9e715ccf722692b442a166130fac928221ccc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 15:28:12 +0000 Subject: [PATCH 11/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/stat.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 442d2a48b0..413f1d8d23 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -259,7 +259,11 @@ def compute_output_stats( # remove the keys that are not in the sample keys = [keys] if isinstance(keys, str) else keys assert isinstance(keys, list) - new_keys = [ii for ii in keys if (ii in sampled[0].keys()) or ("atom_"+ii in sampled[0].keys())] + new_keys = [ + ii + for ii in keys + if (ii in sampled[0].keys()) or ("atom_" + ii in sampled[0].keys()) + ] del keys keys = new_keys # split system based on label @@ -491,7 +495,7 @@ def compute_output_stats_atomic( kk: [ system["atom_" + kk] for system in sampled - if ("atom_"+kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 ] for kk in keys } @@ -499,7 +503,7 @@ def compute_output_stats_atomic( kk: [ system["atype"] for system in sampled - if ("atom_"+kk) in system and system.get(f"find_atom_{kk}", 0) > 0 + if ("atom_" + kk) in system and system.get(f"find_atom_{kk}", 0) > 0 ] for kk in keys } From c0a14ea63ca9b1bb18f2939087c5fb8becd2a128 Mon Sep 17 00:00:00 2001 From: anyangml Date: Mon, 8 Apr 2024 15:39:23 +0000 Subject: [PATCH 12/27] chore: clean code --- deepmd/pt/utils/stat.py | 1 - source/tests/pt/test_training.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 413f1d8d23..9e6cbe4388 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -528,7 +528,6 @@ def compute_output_stats_atomic( bias_atom_e = {} std_atom_e = {} - # print(stats_input['dos'].shape, merged_natoms['dos'].shape) for kk in keys: if kk in stats_input: bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_atomic( diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 1635ad56ea..3ce42a68af 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -96,7 +96,7 @@ def setUp(self): def tearDown(self) -> None: DPTrainTest.tearDown(self) - +@unittest.skip("something wrong with the data.") class TestDOSModelSeA(unittest.TestCase, DPTrainTest): def setUp(self): input_json = str(Path(__file__).parent / "dos/input.json") From 3851137f74d9d124477429b44c17b941d41804db Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 15:40:09 +0000 Subject: [PATCH 13/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/tests/pt/test_training.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 3ce42a68af..5a555212af 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -96,6 +96,7 @@ def setUp(self): def tearDown(self) -> None: DPTrainTest.tearDown(self) + @unittest.skip("something wrong with the data.") class TestDOSModelSeA(unittest.TestCase, DPTrainTest): def setUp(self): From 3be7e91d1223d9bfc6a6394d2ddd932902301fce Mon Sep 17 00:00:00 2001 From: anyangml Date: Mon, 8 Apr 2024 15:48:49 +0000 Subject: [PATCH 14/27] fix: UTs --- source/tests/pt/test_training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 5a555212af..f0a988607e 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -97,7 +97,6 @@ def tearDown(self) -> None: DPTrainTest.tearDown(self) -@unittest.skip("something wrong with the data.") class TestDOSModelSeA(unittest.TestCase, DPTrainTest): def setUp(self): input_json = str(Path(__file__).parent / "dos/input.json") @@ -107,6 +106,7 @@ def setUp(self): self.config["training"]["training_data"]["systems"] = data_file self.config["training"]["validation_data"]["systems"] = data_file self.config["model"] = deepcopy(model_dos) + self.config["model"]["type_map"] = ["H"] self.config["training"]["numb_steps"] = 1 self.config["training"]["save_freq"] = 1 self.not_all_grad = True From b99afa0f40966536ea5d326776117ced3f771cae Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Tue, 9 Apr 2024 12:43:59 +0800 Subject: [PATCH 15/27] feat: add UT missing atype --- deepmd/pt/utils/stat.py | 90 +++++++++---- source/tests/pt/model/test_atomic_bias.py | 152 +++++++++++++++++++++- 2 files changed, 212 insertions(+), 30 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 9e6cbe4388..0e72d2239f 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -206,6 +206,28 @@ def _make_preset_out_bias( return np.array(nbias) +def _fill_stat_with_global( + atomic_stat: Union[np.ndarray, None], + global_stat: np.ndarray, + ): + """This function is used to fill atomic stat with global stat. + + Parameters + ---------- + atomic_stat : Union[np.ndarray, None] + The atomic stat. + global_stat : np.ndarray + The global stat. + + if the atomic stat is None, use global stat. + if the atomic stat is not None, but has nan values (missing atypes), fill with global stat. + """ + if atomic_stat is None: + return global_stat + else: + return np.nan_to_num(np.where(np.isnan(atomic_stat) & ~np.isnan(global_stat), global_stat, atomic_stat)) + + def compute_output_stats( merged: Union[Callable[[], List[dict]], List[dict]], ntypes: int, @@ -352,16 +374,20 @@ def compute_output_stats( # merge global/atomic bias bias_atom_e, std_atom_e = {}, {} for kk in keys: + # use atomic bias whenever available if kk in bias_atom_a: bias_atom_e[kk] = bias_atom_a[kk] std_atom_e[kk] = std_atom_a[kk] - elif kk in bias_atom_g: - bias_atom_e[kk] = bias_atom_g[kk] - std_atom_e[kk] = std_atom_g[kk] else: bias_atom_e[kk] = None std_atom_e[kk] = None - + # use global bias to fill missing atomic bias + if kk in bias_atom_g: + bias_atom_e[kk] = _fill_stat_with_global(bias_atom_e[kk], bias_atom_g[kk]) + std_atom_e[kk] = _fill_stat_with_global(std_atom_e[kk], std_atom_g[kk]) + else: + raise RuntimeError("Fail to compute stat.") + if stat_file_path is not None: _save_to_file(stat_file_path, bias_atom_e, std_atom_e) @@ -457,30 +483,31 @@ def compute_output_stats_global( continue bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) - # # unbias_e is only used for print rmse - # if model_pred is None: - # unbias_e = { - # kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys - # } - # else: - # unbias_e = { - # kk: model_pred[kk].reshape(nf[kk], -1) - # + merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) - # for kk in keys - # } - # atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in keys} - - # def rmse(x): - # return np.sqrt(np.mean(np.square(x))) - - # for kk in keys: - # rmse_ae = rmse( - # (unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)) - # / atom_numbs[kk][:, None] - # ) - # log.info( - # f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." - # ) + # unbias_e is only used for print rmse + + if model_pred is None: + unbias_e = { + kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys + } + else: + unbias_e = { + kk: model_pred[kk].reshape(nf[kk], -1) + + merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) + for kk in keys + } + atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in keys} + + def rmse(x): + return np.sqrt(np.mean(np.square(x))) + + for kk in keys: + rmse_ae = rmse( + (unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)) + / atom_numbs[kk][:, None] + ) + log.info( + f"RMSE of {kk} per atom after linear regression is: {rmse_ae} in the unit of {kk}." + ) return bias_atom_e, std_atom_e @@ -534,6 +561,13 @@ def compute_output_stats_atomic( stats_input[kk], merged_natoms[kk], ) + # correction for missing types + missing_types = ntypes - merged_natoms[kk].max() - 1 + if missing_types > 0: + nan_padding = np.empty((missing_types, bias_atom_e[kk].shape[1])) + nan_padding.fill(np.nan) + bias_atom_e[kk] = np.concatenate([bias_atom_e[kk], nan_padding],axis=0) + std_atom_e[kk] = np.concatenate([bias_atom_e[kk], nan_padding],axis=0) else: # this key does not have atomic labels, skip it. continue diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_bias.py index 6410ad2a84..bd4f2d5cd6 100644 --- a/source/tests/pt/model/test_atomic_bias.py +++ b/source/tests/pt/model/test_atomic_bias.py @@ -111,7 +111,6 @@ def tearDown(self): def setUp(self): TestCaseSingleFrameWithNlist.setUp(self) - nf, nloc, nnei = self.nlist.shape self.merged_output_stat = [ { "coord": to_torch_tensor(np.zeros([2, 3, 3])), @@ -125,7 +124,7 @@ def setUp(self): "natoms": to_torch_tensor( np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) ), - # bias of foo: 1, 3 + # bias of foo: 5, 6 "atom_foo": to_torch_tensor( np.array([[5.0, 5.0, 5.0], [5.0, 6.0, 7.0]]).reshape(2, 3, 1) ), @@ -255,3 +254,152 @@ def raise_error(): ).reshape(2, 3, 1) for kk in ["foo"]: np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) + assert False +class TestAtomicModelStatMergeGlobalAtomic(unittest.TestCase, TestCaseSingleFrameWithNlist): + def tearDown(self): + self.tempdir.cleanup() + + def setUp(self): + TestCaseSingleFrameWithNlist.setUp(self) + self.merged_output_stat = [ + { + "coord": to_torch_tensor(np.zeros([2, 3, 3])), + "atype": to_torch_tensor( + np.array([[0, 0, 0], [0, 0, 0]], dtype=np.int32) + ), + "atype_ext": to_torch_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_torch_tensor(np.zeros([2, 3, 3])), + "natoms": to_torch_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5.5, nan + "atom_foo": to_torch_tensor( + np.array([[5.0, 5.0, 5.0], [5.0, 6.0, 7.0]]).reshape(2, 3, 1) + ), + # bias of bar: [1, 5], [3, 2] + "bar": to_torch_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_atom_foo": np.float32(1.0), + "find_bar": np.float32(1.0), + }, + { + "coord": to_torch_tensor(np.zeros([2, 3, 3])), + "atype": to_torch_tensor( + np.array([[0, 0, 1], [0, 1, 1]], dtype=np.int32) + ), + "atype_ext": to_torch_tensor( + np.array([[0, 0, 1, 0], [0, 1, 1, 0]], dtype=np.int32) + ), + "box": to_torch_tensor(np.zeros([2, 3, 3])), + "natoms": to_torch_tensor( + np.array([[3, 3, 2, 1], [3, 3, 1, 2]], dtype=np.int32) + ), + # bias of foo: 5.5, 3 from atomic label. + "foo": to_torch_tensor(np.array([5.0, 7.0]).reshape(2, 1)), + # bias of bar: [1, 5], [3, 2] + "bar": to_torch_tensor( + np.array([5.0, 12.0, 7.0, 9.0]).reshape(2, 1, 2) + ), + "find_foo": np.float32(1.0), + "find_bar": np.float32(1.0), + }, + ] + self.tempdir = tempfile.TemporaryDirectory() + h5file = str((Path(self.tempdir.name) / "testcase.h5").resolve()) + with h5py.File(h5file, "w") as f: + pass + self.stat_file_path = DPPath(h5file, "a") + + def test_output_stat(self): + nf, nloc, nnei = self.nlist.shape + ds = DescrptDPA1( + self.rcut, + self.rcut_smth, + sum(self.sel), + self.nt, + ).to(env.DEVICE) + ft = FooFitting().to(env.DEVICE) + type_map = ["foo", "bar"] + md0 = DPAtomicModel( + ds, + ft, + type_map=type_map, + ).to(env.DEVICE) + args = [ + to_torch_tensor(ii) for ii in [self.coord_ext, self.atype_ext, self.nlist] + ] + # nf x nloc + at = self.atype_ext[:, :nloc] + + def cvt_ret(x): + return {kk: to_numpy_array(vv) for kk, vv in x.items()} + + # 1. test run without bias + # nf x na x odim + ret0 = md0.forward_common_atomic(*args) + ret0 = cvt_ret(ret0) + expected_ret0 = {} + expected_ret0["foo"] = np.array( + [ + [1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + ] + ).reshape([nf, nloc, *md0.fitting_output_def()["foo"].shape]) + expected_ret0["bar"] = np.array( + [ + [1.0, 2.0, 3.0, 7.0, 8.0, 9.0], + [4.0, 5.0, 6.0, 10.0, 11.0, 12.0], + ] + ).reshape([nf, nloc, *md0.fitting_output_def()["bar"].shape]) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret0[kk], expected_ret0[kk]) + + # 2. test bias is applied + md0.compute_or_load_out_stat( + self.merged_output_stat, stat_file_path=self.stat_file_path + ) + ret1 = md0.forward_common_atomic(*args) + ret1 = cvt_ret(ret1) + # nt x odim + foo_bias = np.array([5.5, 3.0]).reshape(2, 1) + bar_bias = np.array([1.0, 5.0, 3.0, 2.0]).reshape(2, 1, 2) + expected_ret1 = {} + expected_ret1["foo"] = ret0["foo"] + foo_bias[at] + expected_ret1["bar"] = ret0["bar"] + bar_bias[at] + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk]) + + # 3. test bias load from file + def raise_error(): + raise RuntimeError + + md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path) + ret2 = md0.forward_common_atomic(*args) + ret2 = cvt_ret(ret2) + for kk in ["foo", "bar"]: + np.testing.assert_almost_equal(ret1[kk], ret2[kk]) + + # 4. test change bias + BaseAtomicModel.change_out_bias( + md0, self.merged_output_stat, bias_adjust_mode="change-by-statistic" + ) + args = [ + to_torch_tensor(ii) + for ii in [ + self.coord_ext, + to_numpy_array(self.merged_output_stat[0]["atype_ext"]), + self.nlist, + ] + ] + ret3 = md0.forward_common_atomic(*args) + ret3 = cvt_ret(ret3) + expected_ret3 = {} + # new bias [2, -5] + expected_ret3["foo"] = np.array( + [[3, 4, -2], [6, 0, 1]] + ).reshape(2, 3, 1) + for kk in ["foo"]: + np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) From bb475413920e970f4cca82a20bc968db8e2953e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 04:45:14 +0000 Subject: [PATCH 16/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/stat.py | 23 ++++++++++++++--------- source/tests/pt/model/test_atomic_bias.py | 10 ++++++---- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 0e72d2239f..e7752c47ca 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -209,23 +209,26 @@ def _make_preset_out_bias( def _fill_stat_with_global( atomic_stat: Union[np.ndarray, None], global_stat: np.ndarray, - ): +): """This function is used to fill atomic stat with global stat. - + Parameters ---------- atomic_stat : Union[np.ndarray, None] The atomic stat. global_stat : np.ndarray The global stat. - - if the atomic stat is None, use global stat. + if the atomic stat is None, use global stat. if the atomic stat is not None, but has nan values (missing atypes), fill with global stat. """ if atomic_stat is None: return global_stat else: - return np.nan_to_num(np.where(np.isnan(atomic_stat) & ~np.isnan(global_stat), global_stat, atomic_stat)) + return np.nan_to_num( + np.where( + np.isnan(atomic_stat) & ~np.isnan(global_stat), global_stat, atomic_stat + ) + ) def compute_output_stats( @@ -383,11 +386,13 @@ def compute_output_stats( std_atom_e[kk] = None # use global bias to fill missing atomic bias if kk in bias_atom_g: - bias_atom_e[kk] = _fill_stat_with_global(bias_atom_e[kk], bias_atom_g[kk]) + bias_atom_e[kk] = _fill_stat_with_global( + bias_atom_e[kk], bias_atom_g[kk] + ) std_atom_e[kk] = _fill_stat_with_global(std_atom_e[kk], std_atom_g[kk]) else: raise RuntimeError("Fail to compute stat.") - + if stat_file_path is not None: _save_to_file(stat_file_path, bias_atom_e, std_atom_e) @@ -566,8 +571,8 @@ def compute_output_stats_atomic( if missing_types > 0: nan_padding = np.empty((missing_types, bias_atom_e[kk].shape[1])) nan_padding.fill(np.nan) - bias_atom_e[kk] = np.concatenate([bias_atom_e[kk], nan_padding],axis=0) - std_atom_e[kk] = np.concatenate([bias_atom_e[kk], nan_padding],axis=0) + bias_atom_e[kk] = np.concatenate([bias_atom_e[kk], nan_padding], axis=0) + std_atom_e[kk] = np.concatenate([bias_atom_e[kk], nan_padding], axis=0) else: # this key does not have atomic labels, skip it. continue diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_bias.py index bd4f2d5cd6..dc0c55eb53 100644 --- a/source/tests/pt/model/test_atomic_bias.py +++ b/source/tests/pt/model/test_atomic_bias.py @@ -255,7 +255,11 @@ def raise_error(): for kk in ["foo"]: np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) assert False -class TestAtomicModelStatMergeGlobalAtomic(unittest.TestCase, TestCaseSingleFrameWithNlist): + + +class TestAtomicModelStatMergeGlobalAtomic( + unittest.TestCase, TestCaseSingleFrameWithNlist +): def tearDown(self): self.tempdir.cleanup() @@ -398,8 +402,6 @@ def raise_error(): ret3 = cvt_ret(ret3) expected_ret3 = {} # new bias [2, -5] - expected_ret3["foo"] = np.array( - [[3, 4, -2], [6, 0, 1]] - ).reshape(2, 3, 1) + expected_ret3["foo"] = np.array([[3, 4, -2], [6, 0, 1]]).reshape(2, 3, 1) for kk in ["foo"]: np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) From f504f07ebb06e240866d258e5423e57e1878eb54 Mon Sep 17 00:00:00 2001 From: anyangml Date: Tue, 9 Apr 2024 05:33:41 +0000 Subject: [PATCH 17/27] fix: UTs --- deepmd/pt/utils/stat.py | 11 +- source/tests/pt/model/test_atomic_bias.py | 1 - source/tests/pt/test_training.py | 584 +++++++++++----------- 3 files changed, 297 insertions(+), 299 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index e7752c47ca..8e3025644c 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -390,7 +390,7 @@ def compute_output_stats( bias_atom_e[kk], bias_atom_g[kk] ) std_atom_e[kk] = _fill_stat_with_global(std_atom_e[kk], std_atom_g[kk]) - else: + if (bias_atom_e[kk] is None) or (std_atom_e[kk] is None): raise RuntimeError("Fail to compute stat.") if stat_file_path is not None: @@ -492,20 +492,20 @@ def compute_output_stats_global( if model_pred is None: unbias_e = { - kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in keys + kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in bias_atom_e.keys() } else: unbias_e = { kk: model_pred[kk].reshape(nf[kk], -1) + merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) - for kk in keys + for kk in bias_atom_e.keys() } - atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in keys} + atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in bias_atom_e.keys()} def rmse(x): return np.sqrt(np.mean(np.square(x))) - for kk in keys: + for kk in bias_atom_e.keys(): rmse_ae = rmse( (unbias_e[kk].reshape(nf[kk], -1) - merged_output[kk].reshape(nf[kk], -1)) / atom_numbs[kk][:, None] @@ -576,6 +576,5 @@ def compute_output_stats_atomic( else: # this key does not have atomic labels, skip it. continue - bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) return bias_atom_e, std_atom_e diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_bias.py index dc0c55eb53..8f365a09fe 100644 --- a/source/tests/pt/model/test_atomic_bias.py +++ b/source/tests/pt/model/test_atomic_bias.py @@ -254,7 +254,6 @@ def raise_error(): ).reshape(2, 3, 1) for kk in ["foo"]: np.testing.assert_almost_equal(ret3[kk], expected_ret3[kk], decimal=4) - assert False class TestAtomicModelStatMergeGlobalAtomic( diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index f0a988607e..dd505a6559 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -81,20 +81,20 @@ def tearDown(self): shutil.rmtree(f) -class TestEnergyModelSeA(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water/se_atten.json") - with open(input_json) as f: - self.config = json.load(f) - data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.config["training"]["training_data"]["systems"] = data_file - self.config["training"]["validation_data"]["systems"] = data_file - self.config["model"] = deepcopy(model_se_e2_a) - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) +# class TestEnergyModelSeA(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water/se_atten.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file = [str(Path(__file__).parent / "water/data/data_0")] +# self.config["training"]["training_data"]["systems"] = data_file +# self.config["training"]["validation_data"]["systems"] = data_file +# self.config["model"] = deepcopy(model_se_e2_a) +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) class TestDOSModelSeA(unittest.TestCase, DPTrainTest): @@ -115,281 +115,281 @@ def tearDown(self) -> None: DPTrainTest.tearDown(self) -class TestEnergyZBLModelSeA(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water/zbl.json") - with open(input_json) as f: - self.config = json.load(f) - data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.config["training"]["training_data"]["systems"] = data_file - self.config["training"]["validation_data"]["systems"] = data_file - self.config["model"] = deepcopy(model_zbl) - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestFparam(unittest.TestCase, DPTrainTest): - """Test if `fparam` can be loaded correctly.""" - - def setUp(self): - input_json = str(Path(__file__).parent / "water/se_atten.json") - with open(input_json) as f: - self.config = json.load(f) - data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.config["training"]["training_data"]["systems"] = data_file - self.config["training"]["validation_data"]["systems"] = data_file - self.config["model"] = deepcopy(model_se_e2_a) - self.config["model"]["fitting_net"]["numb_fparam"] = 1 - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000" - shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy") - - def tearDown(self) -> None: - (self.set_path / "fparam.npy").unlink(missing_ok=True) - DPTrainTest.tearDown(self) - - -class TestEnergyModelDPA1(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water/se_atten.json") - with open(input_json) as f: - self.config = json.load(f) - data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.config["training"]["training_data"]["systems"] = data_file - self.config["training"]["validation_data"]["systems"] = data_file - self.config["model"] = deepcopy(model_dpa1) - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestEnergyModelDPA2(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water/se_atten.json") - with open(input_json) as f: - self.config = json.load(f) - data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.config["training"]["training_data"]["systems"] = data_file - self.config["training"]["validation_data"]["systems"] = data_file - self.config["model"] = deepcopy(model_dpa2) - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -@unittest.skip("hybrid not supported at the moment") -class TestEnergyModelHybrid(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water/se_atten.json") - with open(input_json) as f: - self.config = json.load(f) - data_file = [str(Path(__file__).parent / "water/data/data_0")] - self.config["training"]["training_data"]["systems"] = data_file - self.config["training"]["validation_data"]["systems"] = data_file - self.config["model"] = deepcopy(model_hybrid) - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestDipoleModelSeA(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") - with open(input_json) as f: - self.config = json.load(f) - data_file_atomic = str( - Path(__file__).parent / "water_tensor/dipole/atomic_system" - ) - data_file_global = str( - Path(__file__).parent / "water_tensor/dipole/global_system" - ) - self.config["training"]["training_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["training"]["validation_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["model"] = deepcopy(model_se_e2_a) - self.config["model"]["atom_exclude_types"] = [1] - self.config["model"]["fitting_net"]["type"] = "dipole" - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestDipoleModelDPA1(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") - with open(input_json) as f: - self.config = json.load(f) - data_file_atomic = str( - Path(__file__).parent / "water_tensor/dipole/atomic_system" - ) - data_file_global = str( - Path(__file__).parent / "water_tensor/dipole/global_system" - ) - self.config["training"]["training_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["training"]["validation_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["model"] = deepcopy(model_dpa1) - self.config["model"]["atom_exclude_types"] = [1] - self.config["model"]["fitting_net"]["type"] = "dipole" - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestDipoleModelDPA2(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") - with open(input_json) as f: - self.config = json.load(f) - data_file_atomic = str( - Path(__file__).parent / "water_tensor/dipole/atomic_system" - ) - data_file_global = str( - Path(__file__).parent / "water_tensor/dipole/global_system" - ) - self.config["training"]["training_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["training"]["validation_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["model"] = deepcopy(model_dpa2) - self.config["model"]["atom_exclude_types"] = [1] - self.config["model"]["fitting_net"]["type"] = "dipole" - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestPolarModelSeA(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") - with open(input_json) as f: - self.config = json.load(f) - data_file_atomic = str( - Path(__file__).parent / "water_tensor/polar/atomic_system" - ) - data_file_global = str( - Path(__file__).parent / "water_tensor/polar/global_system" - ) - self.config["training"]["training_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["training"]["validation_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["model"] = deepcopy(model_se_e2_a) - self.config["model"]["atom_exclude_types"] = [1] - self.config["model"]["fitting_net"]["type"] = "polar" - self.config["model"]["fitting_net"]["fit_diag"] = False - self.config["model"]["fitting_net"]["shift_diag"] = False - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - # can not set requires_grad false for all parameters, - # because the input coord has no grad, thus the loss if all set to false - self.not_all_grad = True - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestPolarModelDPA1(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") - with open(input_json) as f: - self.config = json.load(f) - data_file_atomic = str( - Path(__file__).parent / "water_tensor/polar/atomic_system" - ) - data_file_global = str( - Path(__file__).parent / "water_tensor/polar/global_system" - ) - self.config["training"]["training_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["training"]["validation_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["model"] = deepcopy(model_dpa1) - self.config["model"]["atom_exclude_types"] = [1] - self.config["model"]["fitting_net"]["type"] = "polar" - self.config["model"]["fitting_net"]["fit_diag"] = False - self.config["model"]["fitting_net"]["shift_diag"] = False - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - # can not set requires_grad false for all parameters, - # because the input coord has no grad, thus the loss if all set to false - self.not_all_grad = True - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -class TestPolarModelDPA2(unittest.TestCase, DPTrainTest): - def setUp(self): - input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") - with open(input_json) as f: - self.config = json.load(f) - data_file_atomic = str( - Path(__file__).parent / "water_tensor/polar/atomic_system" - ) - data_file_global = str( - Path(__file__).parent / "water_tensor/polar/global_system" - ) - self.config["training"]["training_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["training"]["validation_data"]["systems"] = [ - data_file_atomic, - data_file_global, - ] - self.config["model"] = deepcopy(model_dpa2) - self.config["model"]["atom_exclude_types"] = [1] - self.config["model"]["fitting_net"]["type"] = "polar" - self.config["model"]["fitting_net"]["fit_diag"] = False - self.config["model"]["fitting_net"]["shift_diag"] = False - self.config["training"]["numb_steps"] = 1 - self.config["training"]["save_freq"] = 1 - # can not set requires_grad false for all parameters, - # because the input coord has no grad, thus the loss if all set to false - self.not_all_grad = True - - def tearDown(self) -> None: - DPTrainTest.tearDown(self) - - -if __name__ == "__main__": - unittest.main() +# class TestEnergyZBLModelSeA(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water/zbl.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file = [str(Path(__file__).parent / "water/data/data_0")] +# self.config["training"]["training_data"]["systems"] = data_file +# self.config["training"]["validation_data"]["systems"] = data_file +# self.config["model"] = deepcopy(model_zbl) +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestFparam(unittest.TestCase, DPTrainTest): +# """Test if `fparam` can be loaded correctly.""" + +# def setUp(self): +# input_json = str(Path(__file__).parent / "water/se_atten.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file = [str(Path(__file__).parent / "water/data/data_0")] +# self.config["training"]["training_data"]["systems"] = data_file +# self.config["training"]["validation_data"]["systems"] = data_file +# self.config["model"] = deepcopy(model_se_e2_a) +# self.config["model"]["fitting_net"]["numb_fparam"] = 1 +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 +# self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000" +# shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy") + +# def tearDown(self) -> None: +# (self.set_path / "fparam.npy").unlink(missing_ok=True) +# DPTrainTest.tearDown(self) + + +# class TestEnergyModelDPA1(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water/se_atten.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file = [str(Path(__file__).parent / "water/data/data_0")] +# self.config["training"]["training_data"]["systems"] = data_file +# self.config["training"]["validation_data"]["systems"] = data_file +# self.config["model"] = deepcopy(model_dpa1) +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestEnergyModelDPA2(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water/se_atten.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file = [str(Path(__file__).parent / "water/data/data_0")] +# self.config["training"]["training_data"]["systems"] = data_file +# self.config["training"]["validation_data"]["systems"] = data_file +# self.config["model"] = deepcopy(model_dpa2) +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# @unittest.skip("hybrid not supported at the moment") +# class TestEnergyModelHybrid(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water/se_atten.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file = [str(Path(__file__).parent / "water/data/data_0")] +# self.config["training"]["training_data"]["systems"] = data_file +# self.config["training"]["validation_data"]["systems"] = data_file +# self.config["model"] = deepcopy(model_hybrid) +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestDipoleModelSeA(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file_atomic = str( +# Path(__file__).parent / "water_tensor/dipole/atomic_system" +# ) +# data_file_global = str( +# Path(__file__).parent / "water_tensor/dipole/global_system" +# ) +# self.config["training"]["training_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["training"]["validation_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["model"] = deepcopy(model_se_e2_a) +# self.config["model"]["atom_exclude_types"] = [1] +# self.config["model"]["fitting_net"]["type"] = "dipole" +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestDipoleModelDPA1(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file_atomic = str( +# Path(__file__).parent / "water_tensor/dipole/atomic_system" +# ) +# data_file_global = str( +# Path(__file__).parent / "water_tensor/dipole/global_system" +# ) +# self.config["training"]["training_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["training"]["validation_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["model"] = deepcopy(model_dpa1) +# self.config["model"]["atom_exclude_types"] = [1] +# self.config["model"]["fitting_net"]["type"] = "dipole" +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestDipoleModelDPA2(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file_atomic = str( +# Path(__file__).parent / "water_tensor/dipole/atomic_system" +# ) +# data_file_global = str( +# Path(__file__).parent / "water_tensor/dipole/global_system" +# ) +# self.config["training"]["training_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["training"]["validation_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["model"] = deepcopy(model_dpa2) +# self.config["model"]["atom_exclude_types"] = [1] +# self.config["model"]["fitting_net"]["type"] = "dipole" +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestPolarModelSeA(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file_atomic = str( +# Path(__file__).parent / "water_tensor/polar/atomic_system" +# ) +# data_file_global = str( +# Path(__file__).parent / "water_tensor/polar/global_system" +# ) +# self.config["training"]["training_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["training"]["validation_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["model"] = deepcopy(model_se_e2_a) +# self.config["model"]["atom_exclude_types"] = [1] +# self.config["model"]["fitting_net"]["type"] = "polar" +# self.config["model"]["fitting_net"]["fit_diag"] = False +# self.config["model"]["fitting_net"]["shift_diag"] = False +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 +# # can not set requires_grad false for all parameters, +# # because the input coord has no grad, thus the loss if all set to false +# self.not_all_grad = True + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestPolarModelDPA1(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file_atomic = str( +# Path(__file__).parent / "water_tensor/polar/atomic_system" +# ) +# data_file_global = str( +# Path(__file__).parent / "water_tensor/polar/global_system" +# ) +# self.config["training"]["training_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["training"]["validation_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["model"] = deepcopy(model_dpa1) +# self.config["model"]["atom_exclude_types"] = [1] +# self.config["model"]["fitting_net"]["type"] = "polar" +# self.config["model"]["fitting_net"]["fit_diag"] = False +# self.config["model"]["fitting_net"]["shift_diag"] = False +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 +# # can not set requires_grad false for all parameters, +# # because the input coord has no grad, thus the loss if all set to false +# self.not_all_grad = True + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# class TestPolarModelDPA2(unittest.TestCase, DPTrainTest): +# def setUp(self): +# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") +# with open(input_json) as f: +# self.config = json.load(f) +# data_file_atomic = str( +# Path(__file__).parent / "water_tensor/polar/atomic_system" +# ) +# data_file_global = str( +# Path(__file__).parent / "water_tensor/polar/global_system" +# ) +# self.config["training"]["training_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["training"]["validation_data"]["systems"] = [ +# data_file_atomic, +# data_file_global, +# ] +# self.config["model"] = deepcopy(model_dpa2) +# self.config["model"]["atom_exclude_types"] = [1] +# self.config["model"]["fitting_net"]["type"] = "polar" +# self.config["model"]["fitting_net"]["fit_diag"] = False +# self.config["model"]["fitting_net"]["shift_diag"] = False +# self.config["training"]["numb_steps"] = 1 +# self.config["training"]["save_freq"] = 1 +# # can not set requires_grad false for all parameters, +# # because the input coord has no grad, thus the loss if all set to false +# self.not_all_grad = True + +# def tearDown(self) -> None: +# DPTrainTest.tearDown(self) + + +# if __name__ == "__main__": +# unittest.main() From 64427c1f8d6c360a359e876981cb98ecc0dc399d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 05:34:20 +0000 Subject: [PATCH 18/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/stat.py | 3 ++- source/tests/pt/test_training.py | 5 ----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 8e3025644c..73ee7468e5 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -492,7 +492,8 @@ def compute_output_stats_global( if model_pred is None: unbias_e = { - kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) for kk in bias_atom_e.keys() + kk: merged_natoms[kk] @ bias_atom_e[kk].reshape(ntypes, -1) + for kk in bias_atom_e.keys() } else: unbias_e = { diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index dd505a6559..59fa1b49b8 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -18,11 +18,6 @@ from .model.test_permutation import ( model_dos, - model_dpa1, - model_dpa2, - model_hybrid, - model_se_e2_a, - model_zbl, ) From 48f10cb73161e5940d24f204be3c59f315094429 Mon Sep 17 00:00:00 2001 From: anyangml Date: Tue, 9 Apr 2024 05:34:30 +0000 Subject: [PATCH 19/27] fix: UTs --- source/tests/pt/test_training.py | 584 +++++++++++++++---------------- 1 file changed, 292 insertions(+), 292 deletions(-) diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 59fa1b49b8..ef18fbd360 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -76,20 +76,20 @@ def tearDown(self): shutil.rmtree(f) -# class TestEnergyModelSeA(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water/se_atten.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file = [str(Path(__file__).parent / "water/data/data_0")] -# self.config["training"]["training_data"]["systems"] = data_file -# self.config["training"]["validation_data"]["systems"] = data_file -# self.config["model"] = deepcopy(model_se_e2_a) -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) +class TestEnergyModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) class TestDOSModelSeA(unittest.TestCase, DPTrainTest): @@ -110,281 +110,281 @@ def tearDown(self) -> None: DPTrainTest.tearDown(self) -# class TestEnergyZBLModelSeA(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water/zbl.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file = [str(Path(__file__).parent / "water/data/data_0")] -# self.config["training"]["training_data"]["systems"] = data_file -# self.config["training"]["validation_data"]["systems"] = data_file -# self.config["model"] = deepcopy(model_zbl) -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestFparam(unittest.TestCase, DPTrainTest): -# """Test if `fparam` can be loaded correctly.""" - -# def setUp(self): -# input_json = str(Path(__file__).parent / "water/se_atten.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file = [str(Path(__file__).parent / "water/data/data_0")] -# self.config["training"]["training_data"]["systems"] = data_file -# self.config["training"]["validation_data"]["systems"] = data_file -# self.config["model"] = deepcopy(model_se_e2_a) -# self.config["model"]["fitting_net"]["numb_fparam"] = 1 -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 -# self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000" -# shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy") - -# def tearDown(self) -> None: -# (self.set_path / "fparam.npy").unlink(missing_ok=True) -# DPTrainTest.tearDown(self) - - -# class TestEnergyModelDPA1(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water/se_atten.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file = [str(Path(__file__).parent / "water/data/data_0")] -# self.config["training"]["training_data"]["systems"] = data_file -# self.config["training"]["validation_data"]["systems"] = data_file -# self.config["model"] = deepcopy(model_dpa1) -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestEnergyModelDPA2(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water/se_atten.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file = [str(Path(__file__).parent / "water/data/data_0")] -# self.config["training"]["training_data"]["systems"] = data_file -# self.config["training"]["validation_data"]["systems"] = data_file -# self.config["model"] = deepcopy(model_dpa2) -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# @unittest.skip("hybrid not supported at the moment") -# class TestEnergyModelHybrid(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water/se_atten.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file = [str(Path(__file__).parent / "water/data/data_0")] -# self.config["training"]["training_data"]["systems"] = data_file -# self.config["training"]["validation_data"]["systems"] = data_file -# self.config["model"] = deepcopy(model_hybrid) -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestDipoleModelSeA(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file_atomic = str( -# Path(__file__).parent / "water_tensor/dipole/atomic_system" -# ) -# data_file_global = str( -# Path(__file__).parent / "water_tensor/dipole/global_system" -# ) -# self.config["training"]["training_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["training"]["validation_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["model"] = deepcopy(model_se_e2_a) -# self.config["model"]["atom_exclude_types"] = [1] -# self.config["model"]["fitting_net"]["type"] = "dipole" -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestDipoleModelDPA1(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file_atomic = str( -# Path(__file__).parent / "water_tensor/dipole/atomic_system" -# ) -# data_file_global = str( -# Path(__file__).parent / "water_tensor/dipole/global_system" -# ) -# self.config["training"]["training_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["training"]["validation_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["model"] = deepcopy(model_dpa1) -# self.config["model"]["atom_exclude_types"] = [1] -# self.config["model"]["fitting_net"]["type"] = "dipole" -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestDipoleModelDPA2(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file_atomic = str( -# Path(__file__).parent / "water_tensor/dipole/atomic_system" -# ) -# data_file_global = str( -# Path(__file__).parent / "water_tensor/dipole/global_system" -# ) -# self.config["training"]["training_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["training"]["validation_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["model"] = deepcopy(model_dpa2) -# self.config["model"]["atom_exclude_types"] = [1] -# self.config["model"]["fitting_net"]["type"] = "dipole" -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestPolarModelSeA(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file_atomic = str( -# Path(__file__).parent / "water_tensor/polar/atomic_system" -# ) -# data_file_global = str( -# Path(__file__).parent / "water_tensor/polar/global_system" -# ) -# self.config["training"]["training_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["training"]["validation_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["model"] = deepcopy(model_se_e2_a) -# self.config["model"]["atom_exclude_types"] = [1] -# self.config["model"]["fitting_net"]["type"] = "polar" -# self.config["model"]["fitting_net"]["fit_diag"] = False -# self.config["model"]["fitting_net"]["shift_diag"] = False -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 -# # can not set requires_grad false for all parameters, -# # because the input coord has no grad, thus the loss if all set to false -# self.not_all_grad = True - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestPolarModelDPA1(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file_atomic = str( -# Path(__file__).parent / "water_tensor/polar/atomic_system" -# ) -# data_file_global = str( -# Path(__file__).parent / "water_tensor/polar/global_system" -# ) -# self.config["training"]["training_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["training"]["validation_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["model"] = deepcopy(model_dpa1) -# self.config["model"]["atom_exclude_types"] = [1] -# self.config["model"]["fitting_net"]["type"] = "polar" -# self.config["model"]["fitting_net"]["fit_diag"] = False -# self.config["model"]["fitting_net"]["shift_diag"] = False -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 -# # can not set requires_grad false for all parameters, -# # because the input coord has no grad, thus the loss if all set to false -# self.not_all_grad = True - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# class TestPolarModelDPA2(unittest.TestCase, DPTrainTest): -# def setUp(self): -# input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") -# with open(input_json) as f: -# self.config = json.load(f) -# data_file_atomic = str( -# Path(__file__).parent / "water_tensor/polar/atomic_system" -# ) -# data_file_global = str( -# Path(__file__).parent / "water_tensor/polar/global_system" -# ) -# self.config["training"]["training_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["training"]["validation_data"]["systems"] = [ -# data_file_atomic, -# data_file_global, -# ] -# self.config["model"] = deepcopy(model_dpa2) -# self.config["model"]["atom_exclude_types"] = [1] -# self.config["model"]["fitting_net"]["type"] = "polar" -# self.config["model"]["fitting_net"]["fit_diag"] = False -# self.config["model"]["fitting_net"]["shift_diag"] = False -# self.config["training"]["numb_steps"] = 1 -# self.config["training"]["save_freq"] = 1 -# # can not set requires_grad false for all parameters, -# # because the input coord has no grad, thus the loss if all set to false -# self.not_all_grad = True - -# def tearDown(self) -> None: -# DPTrainTest.tearDown(self) - - -# if __name__ == "__main__": -# unittest.main() +class TestEnergyZBLModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/zbl.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_zbl) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestFparam(unittest.TestCase, DPTrainTest): + """Test if `fparam` can be loaded correctly.""" + + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_se_e2_a) + self.config["model"]["fitting_net"]["numb_fparam"] = 1 + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.set_path = Path(__file__).parent / "water/data/data_0" / "set.000" + shutil.copyfile(self.set_path / "energy.npy", self.set_path / "fparam.npy") + + def tearDown(self) -> None: + (self.set_path / "fparam.npy").unlink(missing_ok=True) + DPTrainTest.tearDown(self) + + +class TestEnergyModelDPA1(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa1) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestEnergyModelDPA2(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa2) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +@unittest.skip("hybrid not supported at the moment") +class TestEnergyModelHybrid(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_hybrid) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestDipoleModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/dipole/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/dipole/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_se_e2_a) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "dipole" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestDipoleModelDPA1(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/dipole/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/dipole/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa1) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "dipole" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestDipoleModelDPA2(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/dipole/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/dipole/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa2) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "dipole" + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestPolarModelSeA(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/polar/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/polar/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_se_e2_a) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "polar" + self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + # can not set requires_grad false for all parameters, + # because the input coord has no grad, thus the loss if all set to false + self.not_all_grad = True + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestPolarModelDPA1(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/polar/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/polar/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa1) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "polar" + self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + # can not set requires_grad false for all parameters, + # because the input coord has no grad, thus the loss if all set to false + self.not_all_grad = True + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +class TestPolarModelDPA2(unittest.TestCase, DPTrainTest): + def setUp(self): + input_json = str(Path(__file__).parent / "water_tensor/se_e2_a.json") + with open(input_json) as f: + self.config = json.load(f) + data_file_atomic = str( + Path(__file__).parent / "water_tensor/polar/atomic_system" + ) + data_file_global = str( + Path(__file__).parent / "water_tensor/polar/global_system" + ) + self.config["training"]["training_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["training"]["validation_data"]["systems"] = [ + data_file_atomic, + data_file_global, + ] + self.config["model"] = deepcopy(model_dpa2) + self.config["model"]["atom_exclude_types"] = [1] + self.config["model"]["fitting_net"]["type"] = "polar" + self.config["model"]["fitting_net"]["fit_diag"] = False + self.config["model"]["fitting_net"]["shift_diag"] = False + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + # can not set requires_grad false for all parameters, + # because the input coord has no grad, thus the loss if all set to false + self.not_all_grad = True + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + +if __name__ == "__main__": + unittest.main() From d57d5614fed1cc0b8f09efae11a54932f5250cd1 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Tue, 9 Apr 2024 13:40:39 +0800 Subject: [PATCH 20/27] fix: precommit --- source/tests/pt/test_training.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index ef18fbd360..f0a988607e 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -18,6 +18,11 @@ from .model.test_permutation import ( model_dos, + model_dpa1, + model_dpa2, + model_hybrid, + model_se_e2_a, + model_zbl, ) From 62288b1dbb2ec5f0333b5f4d76adf37fdee1ed69 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:21:30 +0800 Subject: [PATCH 21/27] chore: revert breaking changes --- deepmd/entrypoints/test.py | 12 ++++++------ deepmd/pt/loss/tensor.py | 8 ++++---- source/tests/pt/model/test_polar_stat.py | 14 ++++---------- source/tests/tf/test_dp_test.py | 4 ++-- 4 files changed, 16 insertions(+), 22 deletions(-) diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index 7b8c227ead..cad6e12d2b 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -870,7 +870,7 @@ def test_polar( arrays with results and their shapes """ data.add( - "polarizability" if not atomic else "atom_polarizability", + "polarizability" if not atomic else "atomic_polarizability", 9, atomic=atomic, must=True, @@ -897,7 +897,7 @@ def test_polar( polar = polar.reshape((polar.shape[0], -1, 9))[:, sel_mask, :].reshape( (polar.shape[0], -1) ) - rmse_f = rmse(polar - test_data["atom_polarizability"][:numb_test]) + rmse_f = rmse(polar - test_data["atomic_polarizability"][:numb_test]) log.info(f"# number of test data : {numb_test:d} ") log.info(f"Polarizability RMSE : {rmse_f:e}") @@ -926,7 +926,7 @@ def test_polar( pe = np.concatenate( ( np.reshape( - test_data["atom_polarizability"][:numb_test], + test_data["atomic_polarizability"][:numb_test], [-1, 9 * sel_natoms], ), np.reshape(polar, [-1, 9 * sel_natoms]), @@ -1011,7 +1011,7 @@ def test_dipole( arrays with results and their shapes """ data.add( - "dipole" if not atomic else "atom_dipole", + "dipole" if not atomic else "atomic_dipole", 3, atomic=atomic, must=True, @@ -1037,7 +1037,7 @@ def test_dipole( dipole = dipole.reshape((dipole.shape[0], -1, 3))[:, sel_mask, :].reshape( (dipole.shape[0], -1) ) - rmse_f = rmse(dipole - test_data["atom_dipole"][:numb_test]) + rmse_f = rmse(dipole - test_data["atomic_dipole"][:numb_test]) log.info(f"# number of test data : {numb_test:d}") log.info(f"Dipole RMSE : {rmse_f:e}") @@ -1061,7 +1061,7 @@ def test_dipole( pe = np.concatenate( ( np.reshape( - test_data["atom_dipole"][:numb_test], [-1, 3 * sel_natoms] + test_data["atomic_dipole"][:numb_test], [-1, 3 * sel_natoms] ), np.reshape(dipole, [-1, 3 * sel_natoms]), ), diff --git a/deepmd/pt/loss/tensor.py b/deepmd/pt/loss/tensor.py index 34957815b5..3dd91d203e 100644 --- a/deepmd/pt/loss/tensor.py +++ b/deepmd/pt/loss/tensor.py @@ -93,14 +93,14 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False if ( self.has_local_weight and self.tensor_name in model_pred - and "atom_" + self.label_name in label + and "atomic_" + self.label_name in label ): - find_local = label.get("find_" + "atom_" + self.label_name, 0.0) + find_local = label.get("find_" + "atomic_" + self.label_name, 0.0) local_weight = self.local_weight * find_local local_tensor_pred = model_pred[self.tensor_name].reshape( [-1, natoms, self.tensor_size] ) - local_tensor_label = label["atom_" + self.label_name].reshape( + local_tensor_label = label["atomic_" + self.label_name].reshape( [-1, natoms, self.tensor_size] ) diff = (local_tensor_pred - local_tensor_label).reshape( @@ -157,7 +157,7 @@ def label_requirement(self) -> List[DataRequirementItem]: if self.has_local_weight: label_requirement.append( DataRequirementItem( - "atom_" + self.label_name, + "atomic_" + self.label_name, ndof=self.tensor_size, atomic=True, must=False, diff --git a/source/tests/pt/model/test_polar_stat.py b/source/tests/pt/model/test_polar_stat.py index b61455d524..551ccc3028 100644 --- a/source/tests/pt/model/test_polar_stat.py +++ b/source/tests/pt/model/test_polar_stat.py @@ -31,8 +31,8 @@ def setUp(self) -> None: self.sampled = [ { "atype": types, - "find_atom_polarizability": find_atomic_polarizability, - "atom_polarizability": atomic_polarizability, + "find_atomic_polarizability": find_atomic_polarizability, + "atomic_polarizability": atomic_polarizability, "polarizability": polarizability, "find_polarizability": find_polarizability, } @@ -41,12 +41,6 @@ def setUp(self) -> None: k: [v.numpy(force=True)] for d in self.sampled for k, v in d.items() } self.all_stat["type"] = self.all_stat.pop("atype") - self.all_stat["find_atomic_polarizability"] = self.all_stat.pop( - "find_atom_polarizability" - ) - self.all_stat["atomic_polarizability"] = self.all_stat.pop( - "atom_polarizability" - ) self.tfpolar = PolarFittingSeA( ntypes=ntypes, dim_descrpt=1, @@ -67,8 +61,8 @@ def test_atomic_consistency(self): np.testing.assert_allclose(tfbias, to_numpy_array(ptbias)) def test_global_consistency(self): - self.sampled[0]["find_atom_polarizability"] = -1 - self.sampled[0]["polarizability"] = self.sampled[0]["atom_polarizability"].sum( + self.sampled[0]["find_atomic_polarizability"] = -1 + self.sampled[0]["polarizability"] = self.sampled[0]["atomic_polarizability"].sum( dim=1 ) self.all_stat["find_atomic_polarizability"] = [-1] diff --git a/source/tests/tf/test_dp_test.py b/source/tests/tf/test_dp_test.py index b9a9706da2..9a3dde3da0 100644 --- a/source/tests/tf/test_dp_test.py +++ b/source/tests/tf/test_dp_test.py @@ -224,7 +224,7 @@ def setUp(self): ] ) self.expected_global_d = np.sum(self.expected_d.reshape(1, -1, 3), axis=1) - np.save(Path(self.test_data) / "set.000" / "atom_dipole.npy", self.expected_d) + np.save(Path(self.test_data) / "set.000" / "atomic_dipole.npy", self.expected_d) np.save(Path(self.test_data) / "set.000" / "dipole.npy", self.expected_global_d) def test_1frame(self): @@ -296,7 +296,7 @@ def setUp(self): ) self.expected_global_d = np.sum(self.expected_d.reshape(1, -1, 9), axis=1) np.save( - Path(self.test_data) / "set.000" / "atom_polarizability.npy", + Path(self.test_data) / "set.000" / "atomic_polarizability.npy", self.expected_d, ) np.save( From 267264ba7e0f9118a914674e86a2a5498d763dd8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 02:22:14 +0000 Subject: [PATCH 22/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- source/tests/pt/model/test_polar_stat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/tests/pt/model/test_polar_stat.py b/source/tests/pt/model/test_polar_stat.py index 551ccc3028..3d72c6e8fa 100644 --- a/source/tests/pt/model/test_polar_stat.py +++ b/source/tests/pt/model/test_polar_stat.py @@ -62,9 +62,9 @@ def test_atomic_consistency(self): def test_global_consistency(self): self.sampled[0]["find_atomic_polarizability"] = -1 - self.sampled[0]["polarizability"] = self.sampled[0]["atomic_polarizability"].sum( - dim=1 - ) + self.sampled[0]["polarizability"] = self.sampled[0][ + "atomic_polarizability" + ].sum(dim=1) self.all_stat["find_atomic_polarizability"] = [-1] self.all_stat["polarizability"] = [ self.all_stat["atomic_polarizability"][0].sum(axis=1) From 99f18dabac4dcb86f7a7481aa2cc0c9bc11a47af Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:44:37 +0800 Subject: [PATCH 23/27] chore: revert breaking changes --- deepmd/pt/model/task/polarizability.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepmd/pt/model/task/polarizability.py b/deepmd/pt/model/task/polarizability.py index 7bf82f8a09..cd944996be 100644 --- a/deepmd/pt/model/task/polarizability.py +++ b/deepmd/pt/model/task/polarizability.py @@ -231,9 +231,9 @@ def compute_output_stats( for sys in range(len(sampled)): nframs = sampled[sys]["atype"].shape[0] - if sampled[sys]["find_atom_polarizability"] > 0.0: + if sampled[sys]["find_atomic_polarizability"] > 0.0: sys_atom_polar = compute_stats_from_atomic( - sampled[sys]["atom_polarizability"].numpy(force=True), + sampled[sys]["atomic_polarizability"].numpy(force=True), sampled[sys]["atype"].numpy(force=True), )[0] else: From fcfeed6a696f9ebab488ee16b794a49c0ba980c7 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 10 Apr 2024 13:18:20 +0800 Subject: [PATCH 24/27] chore: refactor code --- deepmd/dpmodel/fitting/dipole_fitting.py | 2 +- deepmd/dpmodel/fitting/polarizability_fitting.py | 2 +- deepmd/pt/utils/stat.py | 12 +----------- ...omic_bias.py => test_atomic_model_atomic_stat.py} | 0 ...odel_stat.py => test_atomic_model_global_stat.py} | 0 5 files changed, 3 insertions(+), 13 deletions(-) rename source/tests/pt/model/{test_atomic_bias.py => test_atomic_model_atomic_stat.py} (100%) rename source/tests/pt/model/{test_atomic_model_stat.py => test_atomic_model_global_stat.py} (100%) diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index 53ebad0a3b..fc619125d5 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -158,7 +158,7 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 1, 1) - data.pop("var_name", None) + assert data.pop("var_name", None) == "dipole" return super().deserialize(data) def output_def(self): diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 0ca2a489fc..3842ef0e12 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -198,7 +198,7 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 2, 1) - data.pop("var_name", None) + assert data.pop("var_name", None) == "polar" return super().deserialize(data) def output_def(self): diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 73ee7468e5..ef19042949 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -300,19 +300,11 @@ def compute_output_stats( if ( (("find_atom_" + kk) in system) and (system["find_atom_" + kk] > 0.0) - and ( - len(atomic_sampled_idx[kk]) == 0 - or idx > atomic_sampled_idx[kk][-1] - ) ): atomic_sampled_idx[kk].append(idx) elif ( (("find_" + kk) in system) and (system["find_" + kk] > 0.0) - and ( - len(global_sampled_idx[kk]) == 0 - or idx > global_sampled_idx[kk][-1] - ) ): global_sampled_idx[kk].append(idx) @@ -428,7 +420,6 @@ def compute_output_stats_global( ntypes, system["atom_exclude_types"] ).get_type_mask() system[natoms_key][:, 2:] *= type_mask.unsqueeze(0) - # input_natoms = [item[natoms_key] for item in sampled] input_natoms = { kk: [ @@ -467,7 +458,6 @@ def compute_output_stats_global( else: # subtract the model bias and output the delta bias - # need to find the output of the corresponding system, may need idx. model_pred = {kk: np.sum(model_pred[kk], axis=1) for kk in keys} stats_input = { kk: merged_output[kk] - model_pred[kk] for kk in keys if kk in merged_output @@ -484,7 +474,7 @@ def compute_output_stats_global( rcond=rcond, ) else: - # this key does not have atomic labels, skip it. + # this key does not have global labels, skip it. continue bias_atom_e, std_atom_e = _post_process_stat(bias_atom_e, std_atom_e) diff --git a/source/tests/pt/model/test_atomic_bias.py b/source/tests/pt/model/test_atomic_model_atomic_stat.py similarity index 100% rename from source/tests/pt/model/test_atomic_bias.py rename to source/tests/pt/model/test_atomic_model_atomic_stat.py diff --git a/source/tests/pt/model/test_atomic_model_stat.py b/source/tests/pt/model/test_atomic_model_global_stat.py similarity index 100% rename from source/tests/pt/model/test_atomic_model_stat.py rename to source/tests/pt/model/test_atomic_model_global_stat.py From 398fbaca6e4f14d788da8499f6cc690f7ef18d9a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 05:19:03 +0000 Subject: [PATCH 25/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/pt/utils/stat.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index ef19042949..77da1e01f1 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -297,15 +297,11 @@ def compute_output_stats( for kk in keys: for idx, system in enumerate(sampled): - if ( - (("find_atom_" + kk) in system) - and (system["find_atom_" + kk] > 0.0) + if (("find_atom_" + kk) in system) and ( + system["find_atom_" + kk] > 0.0 ): atomic_sampled_idx[kk].append(idx) - elif ( - (("find_" + kk) in system) - and (system["find_" + kk] > 0.0) - ): + elif (("find_" + kk) in system) and (system["find_" + kk] > 0.0): global_sampled_idx[kk].append(idx) else: From 2cf195dc49a770ba452e560d341023e2302d9c85 Mon Sep 17 00:00:00 2001 From: Anyang Peng <137014849+anyangml@users.noreply.github.com> Date: Wed, 10 Apr 2024 13:25:36 +0800 Subject: [PATCH 26/27] chore: refactor code --- deepmd/dpmodel/fitting/dipole_fitting.py | 3 ++- deepmd/dpmodel/fitting/polarizability_fitting.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/deepmd/dpmodel/fitting/dipole_fitting.py b/deepmd/dpmodel/fitting/dipole_fitting.py index fc619125d5..98325f41ee 100644 --- a/deepmd/dpmodel/fitting/dipole_fitting.py +++ b/deepmd/dpmodel/fitting/dipole_fitting.py @@ -158,7 +158,8 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 1, 1) - assert data.pop("var_name", None) == "dipole" + var_name = data.pop("var_name", None) + assert var_name == "dipole" return super().deserialize(data) def output_def(self): diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 3842ef0e12..3ba62cf285 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -198,7 +198,8 @@ def serialize(self) -> dict: def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 2, 1) - assert data.pop("var_name", None) == "polar" + var_name = data.pop("var_name", None) + assert var_name == "polar" return super().deserialize(data) def output_def(self): From c58520654f164c6bfddc7f03a32acaec0b76f325 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 05:26:19 +0000 Subject: [PATCH 27/27] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- deepmd/dpmodel/fitting/polarizability_fitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepmd/dpmodel/fitting/polarizability_fitting.py b/deepmd/dpmodel/fitting/polarizability_fitting.py index 3ba62cf285..2a691e963d 100644 --- a/deepmd/dpmodel/fitting/polarizability_fitting.py +++ b/deepmd/dpmodel/fitting/polarizability_fitting.py @@ -199,7 +199,7 @@ def deserialize(cls, data: dict) -> "GeneralFitting": data = copy.deepcopy(data) check_version_compatibility(data.pop("@version", 1), 2, 1) var_name = data.pop("var_name", None) - assert var_name == "polar" + assert var_name == "polar" return super().deserialize(data) def output_def(self):