diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9058decc21..bd36fd6e63 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
     exclude: ^source/3rdparty
   - repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.8.3
+    rev: v0.8.4
     hooks:
       - id: ruff
         args: ["--fix"]
diff --git a/deepmd/dpmodel/atomic_model/__init__.py b/deepmd/dpmodel/atomic_model/__init__.py
index 3d90c738ae..4d882d5e4b 100644
--- a/deepmd/dpmodel/atomic_model/__init__.py
+++ b/deepmd/dpmodel/atomic_model/__init__.py
@@ -42,6 +42,9 @@
 from .polar_atomic_model import (
     DPPolarAtomicModel,
 )
+from .property_atomic_model import (
+    DPPropertyAtomicModel,
+)
 
 __all__ = [
     "BaseAtomicModel",
@@ -50,6 +53,7 @@
     "DPDipoleAtomicModel",
     "DPEnergyAtomicModel",
     "DPPolarAtomicModel",
+    "DPPropertyAtomicModel",
     "DPZBLLinearEnergyAtomicModel",
     "LinearEnergyAtomicModel",
     "PairTabAtomicModel",
diff --git a/deepmd/dpmodel/atomic_model/property_atomic_model.py b/deepmd/dpmodel/atomic_model/property_atomic_model.py
index 6f69f8dfb6..e3c038e695 100644
--- a/deepmd/dpmodel/atomic_model/property_atomic_model.py
+++ b/deepmd/dpmodel/atomic_model/property_atomic_model.py
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import numpy as np
+
 from deepmd.dpmodel.fitting.property_fitting import (
     PropertyFittingNet,
 )
@@ -15,3 +17,25 @@ def __init__(self, descriptor, fitting, type_map, **kwargs):
                 "fitting must be an instance of PropertyFittingNet for DPPropertyAtomicModel"
             )
         super().__init__(descriptor, fitting, type_map, **kwargs)
+
+    def apply_out_stat(
+        self,
+        ret: dict[str, np.ndarray],
+        atype: np.ndarray,
+    ):
+        """Apply the stat to each atomic output.
+
+        In property fitting, each atomic output is multiplied by the label standard deviation and then shifted by the label mean.
+
+        Parameters
+        ----------
+        ret
+            The returned dict by the forward_atomic method
+        atype
+            The atom types. nf x nloc. It is not used in property fitting.
+
+        """
+        out_bias, out_std = self._fetch_out_stat(self.bias_keys)
+        for kk in self.bias_keys:
+            ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
+        return ret
diff --git a/deepmd/dpmodel/fitting/property_fitting.py b/deepmd/dpmodel/fitting/property_fitting.py
index 8b903af00e..6d0aa3546f 100644
--- a/deepmd/dpmodel/fitting/property_fitting.py
+++ b/deepmd/dpmodel/fitting/property_fitting.py
@@ -41,10 +41,9 @@ class PropertyFittingNet(InvarFitting):
         this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
     intensive
         Whether the fitting property is intensive.
-    bias_method
-        The method of applying the bias to each atomic output, user can select 'normal' or 'no_bias'.
-        If 'normal' is used, the computed bias will be added to the atomic output.
-        If 'no_bias' is used, no bias will be added to the atomic output.
+    property_name
+        The name of the fitting property, which should be consistent with the property name in the dataset.
+        If the data file is named `humo.npy`, this parameter should be "humo".
resnet_dt Time-step `dt` in the resnet construction: :math:`y = x + dt * \phi (Wx + b)` @@ -74,7 +73,7 @@ def __init__( rcond: Optional[float] = None, trainable: Union[bool, list[bool]] = True, intensive: bool = False, - bias_method: str = "normal", + property_name: str = "property", resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, @@ -89,9 +88,8 @@ def __init__( ) -> None: self.task_dim = task_dim self.intensive = intensive - self.bias_method = bias_method super().__init__( - var_name="property", + var_name=property_name, ntypes=ntypes, dim_descrpt=dim_descrpt, dim_out=task_dim, @@ -113,9 +111,9 @@ def __init__( @classmethod def deserialize(cls, data: dict) -> "PropertyFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version"), 3, 1) + check_version_compatibility(data.pop("@version"), 4, 1) data.pop("dim_out") - data.pop("var_name") + data["property_name"] = data.pop("var_name") data.pop("tot_ener_zero") data.pop("layer_name") data.pop("use_aparam_as_mask", None) @@ -131,6 +129,8 @@ def serialize(self) -> dict: **InvarFitting.serialize(self), "type": "property", "task_dim": self.task_dim, + "intensive": self.intensive, } + dd["@version"] = 4 return dd diff --git a/deepmd/dpmodel/model/property_model.py b/deepmd/dpmodel/model/property_model.py index 16fdedd36e..9bd07bd349 100644 --- a/deepmd/dpmodel/model/property_model.py +++ b/deepmd/dpmodel/model/property_model.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: LGPL-3.0-or-later -from deepmd.dpmodel.atomic_model.dp_atomic_model import ( - DPAtomicModel, +from deepmd.dpmodel.atomic_model import ( + DPPropertyAtomicModel, ) from deepmd.dpmodel.model.base_model import ( BaseModel, @@ -13,7 +13,7 @@ make_model, ) -DPPropertyModel_ = make_model(DPAtomicModel) +DPPropertyModel_ = make_model(DPPropertyAtomicModel) @BaseModel.register("property") diff --git a/deepmd/entrypoints/test.py b/deepmd/entrypoints/test.py index d9744246d7..5aeb84468d 100644 --- a/deepmd/entrypoints/test.py +++ b/deepmd/entrypoints/test.py @@ -779,9 +779,17 @@ def test_property( tuple[list[np.ndarray], list[int]] arrays with results and their shapes """ - data.add("property", dp.task_dim, atomic=False, must=True, high_prec=True) + var_name = dp.get_var_name() + assert isinstance(var_name, str) + data.add(var_name, dp.task_dim, atomic=False, must=True, high_prec=True) if has_atom_property: - data.add("atom_property", dp.task_dim, atomic=True, must=False, high_prec=True) + data.add( + f"atom_{var_name}", + dp.task_dim, + atomic=True, + must=False, + high_prec=True, + ) if dp.get_dim_fparam() > 0: data.add( @@ -832,12 +840,12 @@ def test_property( aproperty = ret[1] aproperty = aproperty.reshape([numb_test, natoms * dp.task_dim]) - diff_property = property - test_data["property"][:numb_test] + diff_property = property - test_data[var_name][:numb_test] mae_property = mae(diff_property) rmse_property = rmse(diff_property) if has_atom_property: - diff_aproperty = aproperty - test_data["atom_property"][:numb_test] + diff_aproperty = aproperty - test_data[f"atom_{var_name}"][:numb_test] mae_aproperty = mae(diff_aproperty) rmse_aproperty = rmse(diff_aproperty) @@ -854,7 +862,7 @@ def test_property( detail_path = Path(detail_file) for ii in range(numb_test): - test_out = test_data["property"][ii].reshape(-1, 1) + test_out = test_data[var_name][ii].reshape(-1, 1) pred_out = property[ii].reshape(-1, 1) frame_output = np.hstack((test_out, pred_out)) @@ -868,7 +876,7 @@ def test_property( if has_atom_property: for ii in range(numb_test): - 
test_out = test_data["atom_property"][ii].reshape(-1, 1)
+                test_out = test_data[f"atom_{var_name}"][ii].reshape(-1, 1)
                 pred_out = aproperty[ii].reshape(-1, 1)
 
                 frame_output = np.hstack((test_out, pred_out))
diff --git a/deepmd/infer/deep_eval.py b/deepmd/infer/deep_eval.py
index 159f9bdf60..15e4a56280 100644
--- a/deepmd/infer/deep_eval.py
+++ b/deepmd/infer/deep_eval.py
@@ -70,8 +70,6 @@ class DeepEvalBackend(ABC):
         "dipole_derv_c_redu": "virial",
         "dos": "atom_dos",
         "dos_redu": "dos",
-        "property": "atom_property",
-        "property_redu": "property",
         "mask_mag": "mask_mag",
         "mask": "mask",
         # old models in v1
@@ -276,6 +274,10 @@ def get_has_spin(self) -> bool:
         """Check if the model has spin atom types."""
         return False
 
+    def get_var_name(self) -> str:
+        """Get the name of the fitting property."""
+        raise NotImplementedError
+
     @abstractmethod
     def get_ntypes_spin(self) -> int:
         """Get the number of spin atom types of this model. Only used in old implement."""
diff --git a/deepmd/infer/deep_property.py b/deepmd/infer/deep_property.py
index 389a0e8512..5944491cc0 100644
--- a/deepmd/infer/deep_property.py
+++ b/deepmd/infer/deep_property.py
@@ -37,25 +37,41 @@ class DeepProperty(DeepEval):
         Keyword arguments.
     """
 
-    @property
     def output_def(self) -> ModelOutputDef:
-        """Get the output definition of this model."""
-        return ModelOutputDef(
+        """
+        Get the output definition of this model.
+        In property fitting, however, the output definition is not known until the model is loaded,
+        so it has to be rewritten after the model is loaded.
+        See change_output_def for details.
+        """
+        pass
+
+    def change_output_def(self) -> None:
+        """
+        Change the output definition of this model.
+        In property fitting, the output definition is only known after the model is loaded,
+        so the output definition and the related information are rewritten here.
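+        For example, with a fitting property named ``band_gap`` (an illustrative name),
+        the atomic output key ``band_gap`` is mapped to the backend label ``atom_band_gap``
+        and the reduced key ``band_gap_redu`` to ``band_gap``.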
+ """ + self.output_def = ModelOutputDef( FittingOutputDef( [ OutputVariableDef( - "property", - shape=[-1], + self.get_var_name(), + shape=[self.get_task_dim()], reducible=True, atomic=True, + intensive=self.get_intensive(), ), ] ) ) - - def change_output_def(self) -> None: - self.output_def["property"].shape = self.task_dim - self.output_def["property"].intensive = self.get_intensive() + self.deep_eval.output_def = self.output_def + self.deep_eval._OUTDEF_DP2BACKEND[self.get_var_name()] = ( + f"atom_{self.get_var_name()}" + ) + self.deep_eval._OUTDEF_DP2BACKEND[f"{self.get_var_name()}_redu"] = ( + self.get_var_name() + ) @property def task_dim(self) -> int: @@ -120,10 +136,12 @@ def eval( aparam=aparam, **kwargs, ) - atomic_property = results["property"].reshape( + atomic_property = results[self.get_var_name()].reshape( nframes, natoms, self.get_task_dim() ) - property = results["property_redu"].reshape(nframes, self.get_task_dim()) + property = results[f"{self.get_var_name()}_redu"].reshape( + nframes, self.get_task_dim() + ) if atomic: return ( @@ -141,5 +159,9 @@ def get_intensive(self) -> bool: """Get whether the property is intensive.""" return self.deep_eval.get_intensive() + def get_var_name(self) -> str: + """Get the name of the fitting property.""" + return self.deep_eval.get_var_name() + __all__ = ["DeepProperty"] diff --git a/deepmd/pd/entrypoints/main.py b/deepmd/pd/entrypoints/main.py index 3fa66312e7..bcc93432af 100644 --- a/deepmd/pd/entrypoints/main.py +++ b/deepmd/pd/entrypoints/main.py @@ -123,7 +123,7 @@ def prepare_trainer_input_single( # validation and training data # avoid the same batch sequence among devices - rank_seed = (seed + rank) % (2**32) if seed is not None else None + rank_seed = [rank, seed % (2**32)] if seed is not None else None validation_data_single = ( DpLoaderSet( validation_systems, diff --git a/deepmd/pd/infer/deep_eval.py b/deepmd/pd/infer/deep_eval.py index a2f8510f28..c31170ad71 100644 --- a/deepmd/pd/infer/deep_eval.py +++ b/deepmd/pd/infer/deep_eval.py @@ -113,6 +113,7 @@ def __init__( else: # self.dp = paddle.jit.load(self.model_path.split(".json")[0]) raise ValueError(f"Unknown model file format: {self.model_path}!") + self.dp.eval() self.rcut = self.dp.model["Default"].get_rcut() self.type_map = self.dp.model["Default"].get_type_map() if isinstance(auto_batch_size, bool): diff --git a/deepmd/pd/model/descriptor/__init__.py b/deepmd/pd/model/descriptor/__init__.py index 7eaa0df85b..cee9dbf226 100644 --- a/deepmd/pd/model/descriptor/__init__.py +++ b/deepmd/pd/model/descriptor/__init__.py @@ -9,20 +9,34 @@ DescrptBlockSeAtten, DescrptDPA1, ) +from .dpa2 import ( + DescrptDPA2, +) from .env_mat import ( prod_env_mat, ) +from .repformers import ( + DescrptBlockRepformers, +) from .se_a import ( DescrptBlockSeA, DescrptSeA, ) +from .se_t_tebd import ( + DescrptBlockSeTTebd, + DescrptSeTTebd, +) __all__ = [ "BaseDescriptor", "DescriptorBlock", + "DescrptBlockRepformers", "DescrptBlockSeA", "DescrptBlockSeAtten", + "DescrptBlockSeTTebd", "DescrptDPA1", + "DescrptDPA2", "DescrptSeA", + "DescrptSeTTebd", "prod_env_mat", ] diff --git a/deepmd/pd/model/descriptor/dpa2.py b/deepmd/pd/model/descriptor/dpa2.py new file mode 100644 index 0000000000..8d4e13edae --- /dev/null +++ b/deepmd/pd/model/descriptor/dpa2.py @@ -0,0 +1,902 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.descriptor.dpa2 import ( + RepformerArgs, + RepinitArgs, +) +from 
deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.network.mlp import ( + Identity, + MLPLayer, + NetworkCollection, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNet, + TypeEmbedNetConsistent, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) +from deepmd.pd.utils.nlist import ( + build_multiple_neighbor_list, + get_multiple_nlist_key, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.pd.utils.utils import ( + to_numpy_array, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_pair_exclude_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + extend_descrpt_stat, +) +from .repformer_layer import ( + RepformerLayer, +) +from .repformers import ( + DescrptBlockRepformers, +) +from .se_atten import ( + DescrptBlockSeAtten, +) +from .se_t_tebd import ( + DescrptBlockSeTTebd, +) + + +@BaseDescriptor.register("dpa2") +class DescrptDPA2(BaseDescriptor, paddle.nn.Layer): + def __init__( + self, + ntypes: int, + # args for repinit + repinit: Union[RepinitArgs, dict], + # args for repformer + repformer: Union[RepformerArgs, dict], + # kwargs for descriptor + concat_output_tebd: bool = True, + precision: str = "float64", + smooth: bool = True, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + add_tebd_to_repinit_out: bool = False, + use_econf_tebd: bool = False, + use_tebd_bias: bool = False, + type_map: Optional[list[str]] = None, + ) -> None: + r"""The DPA-2 descriptor[1]_. + + Parameters + ---------- + repinit : Union[RepinitArgs, dict] + The arguments used to initialize the repinit block, see docstr in `RepinitArgs` for details information. + repformer : Union[RepformerArgs, dict] + The arguments used to initialize the repformer block, see docstr in `RepformerArgs` for details information. + concat_output_tebd : bool, optional + Whether to concat type embedding at the output of the descriptor. + precision : str, optional + The precision of the embedding net parameters. + smooth : bool, optional + Whether to use smoothness in processes such as attention weights calculation. + exclude_types : list[list[int]], optional + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + env_protection : float, optional + Protection parameter to prevent division by zero errors during environment matrix calculations. + For example, when using paddings, there may be zero distances of neighbors, which may make division by zero error during environment matrix calculations without protection. + trainable : bool, optional + If the parameters are trainable. + seed : int, optional + Random seed for parameter initialization. + add_tebd_to_repinit_out : bool, optional + Whether to add type embedding to the output representation from repinit before inputting it into repformer. + use_econf_tebd : bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + type_map : list[str], Optional + A list of strings. 
Give the name to each type of atoms. + + Returns + ------- + descriptor: paddle.Tensor + the descriptor of shape nb x nloc x g1_dim. + invariant single-atom representation. + g2: paddle.Tensor + invariant pair-atom representation. + h2: paddle.Tensor + equivariant pair-atom representation. + rot_mat: paddle.Tensor + rotation matrix for equivariant fittings + sw: paddle.Tensor + The switch function for decaying inverse distance. + + References + ---------- + .. [1] Zhang, D., Liu, X., Zhang, X. et al. DPA-2: a + large atomic model as a multi-task learner. npj + Comput Mater 10, 293 (2024). https://doi.org/10.1038/s41524-024-01493-2 + """ + super().__init__() + + def init_subclass_params(sub_data, sub_class): + if isinstance(sub_data, dict): + return sub_class(**sub_data) + elif isinstance(sub_data, sub_class): + return sub_data + else: + raise ValueError( + f"Input args must be a {sub_class.__name__} class or a dict!" + ) + + self.repinit_args = init_subclass_params(repinit, RepinitArgs) + self.repformer_args = init_subclass_params(repformer, RepformerArgs) + self.tebd_input_mode = self.repinit_args.tebd_input_mode + + self.repinit = DescrptBlockSeAtten( + self.repinit_args.rcut, + self.repinit_args.rcut_smth, + self.repinit_args.nsel, + ntypes, + attn_layer=0, + neuron=self.repinit_args.neuron, + axis_neuron=self.repinit_args.axis_neuron, + tebd_dim=self.repinit_args.tebd_dim, + tebd_input_mode=self.repinit_args.tebd_input_mode, + set_davg_zero=self.repinit_args.set_davg_zero, + exclude_types=exclude_types, + env_protection=env_protection, + activation_function=self.repinit_args.activation_function, + precision=precision, + resnet_dt=self.repinit_args.resnet_dt, + smooth=smooth, + type_one_side=self.repinit_args.type_one_side, + seed=child_seed(seed, 0), + ) + self.use_three_body = self.repinit_args.use_three_body + if self.use_three_body: + self.repinit_three_body = DescrptBlockSeTTebd( + self.repinit_args.three_body_rcut, + self.repinit_args.three_body_rcut_smth, + self.repinit_args.three_body_sel, + ntypes, + neuron=self.repinit_args.three_body_neuron, + tebd_dim=self.repinit_args.tebd_dim, + tebd_input_mode=self.repinit_args.tebd_input_mode, + set_davg_zero=self.repinit_args.set_davg_zero, + exclude_types=exclude_types, + env_protection=env_protection, + activation_function=self.repinit_args.activation_function, + precision=precision, + resnet_dt=self.repinit_args.resnet_dt, + smooth=smooth, + seed=child_seed(seed, 5), + ) + else: + self.repinit_three_body = None + self.repformers = DescrptBlockRepformers( + self.repformer_args.rcut, + self.repformer_args.rcut_smth, + self.repformer_args.nsel, + ntypes, + nlayers=self.repformer_args.nlayers, + g1_dim=self.repformer_args.g1_dim, + g2_dim=self.repformer_args.g2_dim, + axis_neuron=self.repformer_args.axis_neuron, + direct_dist=self.repformer_args.direct_dist, + update_g1_has_conv=self.repformer_args.update_g1_has_conv, + update_g1_has_drrd=self.repformer_args.update_g1_has_drrd, + update_g1_has_grrg=self.repformer_args.update_g1_has_grrg, + update_g1_has_attn=self.repformer_args.update_g1_has_attn, + update_g2_has_g1g1=self.repformer_args.update_g2_has_g1g1, + update_g2_has_attn=self.repformer_args.update_g2_has_attn, + update_h2=self.repformer_args.update_h2, + attn1_hidden=self.repformer_args.attn1_hidden, + attn1_nhead=self.repformer_args.attn1_nhead, + attn2_hidden=self.repformer_args.attn2_hidden, + attn2_nhead=self.repformer_args.attn2_nhead, + attn2_has_gate=self.repformer_args.attn2_has_gate, + 
activation_function=self.repformer_args.activation_function, + update_style=self.repformer_args.update_style, + update_residual=self.repformer_args.update_residual, + update_residual_init=self.repformer_args.update_residual_init, + set_davg_zero=self.repformer_args.set_davg_zero, + smooth=smooth, + exclude_types=exclude_types, + env_protection=env_protection, + precision=precision, + trainable_ln=self.repformer_args.trainable_ln, + ln_eps=self.repformer_args.ln_eps, + use_sqrt_nnei=self.repformer_args.use_sqrt_nnei, + g1_out_conv=self.repformer_args.g1_out_conv, + g1_out_mlp=self.repformer_args.g1_out_mlp, + seed=child_seed(seed, 1), + ) + self.rcsl_list = [ + (self.repformers.get_rcut(), self.repformers.get_nsel()), + (self.repinit.get_rcut(), self.repinit.get_nsel()), + ] + if self.use_three_body: + self.rcsl_list.append( + (self.repinit_three_body.get_rcut(), self.repinit_three_body.get_nsel()) + ) + self.rcsl_list.sort() + for ii in range(1, len(self.rcsl_list)): + assert ( + self.rcsl_list[ii - 1][1] <= self.rcsl_list[ii][1] + ), "rcut and sel are not in the same order" + self.rcut_list = [ii[0] for ii in self.rcsl_list] + self.nsel_list = [ii[1] for ii in self.rcsl_list] + self.use_econf_tebd = use_econf_tebd + self.use_tebd_bias = use_tebd_bias + self.type_map = type_map + self.type_embedding = TypeEmbedNet( + ntypes, + self.repinit_args.tebd_dim, + precision=precision, + seed=child_seed(seed, 2), + use_econf_tebd=self.use_econf_tebd, + use_tebd_bias=use_tebd_bias, + type_map=type_map, + ) + self.concat_output_tebd = concat_output_tebd + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.smooth = smooth + self.exclude_types = exclude_types + self.env_protection = env_protection + self.trainable = trainable + self.add_tebd_to_repinit_out = add_tebd_to_repinit_out + + self.repinit_out_dim = self.repinit.dim_out + if self.repinit_args.use_three_body: + assert self.repinit_three_body is not None + self.repinit_out_dim += self.repinit_three_body.dim_out + + if self.repinit_out_dim == self.repformers.dim_in: + self.g1_shape_tranform = Identity() + else: + self.g1_shape_tranform = MLPLayer( + self.repinit_out_dim, + self.repformers.dim_in, + bias=False, + precision=precision, + init="glorot", + seed=child_seed(seed, 3), + ) + self.tebd_transform = None + if self.add_tebd_to_repinit_out: + self.tebd_transform = MLPLayer( + self.repinit_args.tebd_dim, + self.repformers.dim_in, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + assert self.repinit.rcut > self.repformers.rcut + assert self.repinit.sel[0] > self.repformers.sel[0] + + self.tebd_dim = self.repinit_args.tebd_dim + self.rcut = self.repinit.get_rcut() + self.rcut_smth = self.repinit.get_rcut_smth() + self.ntypes = ntypes + self.sel = self.repinit.sel + # set trainable + for param in self.parameters(): + param.stop_gradient = not trainable + self.compress = False + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_type_map(self) -> list[str]: + """Get the name to 
each type of atoms."""
+        return self.type_map
+
+    def get_dim_out(self) -> int:
+        """Returns the output dimension of this descriptor."""
+        ret = self.repformers.dim_out
+        if self.concat_output_tebd:
+            ret += self.tebd_dim
+        return ret
+
+    def get_dim_emb(self) -> int:
+        """Returns the embedding dimension of this descriptor."""
+        return self.repformers.dim_emb
+
+    def mixed_types(self) -> bool:
+        """If true, the descriptor
+        1. assumes total number of atoms aligned across frames;
+        2. requires a neighbor list that does not distinguish different atomic types.
+
+        If false, the descriptor
+        1. assumes total number of atoms of each atom type aligned across frames;
+        2. requires a neighbor list that distinguishes different atomic types.
+
+        """
+        return True
+
+    def has_message_passing(self) -> bool:
+        """Returns whether the descriptor has message passing."""
+        return any(
+            [self.repinit.has_message_passing(), self.repformers.has_message_passing()]
+        )
+
+    def need_sorted_nlist_for_lower(self) -> bool:
+        """Returns whether the descriptor needs sorted nlist when using `forward_lower`."""
+        return True
+
+    def get_env_protection(self) -> float:
+        """Returns the protection of building environment matrix."""
+        # the env_protection of repinit is the same as that of the repformer
+        return self.repinit.get_env_protection()
+
+    def share_params(self, base_class, shared_level, resume=False) -> None:
+        """
+        Share the parameters of self to the base_class with shared_level during multitask training.
+        If not starting from a checkpoint (resume is False),
+        some separated parameters (e.g. mean and stddev) will be re-calculated across different classes.
+        """
+        assert (
+            self.__class__ == base_class.__class__
+        ), "Only descriptors of the same type can share params!"
+        # For DPA2 descriptors, the user-defined share-level
+        # shared_level: 0
+        # share all parameters in type_embedding, repinit and repformers
+        if shared_level == 0:
+            self._sub_layers["type_embedding"] = base_class._sub_layers[
+                "type_embedding"
+            ]
+            self.repinit.share_params(base_class.repinit, 0, resume=resume)
+            if self.use_three_body:
+                self.repinit_three_body.share_params(
+                    base_class.repinit_three_body, 0, resume=resume
+                )
+            self._sub_layers["g1_shape_tranform"] = base_class._sub_layers[
+                "g1_shape_tranform"
+            ]
+            self.repformers.share_params(base_class.repformers, 0, resume=resume)
+        # shared_level: 1
+        # share all parameters in type_embedding
+        elif shared_level == 1:
+            # use paddle's sublayer registry (_sub_layers); paddle.nn.Layer has no
+            # torch-style `_modules` attribute, so that access would fail here
+            self._sub_layers["type_embedding"] = base_class._sub_layers[
+                "type_embedding"
+            ]
+        # Other shared levels
+        else:
+            raise NotImplementedError
+
+    def change_type_map(
+        self, type_map: list[str], model_with_new_type_stat=None
+    ) -> None:
+        """Change the type related params to new ones, according to `type_map` and the original one in the model.
+        If there are new types in `type_map`, statistics for these new types will be updated according to `model_with_new_type_stat`.
+        """
+        assert (
+            self.type_map is not None
+        ), "'type_map' must be defined when performing type changing!"
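+        # Roughly: remap_index[i] gives the slot from which the i-th type of the new
+        # type_map reads its statistics in the (possibly extended) old tables, and
+        # has_new_type flags types absent from the old map. For a hypothetical remap
+        # ["O", "H"] -> ["H", "O", "C"], "C" is a new type: extend_descrpt_stat below
+        # first appends fresh davg/dstd entries for it, after which the remap_index
+        # fancy-indexing reorders all statistics in one pass.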
+ remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + self.type_map = type_map + self.type_embedding.change_type_map(type_map=type_map) + self.exclude_types = map_pair_exclude_types(self.exclude_types, remap_index) + self.ntypes = len(type_map) + repinit = self.repinit + repformers = self.repformers + repinit_three_body = self.repinit_three_body + if has_new_type: + # the avg and std of new types need to be updated + extend_descrpt_stat( + repinit, + type_map, + des_with_stat=model_with_new_type_stat.repinit + if model_with_new_type_stat is not None + else None, + ) + extend_descrpt_stat( + repformers, + type_map, + des_with_stat=model_with_new_type_stat.repformers + if model_with_new_type_stat is not None + else None, + ) + if self.use_three_body: + extend_descrpt_stat( + repinit_three_body, + type_map, + des_with_stat=model_with_new_type_stat.repinit_three_body + if model_with_new_type_stat is not None + else None, + ) + repinit.ntypes = self.ntypes + repformers.ntypes = self.ntypes + repinit.reinit_exclude(self.exclude_types) + repformers.reinit_exclude(self.exclude_types) + repinit["davg"] = repinit["davg"][remap_index] + repinit["dstd"] = repinit["dstd"][remap_index] + repformers["davg"] = repformers["davg"][remap_index] + repformers["dstd"] = repformers["dstd"][remap_index] + if self.use_three_body: + repinit_three_body.ntypes = self.ntypes + repinit_three_body.reinit_exclude(self.exclude_types) + repinit_three_body["davg"] = repinit_three_body["davg"][remap_index] + repinit_three_body["dstd"] = repinit_three_body["dstd"][remap_index] + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + """Returns the embedding dimension g2.""" + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ) -> None: + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
+ + """ + descrpt_list = [self.repinit, self.repformers] + if self.use_three_body: + descrpt_list.append(self.repinit_three_body) + for ii, descrpt in enumerate(descrpt_list): + descrpt.compute_input_stats(merged, path) + + def set_stat_mean_and_stddev( + self, + mean: list[paddle.Tensor], + stddev: list[paddle.Tensor], + ) -> None: + """Update mean and stddev for descriptor.""" + descrpt_list = [self.repinit, self.repformers] + if self.use_three_body: + descrpt_list.append(self.repinit_three_body) + for ii, descrpt in enumerate(descrpt_list): + descrpt.mean = mean[ii] + descrpt.stddev = stddev[ii] + + def get_stat_mean_and_stddev( + self, + ) -> tuple[list[paddle.Tensor], list[paddle.Tensor]]: + """Get mean and stddev for descriptor.""" + mean_list = [self.repinit.mean, self.repformers.mean] + stddev_list = [ + self.repinit.stddev, + self.repformers.stddev, + ] + if self.use_three_body: + mean_list.append(self.repinit_three_body.mean) + stddev_list.append(self.repinit_three_body.stddev) + return mean_list, stddev_list + + def serialize(self) -> dict: + repinit = self.repinit + repformers = self.repformers + repinit_three_body = self.repinit_three_body + data = { + "@class": "Descriptor", + "type": "dpa2", + "@version": 3, + "ntypes": self.ntypes, + "repinit_args": self.repinit_args.serialize(), + "repformer_args": self.repformer_args.serialize(), + "concat_output_tebd": self.concat_output_tebd, + "precision": self.precision, + "smooth": self.smooth, + "exclude_types": self.exclude_types, + "env_protection": self.env_protection, + "trainable": self.trainable, + "add_tebd_to_repinit_out": self.add_tebd_to_repinit_out, + "use_econf_tebd": self.use_econf_tebd, + "use_tebd_bias": self.use_tebd_bias, + "type_map": self.type_map, + "type_embedding": self.type_embedding.embedding.serialize(), + "g1_shape_tranform": self.g1_shape_tranform.serialize(), + } + if self.add_tebd_to_repinit_out: + data.update( + { + "tebd_transform": self.tebd_transform.serialize(), + } + ) + repinit_variable = { + "embeddings": repinit.filter_layers.serialize(), + "env_mat": DPEnvMat(repinit.rcut, repinit.rcut_smth).serialize(), + "@variables": { + "davg": to_numpy_array(repinit["davg"]), + "dstd": to_numpy_array(repinit["dstd"]), + }, + } + if repinit.tebd_input_mode in ["strip"]: + repinit_variable.update( + {"embeddings_strip": repinit.filter_layers_strip.serialize()} + ) + repformers_variable = { + "g2_embd": repformers.g2_embd.serialize(), + "repformer_layers": [layer.serialize() for layer in repformers.layers], + "env_mat": DPEnvMat(repformers.rcut, repformers.rcut_smth).serialize(), + "@variables": { + "davg": to_numpy_array(repformers["davg"]), + "dstd": to_numpy_array(repformers["dstd"]), + }, + } + data.update( + { + "repinit_variable": repinit_variable, + "repformers_variable": repformers_variable, + } + ) + if self.use_three_body: + repinit_three_body_variable = { + "embeddings": repinit_three_body.filter_layers.serialize(), + "env_mat": DPEnvMat( + repinit_three_body.rcut, repinit_three_body.rcut_smth + ).serialize(), + "@variables": { + "davg": to_numpy_array(repinit_three_body["davg"]), + "dstd": to_numpy_array(repinit_three_body["dstd"]), + }, + } + if repinit_three_body.tebd_input_mode in ["strip"]: + repinit_three_body_variable.update( + { + "embeddings_strip": repinit_three_body.filter_layers_strip.serialize() + } + ) + data.update( + { + "repinit_three_body_variable": repinit_three_body_variable, + } + ) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptDPA2": + data = 
data.copy() + version = data.pop("@version") + check_version_compatibility(version, 3, 1) + data.pop("@class") + data.pop("type") + repinit_variable = data.pop("repinit_variable").copy() + repformers_variable = data.pop("repformers_variable").copy() + repinit_three_body_variable = ( + data.pop("repinit_three_body_variable").copy() + if "repinit_three_body_variable" in data + else None + ) + type_embedding = data.pop("type_embedding") + g1_shape_tranform = data.pop("g1_shape_tranform") + tebd_transform = data.pop("tebd_transform", None) + add_tebd_to_repinit_out = data["add_tebd_to_repinit_out"] + if version < 3: + # compat with old version + data["repformer_args"]["use_sqrt_nnei"] = False + data["repformer_args"]["g1_out_conv"] = False + data["repformer_args"]["g1_out_mlp"] = False + data["repinit"] = RepinitArgs(**data.pop("repinit_args")) + data["repformer"] = RepformerArgs(**data.pop("repformer_args")) + # compat with version 1 + if "use_tebd_bias" not in data: + data["use_tebd_bias"] = True + obj = cls(**data) + obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize( + type_embedding + ) + if add_tebd_to_repinit_out: + assert isinstance(tebd_transform, dict) + obj.tebd_transform = MLPLayer.deserialize(tebd_transform) + if obj.repinit.dim_out != obj.repformers.dim_in: + obj.g1_shape_tranform = MLPLayer.deserialize(g1_shape_tranform) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.repinit.prec, place=env.DEVICE) + + # deserialize repinit + statistic_repinit = repinit_variable.pop("@variables") + env_mat = repinit_variable.pop("env_mat") + tebd_input_mode = data["repinit"].tebd_input_mode + obj.repinit.filter_layers = NetworkCollection.deserialize( + repinit_variable.pop("embeddings") + ) + if tebd_input_mode in ["strip"]: + obj.repinit.filter_layers_strip = NetworkCollection.deserialize( + repinit_variable.pop("embeddings_strip") + ) + obj.repinit["davg"] = t_cvt(statistic_repinit["davg"]) + obj.repinit["dstd"] = t_cvt(statistic_repinit["dstd"]) + + if data["repinit"].use_three_body: + # deserialize repinit_three_body + statistic_repinit_three_body = repinit_three_body_variable.pop("@variables") + env_mat = repinit_three_body_variable.pop("env_mat") + tebd_input_mode = data["repinit"].tebd_input_mode + obj.repinit_three_body.filter_layers = NetworkCollection.deserialize( + repinit_three_body_variable.pop("embeddings") + ) + if tebd_input_mode in ["strip"]: + obj.repinit_three_body.filter_layers_strip = ( + NetworkCollection.deserialize( + repinit_three_body_variable.pop("embeddings_strip") + ) + ) + obj.repinit_three_body["davg"] = t_cvt(statistic_repinit_three_body["davg"]) + obj.repinit_three_body["dstd"] = t_cvt(statistic_repinit_three_body["dstd"]) + + # deserialize repformers + statistic_repformers = repformers_variable.pop("@variables") + env_mat = repformers_variable.pop("env_mat") + repformer_layers = repformers_variable.pop("repformer_layers") + obj.repformers.g2_embd = MLPLayer.deserialize( + repformers_variable.pop("g2_embd") + ) + obj.repformers["davg"] = t_cvt(statistic_repformers["davg"]) + obj.repformers["dstd"] = t_cvt(statistic_repformers["dstd"]) + obj.repformers.layers = paddle.nn.LayerList( + [RepformerLayer.deserialize(layer) for layer in repformer_layers] + ) + return obj + + def forward( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. 
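+
+        Schematically, the forward pass below first runs the repinit block (plus the
+        optional three-body block), maps the concatenated result to the repformer
+        input width via ``g1_shape_tranform``, and then applies the repformer stack
+        to produce the returned representations.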
+
+        Parameters
+        ----------
+        extended_coord
+            The extended coordinates of atoms. shape: nf x (nall x 3)
+        extended_atype
+            The extended atom types. shape: nf x nall
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        mapping
+            The index mapping, maps extended region index to local region.
+        comm_dict
+            The data needed for communication for parallel inference.
+
+        Returns
+        -------
+        descriptor
+            The descriptor. shape: nf x nloc x (ng x axis_neuron)
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        g2
+            The rotationally invariant pair-particle representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-particle representation.
+            shape: nf x nloc x nnei x 3
+        sw
+            The smooth switch function. shape: nf x nloc x nnei
+
+        """
+        # cast the input to the internal precision
+        extended_coord = extended_coord.to(dtype=self.prec)
+
+        use_three_body = self.use_three_body
+        nframes, nloc, nnei = nlist.shape
+        nall = extended_coord.reshape([nframes, -1]).shape[1] // 3
+        # nlists
+        nlist_dict = build_multiple_neighbor_list(
+            extended_coord.detach(),
+            nlist,
+            self.rcut_list,
+            self.nsel_list,
+        )
+        # repinit
+        g1_ext = self.type_embedding(extended_atype)
+        g1_inp = g1_ext[:, :nloc, :]
+        if self.tebd_input_mode in ["strip"]:
+            type_embedding = self.type_embedding.get_full_embedding(g1_ext.place)
+        else:
+            type_embedding = None
+        g1, _, _, _, _ = self.repinit(
+            nlist_dict[
+                get_multiple_nlist_key(self.repinit.get_rcut(), self.repinit.get_nsel())
+            ],
+            extended_coord,
+            extended_atype,
+            g1_ext,
+            mapping,
+            type_embedding,
+        )
+        if use_three_body:
+            assert self.repinit_three_body is not None
+            g1_three_body, __, __, __, __ = self.repinit_three_body(
+                nlist_dict[
+                    get_multiple_nlist_key(
+                        self.repinit_three_body.get_rcut(),
+                        self.repinit_three_body.get_nsel(),
+                    )
+                ],
+                extended_coord,
+                extended_atype,
+                g1_ext,
+                mapping,
+                type_embedding,
+            )
+            g1 = paddle.concat([g1, g1_three_body], axis=-1)
+        # linear to change shape
+        g1 = self.g1_shape_tranform(g1)
+        if self.add_tebd_to_repinit_out:
+            assert self.tebd_transform is not None
+            g1 = g1 + self.tebd_transform(g1_inp)
+        # mapping g1
+        if comm_dict is None:
+            assert mapping is not None
+            mapping_ext = (
+                mapping.reshape([nframes, nall])
+                .unsqueeze(-1)
+                .expand([-1, -1, g1.shape[-1]])
+            )
+            g1_ext = paddle.take_along_axis(g1, mapping_ext, 1)
+            g1 = g1_ext
+        # repformer
+        g1, g2, h2, rot_mat, sw = self.repformers(
+            nlist_dict[
+                get_multiple_nlist_key(
+                    self.repformers.get_rcut(), self.repformers.get_nsel()
+                )
+            ],
+            extended_coord,
+            extended_atype,
+            g1,
+            mapping,
+            comm_dict=comm_dict,
+        )
+        if self.concat_output_tebd:
+            g1 = paddle.concat([g1, g1_inp], axis=-1)
+        return (
+            g1.to(dtype=env.GLOBAL_PD_FLOAT_PRECISION),
+            rot_mat.to(dtype=env.GLOBAL_PD_FLOAT_PRECISION),
+            g2.to(dtype=env.GLOBAL_PD_FLOAT_PRECISION),
+            h2.to(dtype=env.GLOBAL_PD_FLOAT_PRECISION),
+            sw.to(dtype=env.GLOBAL_PD_FLOAT_PRECISION),
+        )
+
+    @classmethod
+    def update_sel(
+        cls,
+        train_data: DeepmdDataSystem,
+        type_map: Optional[list[str]],
+        local_jdata: dict,
+    ) -> tuple[dict, Optional[float]]:
+        """Update the selection and perform neighbor statistics.
+
+        Parameters
+        ----------
+        train_data : DeepmdDataSystem
+            data used to do neighbor statistics
+        type_map : list[str], optional
+            The name of each type of atoms
+        local_jdata : dict
+            The local data referring to the current class
+
+        Returns
+        -------
+        dict
+            The updated local data
+        float
+            The minimum distance between two atoms
+        """
+        local_jdata_cpy = local_jdata.copy()
+        update_sel = UpdateSel()
+        min_nbor_dist, repinit_sel = update_sel.update_one_sel(
+            train_data,
+            type_map,
+            local_jdata_cpy["repinit"]["rcut"],
+            local_jdata_cpy["repinit"]["nsel"],
+            True,
+        )
+        local_jdata_cpy["repinit"]["nsel"] = repinit_sel[0]
+        min_nbor_dist, repinit_three_body_sel = update_sel.update_one_sel(
+            train_data,
+            type_map,
+            local_jdata_cpy["repinit"]["three_body_rcut"],
+            local_jdata_cpy["repinit"]["three_body_sel"],
+            True,
+        )
+        local_jdata_cpy["repinit"]["three_body_sel"] = repinit_three_body_sel[0]
+        min_nbor_dist, repformer_sel = update_sel.update_one_sel(
+            train_data,
+            type_map,
+            local_jdata_cpy["repformer"]["rcut"],
+            local_jdata_cpy["repformer"]["nsel"],
+            True,
+        )
+        local_jdata_cpy["repformer"]["nsel"] = repformer_sel[0]
+        return local_jdata_cpy, min_nbor_dist
+
+    def enable_compression(
+        self,
+        min_nbor_dist: float,
+        table_extrapolate: float = 5,
+        table_stride_1: float = 0.01,
+        table_stride_2: float = 0.1,
+        check_frequency: int = -1,
+    ) -> None:
+        """Receive the statistics (distance, max_nbor_size and env_mat_range) of the training data.
+
+        Parameters
+        ----------
+        min_nbor_dist
+            The nearest distance between atoms
+        table_extrapolate
+            The scale of model extrapolation
+        table_stride_1
+            The uniform stride of the first table
+        table_stride_2
+            The uniform stride of the second table
+        check_frequency
+            The overflow check frequency
+        """
+        # do some checks before the model compression process
+        raise NotImplementedError("enable_compression is not implemented yet")
diff --git a/deepmd/pd/model/descriptor/repformer_layer.py b/deepmd/pd/model/descriptor/repformer_layer.py
new file mode 100644
index 0000000000..a09c5cbe17
--- /dev/null
+++ b/deepmd/pd/model/descriptor/repformer_layer.py
@@ -0,0 +1,1484 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+from typing import (
+    Optional,
+    Union,
+)
+
+import paddle
+import paddle.nn as nn
+
+from deepmd.dpmodel.utils.seed import (
+    child_seed,
+)
+from deepmd.pd.model.network.init import (
+    constant_,
+    normal_,
+)
+from deepmd.pd.model.network.layernorm import (
+    LayerNorm,
+)
+from deepmd.pd.model.network.mlp import (
+    MLPLayer,
+)
+from deepmd.pd.utils import (
+    decomp,
+    env,
+)
+from deepmd.pd.utils.env import (
+    PRECISION_DICT,
+)
+from deepmd.pd.utils.utils import (
+    ActivationFn,
+    get_generator,
+    to_numpy_array,
+    to_paddle_tensor,
+)
+from deepmd.utils.version import (
+    check_version_compatibility,
+)
+
+
+def get_residual(
+    _dim: int,
+    _scale: float,
+    _mode: str = "norm",
+    trainable: bool = True,
+    precision: str = "float64",
+    seed: Optional[Union[int, list[int]]] = None,
+) -> paddle.Tensor:
+    r"""
+    Get residual tensor for one update vector.
+
+    Parameters
+    ----------
+    _dim : int
+        The dimension of the update vector.
+    _scale
+        The initial scale of the residual tensor. See `_mode` for details.
+    _mode
+        The mode of residual initialization for the residual tensor.
+        - "norm" (default): init residual using normal with `_scale` std.
+        - "const": init residual using element-wise constants of `_scale`.
+    trainable
+        Whether the residual tensor is trainable.
+    precision
+        The precision of the residual tensor.
+ seed : int, optional + Random seed for parameter initialization. + """ + random_generator = get_generator(seed) + residual = paddle.create_parameter( + [_dim], + dtype=PRECISION_DICT[precision], + default_initializer=nn.initializer.Constant(0), + ).to(device=env.DEVICE) + residual.stop_gradient = not trainable + if _mode == "norm": + normal_(residual.data, std=_scale, generator=random_generator) + elif _mode == "const": + constant_(residual.data, val=_scale) + else: + raise RuntimeError(f"Unsupported initialization mode '{_mode}'!") + return residual + + +# common ops +def _make_nei_g1( + g1_ext: paddle.Tensor, + nlist: paddle.Tensor, +) -> paddle.Tensor: + """ + Make neighbor-wise atomic invariant rep. + + Parameters + ---------- + g1_ext + Extended atomic invariant rep, with shape nb x nall x ng1. + nlist + Neighbor list, with shape nb x nloc x nnei. + + Returns + ------- + gg1: paddle.Tensor + Neighbor-wise atomic invariant rep, with shape nb x nloc x nnei x ng1. + + """ + # nlist: nb x nloc x nnei + nb, nloc, nnei = nlist.shape + # g1_ext: nb x nall x ng1 + ng1 = g1_ext.shape[-1] + # index: nb x (nloc x nnei) x ng1 + index = nlist.reshape([nb, nloc * nnei]).unsqueeze(-1).expand([-1, -1, ng1]) + # gg1 : nb x (nloc x nnei) x ng1 + gg1 = paddle.take_along_axis(g1_ext, axis=1, indices=index) + # gg1 : nb x nloc x nnei x ng1 + gg1 = gg1.reshape([nb, nloc, nnei, ng1]) + return gg1 + + +def _apply_nlist_mask( + gg: paddle.Tensor, + nlist_mask: paddle.Tensor, +) -> paddle.Tensor: + """ + Apply nlist mask to neighbor-wise rep tensors. + + Parameters + ---------- + gg + Neighbor-wise rep tensors, with shape nf x nloc x nnei x d. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nf x nloc x nnei. + """ + # gg: nf x nloc x nnei x d + # msk: nf x nloc x nnei + return gg.masked_fill(~nlist_mask.unsqueeze(-1), 0.0) + + +def _apply_switch(gg: paddle.Tensor, sw: paddle.Tensor) -> paddle.Tensor: + """ + Apply switch function to neighbor-wise rep tensors. + + Parameters + ---------- + gg + Neighbor-wise rep tensors, with shape nf x nloc x nnei x d. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nf x nloc x nnei. 
+ """ + # gg: nf x nloc x nnei x d + # sw: nf x nloc x nnei + return gg * sw.unsqueeze(-1) + + +class Atten2Map(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + hidden_dim: int, + head_num: int, + has_gate: bool = False, # apply gate to attn map + smooth: bool = True, + attnw_shift: float = 20.0, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ): + """Return neighbor-wise multi-head self-attention maps, with gate mechanism.""" + super().__init__() + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.head_num = head_num + self.mapqk = MLPLayer( + input_dim, + hidden_dim * 2 * head_num, + bias=False, + precision=precision, + seed=seed, + ) + self.has_gate = has_gate + self.smooth = smooth + self.attnw_shift = attnw_shift + self.precision = precision + + def forward( + self, + g2: paddle.Tensor, # nb x nloc x nnei x ng2 + h2: paddle.Tensor, # nb x nloc x nnei x 3 + nlist_mask: paddle.Tensor, # nb x nloc x nnei + sw: paddle.Tensor, # nb x nloc x nnei + ) -> paddle.Tensor: + ( + nb, + nloc, + nnei, + _, + ) = g2.shape + nd, nh = self.hidden_dim, self.head_num + # nb x nloc x nnei x nd x (nh x 2) + g2qk = self.mapqk(g2).reshape([nb, nloc, nnei, nd, nh * 2]) + # nb x nloc x (nh x 2) x nnei x nd + g2qk = paddle.transpose(g2qk, (0, 1, 4, 2, 3)) + # nb x nloc x nh x nnei x nd + g2q, g2k = paddle.split(g2qk, decomp.sec(g2qk.shape[2], nh), axis=2) + # g2q = paddle.nn.functional.normalize(g2q, axis=-1) + # g2k = paddle.nn.functional.normalize(g2k, axis=-1) + # nb x nloc x nh x nnei x nnei + attnw = paddle.matmul(g2q, paddle.transpose(g2k, [0, 1, 2, 4, 3])) / nd**0.5 + if self.has_gate: + gate = paddle.matmul(h2, paddle.transpose(h2, [0, 1, 3, 2])).unsqueeze(-3) + attnw = attnw * gate + # mask the attenmap, nb x nloc x 1 x 1 x nnei + attnw_mask = ~nlist_mask.unsqueeze(2).unsqueeze(2) + # mask the attenmap, nb x nloc x 1 x nnei x 1 + attnw_mask_c = ~nlist_mask.unsqueeze(2).unsqueeze(-1) + if self.smooth: + attnw = (attnw + self.attnw_shift) * sw[:, :, None, :, None] * sw[ + :, :, None, None, : + ] - self.attnw_shift + else: + attnw = attnw.masked_fill( + attnw_mask, + float("-inf"), + ) + attnw = paddle.nn.functional.softmax(attnw, axis=-1) + attnw = attnw.masked_fill( + attnw_mask, + 0.0, + ) + # nb x nloc x nh x nnei x nnei + attnw = attnw.masked_fill( + attnw_mask_c, + 0.0, + ) + if self.smooth: + attnw = attnw * sw[:, :, None, :, None] * sw[:, :, None, None, :] + # nb x nloc x nnei x nnei + h2h2t = paddle.matmul(h2, paddle.transpose(h2, [0, 1, 3, 2])) / 3.0**0.5 + # nb x nloc x nh x nnei x nnei + ret = attnw * h2h2t[:, :, None, :, :] + # ret = paddle.nn.functional.softmax(g2qk, axis=-1) + # nb x nloc x nnei x nnei x nh + ret = paddle.transpose(ret, (0, 1, 3, 4, 2)) + return ret + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + return { + "@class": "Atten2Map", + "@version": 1, + "input_dim": self.input_dim, + "hidden_dim": self.hidden_dim, + "head_num": self.head_num, + "has_gate": self.has_gate, + "smooth": self.smooth, + "attnw_shift": self.attnw_shift, + "precision": self.precision, + "mapqk": self.mapqk.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "Atten2Map": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. 
+ """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + mapqk = data.pop("mapqk") + obj = cls(**data) + obj.mapqk = MLPLayer.deserialize(mapqk) + return obj + + +class Atten2MultiHeadApply(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + head_num: int, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + super().__init__() + self.input_dim = input_dim + self.head_num = head_num + self.mapv = MLPLayer( + input_dim, + input_dim * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 0), + ) + self.head_map = MLPLayer( + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 1), + ) + self.precision = precision + + def forward( + self, + AA: paddle.Tensor, # nf x nloc x nnei x nnei x nh + g2: paddle.Tensor, # nf x nloc x nnei x ng2 + ) -> paddle.Tensor: + nf, nloc, nnei, ng2 = g2.shape + nh = self.head_num + # nf x nloc x nnei x ng2 x nh + g2v = self.mapv(g2).reshape([nf, nloc, nnei, ng2, nh]) + # nf x nloc x nh x nnei x ng2 + g2v = paddle.transpose(g2v, (0, 1, 4, 2, 3)) + # g2v = paddle.nn.functional.normalize(g2v, axis=-1) + # nf x nloc x nh x nnei x nnei + AA = paddle.transpose(AA, (0, 1, 4, 2, 3)) + # nf x nloc x nh x nnei x ng2 + ret = paddle.matmul(AA, g2v) + # nf x nloc x nnei x ng2 x nh + ret = paddle.transpose(ret, (0, 1, 3, 4, 2)).reshape( + [nf, nloc, nnei, (ng2 * nh)] + ) + # nf x nloc x nnei x ng2 + return self.head_map(ret) + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + return { + "@class": "Atten2MultiHeadApply", + "@version": 1, + "input_dim": self.input_dim, + "head_num": self.head_num, + "precision": self.precision, + "mapv": self.mapv.serialize(), + "head_map": self.head_map.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "Atten2MultiHeadApply": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + mapv = data.pop("mapv") + head_map = data.pop("head_map") + obj = cls(**data) + obj.mapv = MLPLayer.deserialize(mapv) + obj.head_map = MLPLayer.deserialize(head_map) + return obj + + +class Atten2EquiVarApply(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + head_num: int, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + super().__init__() + self.input_dim = input_dim + self.head_num = head_num + self.head_map = MLPLayer( + head_num, 1, bias=False, precision=precision, seed=seed + ) + self.precision = precision + + def forward( + self, + AA: paddle.Tensor, # nf x nloc x nnei x nnei x nh + h2: paddle.Tensor, # nf x nloc x nnei x 3 + ) -> paddle.Tensor: + nf, nloc, nnei, _ = h2.shape + nh = self.head_num + # nf x nloc x nh x nnei x nnei + AA = paddle.transpose(AA, (0, 1, 4, 2, 3)) + h2m = paddle.unsqueeze(h2, axis=2) + # nf x nloc x nh x nnei x 3 + h2m = paddle.tile(h2m, [1, 1, nh, 1, 1]) + # nf x nloc x nh x nnei x 3 + ret = paddle.matmul(AA, h2m) + # nf x nloc x nnei x 3 x nh + ret = paddle.transpose(ret, (0, 1, 3, 4, 2)).reshape([nf, nloc, nnei, 3, nh]) + # nf x nloc x nnei x 3 + return paddle.squeeze(self.head_map(ret), axis=-1) + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
+ """ + return { + "@class": "Atten2EquiVarApply", + "@version": 1, + "input_dim": self.input_dim, + "head_num": self.head_num, + "precision": self.precision, + "head_map": self.head_map.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "Atten2EquiVarApply": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + head_map = data.pop("head_map") + obj = cls(**data) + obj.head_map = MLPLayer.deserialize(head_map) + return obj + + +class LocalAtten(paddle.nn.Layer): + def __init__( + self, + input_dim: int, + hidden_dim: int, + head_num: int, + smooth: bool = True, + attnw_shift: float = 20.0, + precision: str = "float64", + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + super().__init__() + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.head_num = head_num + self.mapq = MLPLayer( + input_dim, + hidden_dim * 1 * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 0), + ) + self.mapkv = MLPLayer( + input_dim, + (hidden_dim + input_dim) * head_num, + bias=False, + precision=precision, + seed=child_seed(seed, 1), + ) + self.head_map = MLPLayer( + input_dim * head_num, + input_dim, + precision=precision, + seed=child_seed(seed, 2), + ) + self.smooth = smooth + self.attnw_shift = attnw_shift + self.precision = precision + + def forward( + self, + g1: paddle.Tensor, # nb x nloc x ng1 + gg1: paddle.Tensor, # nb x nloc x nnei x ng1 + nlist_mask: paddle.Tensor, # nb x nloc x nnei + sw: paddle.Tensor, # nb x nloc x nnei + ) -> paddle.Tensor: + nb, nloc, nnei = nlist_mask.shape + ni, nd, nh = self.input_dim, self.hidden_dim, self.head_num + assert ni == g1.shape[-1] + assert ni == gg1.shape[-1] + # nb x nloc x nd x nh + g1q = self.mapq(g1).reshape([nb, nloc, nd, nh]) + # nb x nloc x nh x nd + g1q = paddle.transpose(g1q, (0, 1, 3, 2)) + # nb x nloc x nnei x (nd+ni) x nh + gg1kv = self.mapkv(gg1).reshape([nb, nloc, nnei, nd + ni, nh]) + gg1kv = paddle.transpose(gg1kv, (0, 1, 4, 2, 3)) + # nb x nloc x nh x nnei x nd, nb x nloc x nh x nnei x ng1 + gg1k, gg1v = paddle.split(gg1kv, [nd, ni], axis=-1) + + # nb x nloc x nh x 1 x nnei + attnw = ( + paddle.matmul(g1q.unsqueeze(-2), paddle.transpose(gg1k, [0, 1, 2, 4, 3])) + / nd**0.5 + ) + # nb x nloc x nh x nnei + attnw = attnw.squeeze(-2) + # mask the attenmap, nb x nloc x 1 x nnei + attnw_mask = ~nlist_mask.unsqueeze(-2) + # nb x nloc x nh x nnei + if self.smooth: + attnw = (attnw + self.attnw_shift) * sw.unsqueeze(-2) - self.attnw_shift + else: + attnw = attnw.masked_fill( + attnw_mask, + float("-inf"), + ) + attnw = paddle.nn.functional.softmax(attnw, axis=-1) + attnw = attnw.masked_fill( + attnw_mask, + 0.0, + ) + if self.smooth: + attnw = attnw * sw.unsqueeze(-2) + + # nb x nloc x nh x ng1 + ret = ( + paddle.matmul(attnw.unsqueeze(-2), gg1v) + .squeeze(-2) + .reshape([nb, nloc, nh * ni]) + ) + # nb x nloc x ng1 + ret = self.head_map(ret) + return ret + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. 
+ """ + return { + "@class": "LocalAtten", + "@version": 1, + "input_dim": self.input_dim, + "hidden_dim": self.hidden_dim, + "head_num": self.head_num, + "smooth": self.smooth, + "attnw_shift": self.attnw_shift, + "precision": self.precision, + "mapq": self.mapq.serialize(), + "mapkv": self.mapkv.serialize(), + "head_map": self.head_map.serialize(), + } + + @classmethod + def deserialize(cls, data: dict) -> "LocalAtten": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + mapq = data.pop("mapq") + mapkv = data.pop("mapkv") + head_map = data.pop("head_map") + obj = cls(**data) + obj.mapq = MLPLayer.deserialize(mapq) + obj.mapkv = MLPLayer.deserialize(mapkv) + obj.head_map = MLPLayer.deserialize(head_map) + return obj + + +class RepformerLayer(paddle.nn.Layer): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + g1_dim=128, + g2_dim=16, + axis_neuron: int = 4, + update_chnnl_2: bool = True, + update_g1_has_conv: bool = True, + update_g1_has_drrd: bool = True, + update_g1_has_grrg: bool = True, + update_g1_has_attn: bool = True, + update_g2_has_g1g1: bool = True, + update_g2_has_attn: bool = True, + update_h2: bool = False, + attn1_hidden: int = 64, + attn1_nhead: int = 4, + attn2_hidden: int = 16, + attn2_nhead: int = 4, + attn2_has_gate: bool = False, + activation_function: str = "tanh", + update_style: str = "res_avg", + update_residual: float = 0.001, + update_residual_init: str = "norm", + smooth: bool = True, + precision: str = "float64", + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + use_sqrt_nnei: bool = True, + g1_out_conv: bool = True, + g1_out_mlp: bool = True, + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + super().__init__() + self.epsilon = 1e-4 # protection of 1./nnei + self.rcut = float(rcut) + self.rcut_smth = float(rcut_smth) + self.ntypes = ntypes + sel = [sel] if isinstance(sel, int) else sel + self.nnei = sum(sel) + assert len(sel) == 1 + self.sel = sel + self.sec = self.sel + self.axis_neuron = axis_neuron + self.activation_function = activation_function + self.act = ActivationFn(activation_function) + self.update_g1_has_grrg = update_g1_has_grrg + self.update_g1_has_drrd = update_g1_has_drrd + self.update_g1_has_conv = update_g1_has_conv + self.update_g1_has_attn = update_g1_has_attn + self.update_chnnl_2 = update_chnnl_2 + self.update_g2_has_g1g1 = update_g2_has_g1g1 if self.update_chnnl_2 else False + self.update_g2_has_attn = update_g2_has_attn if self.update_chnnl_2 else False + self.update_h2 = update_h2 if self.update_chnnl_2 else False + del update_g2_has_g1g1, update_g2_has_attn, update_h2 + self.attn1_hidden = attn1_hidden + self.attn1_nhead = attn1_nhead + self.attn2_hidden = attn2_hidden + self.attn2_nhead = attn2_nhead + self.attn2_has_gate = attn2_has_gate + self.update_style = update_style + self.update_residual = update_residual + self.update_residual_init = update_residual_init + self.smooth = smooth + self.g1_dim = g1_dim + self.g2_dim = g2_dim + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.precision = precision + self.seed = seed + self.use_sqrt_nnei = use_sqrt_nnei + self.g1_out_conv = g1_out_conv + self.g1_out_mlp = g1_out_mlp + + assert update_residual_init in [ + "norm", + "const", + ], "'update_residual_init' only support 'norm' or 'const'!" 
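+        # A rough sketch of the "res_residual" update style handled below: each
+        # enabled update branch appends one learnable per-channel scale to
+        # g1_residual / g2_residual / h2_residual, and the layer then combines the
+        # collected updates approximately as x <- x + sum_i r_i * u_i (the other
+        # styles, "res_avg" and "res_incr", combine updates without learnable scales).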
+ self.update_residual = update_residual + self.update_residual_init = update_residual_init + self.g1_residual = [] + self.g2_residual = [] + self.h2_residual = [] + + if self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 0), + ) + ) + + g1_in_dim = self.cal_1_dim(g1_dim, g2_dim, self.axis_neuron) + self.linear1 = MLPLayer( + g1_in_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 1), + ) + self.linear2 = None + self.proj_g1g2 = None + self.proj_g1g1g2 = None + self.attn2g_map = None + self.attn2_mh_apply = None + self.attn2_lm = None + self.attn2_ev_apply = None + self.loc_attn = None + + if self.update_chnnl_2: + self.linear2 = MLPLayer( + g2_dim, + g2_dim, + precision=precision, + seed=child_seed(seed, 2), + ) + if self.update_style == "res_residual": + self.g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 3), + ) + ) + if self.g1_out_mlp: + self.g1_self_mlp = MLPLayer( + g1_dim, + g1_dim, + precision=precision, + seed=child_seed(seed, 15), + ) + if self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 16), + ) + ) + else: + self.g1_self_mlp = None + if self.update_g1_has_conv: + if not self.g1_out_conv: + self.proj_g1g2 = MLPLayer( + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + else: + self.proj_g1g2 = MLPLayer( + g2_dim, + g1_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 4), + ) + if self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 17), + ) + ) + if self.update_g2_has_g1g1: + self.proj_g1g1g2 = MLPLayer( + g1_dim, + g2_dim, + bias=False, + precision=precision, + seed=child_seed(seed, 5), + ) + if self.update_style == "res_residual": + self.g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 6), + ) + ) + if self.update_g2_has_attn or self.update_h2: + self.attn2g_map = Atten2Map( + g2_dim, + attn2_hidden, + attn2_nhead, + attn2_has_gate, + self.smooth, + precision=precision, + seed=child_seed(seed, 7), + ) + if self.update_g2_has_attn: + self.attn2_mh_apply = Atten2MultiHeadApply( + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 8) + ) + self.attn2_lm = LayerNorm( + g2_dim, + eps=ln_eps, + trainable=trainable_ln, + precision=precision, + seed=child_seed(seed, 9), + ) + if self.update_style == "res_residual": + self.g2_residual.append( + get_residual( + g2_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 10), + ) + ) + + if self.update_h2: + self.attn2_ev_apply = Atten2EquiVarApply( + g2_dim, attn2_nhead, precision=precision, seed=child_seed(seed, 11) + ) + if self.update_style == "res_residual": + self.h2_residual.append( + get_residual( + 1, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 12), + ) + ) + if self.update_g1_has_attn: + self.loc_attn = LocalAtten( + g1_dim, + attn1_hidden, + attn1_nhead, + self.smooth, + precision=precision, + seed=child_seed(seed, 13), + ) + if 
self.update_style == "res_residual": + self.g1_residual.append( + get_residual( + g1_dim, + self.update_residual, + self.update_residual_init, + precision=precision, + seed=child_seed(seed, 14), + ) + ) + + self.g1_residual = nn.ParameterList(self.g1_residual) + self.g2_residual = nn.ParameterList(self.g2_residual) + self.h2_residual = nn.ParameterList(self.h2_residual) + + def cal_1_dim(self, g1d: int, g2d: int, ax: int) -> int: + ret = g1d if not self.g1_out_mlp else 0 + if self.update_g1_has_grrg: + ret += g2d * ax + if self.update_g1_has_drrd: + ret += g1d * ax + if self.update_g1_has_conv and not self.g1_out_conv: + ret += g2d + return ret + + def _update_h2( + self, + h2: paddle.Tensor, + attn: paddle.Tensor, + ) -> paddle.Tensor: + """ + Calculate the attention weights update for pair-wise equivariant rep. + + Parameters + ---------- + h2 + Pair-wise equivariant rep tensors, with shape nf x nloc x nnei x 3. + attn + Attention weights from g2 attention, with shape nf x nloc x nnei x nnei x nh2. + """ + assert self.attn2_ev_apply is not None + # nf x nloc x nnei x nh2 + h2_1 = self.attn2_ev_apply(attn, h2) + return h2_1 + + def _update_g1_conv( + self, + gg1: paddle.Tensor, + g2: paddle.Tensor, + nlist_mask: paddle.Tensor, + sw: paddle.Tensor, + ) -> paddle.Tensor: + """ + Calculate the convolution update for atomic invariant rep. + + Parameters + ---------- + gg1 + Neighbor-wise atomic invariant rep, with shape nb x nloc x nnei x ng1. + g2 + Pair invariant rep, with shape nb x nloc x nnei x ng2. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + """ + assert self.proj_g1g2 is not None + nb, nloc, nnei, _ = g2.shape + ng1 = gg1.shape[-1] + ng2 = g2.shape[-1] + if not self.g1_out_conv: + # gg1 : nb x nloc x nnei x ng2 + gg1 = self.proj_g1g2(gg1).reshape([nb, nloc, nnei, ng2]) + else: + gg1 = gg1.reshape([nb, nloc, nnei, ng1]) + # nb x nloc x nnei x ng2/ng1 + gg1 = _apply_nlist_mask(gg1, nlist_mask) + if not self.smooth: + # normalized by number of neighbors, not smooth + # nb x nloc x 1 + # must use astype here to convert bool to float, otherwise there will be numerical difference from numpy + invnnei = 1.0 / ( + self.epsilon + paddle.sum(nlist_mask.astype(gg1.dtype), axis=-1) + ).unsqueeze(-1) + else: + gg1 = _apply_switch(gg1, sw) + invnnei = (1.0 / float(nnei)) * paddle.ones( + (nb, nloc, 1), dtype=gg1.dtype + ).to(device=gg1.place) + if not self.g1_out_conv: + # nb x nloc x ng2 + g1_11 = paddle.sum(g2 * gg1, axis=2) * invnnei + else: + g2 = self.proj_g1g2(g2).reshape([nb, nloc, nnei, ng1]) + # nb x nloc x ng1 + g1_11 = paddle.sum(g2 * gg1, axis=2) * invnnei + return g1_11 + + @staticmethod + def _cal_hg( + g2: paddle.Tensor, + h2: paddle.Tensor, + nlist_mask: paddle.Tensor, + sw: paddle.Tensor, + smooth: bool = True, + epsilon: float = 1e-4, + use_sqrt_nnei: bool = True, + ) -> paddle.Tensor: + """ + Calculate the transposed rotation matrix. + + Parameters + ---------- + g2 + Neighbor-wise/Pair-wise invariant rep tensors, with shape nb x nloc x nnei x ng2. + h2 + Neighbor-wise/Pair-wise equivariant rep tensors, with shape nb x nloc x nnei x 3. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. 
+ sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + smooth + Whether to use smoothness in processes such as attention weights calculation. + epsilon + Protection of 1./nnei. + + Returns + ------- + hg + The transposed rotation matrix, with shape nb x nloc x 3 x ng2. + """ + # g2: nb x nloc x nnei x ng2 + # h2: nb x nloc x nnei x 3 + # msk: nb x nloc x nnei + nb, nloc, nnei, _ = g2.shape + ng2 = g2.shape[-1] + # nb x nloc x nnei x ng2 + g2 = _apply_nlist_mask(g2, nlist_mask) + if not smooth: + # nb x nloc + # must use astype here to convert bool to float, otherwise there will be numerical difference from numpy + if not use_sqrt_nnei: + invnnei = 1.0 / ( + epsilon + paddle.sum(nlist_mask.astype(g2.dtype), axis=-1) + ) + else: + invnnei = 1.0 / ( + epsilon + + paddle.sqrt(paddle.sum(nlist_mask.astype(g2.dtype), axis=-1)) + ) + # nb x nloc x 1 x 1 + invnnei = invnnei.unsqueeze(-1).unsqueeze(-1) + else: + g2 = _apply_switch(g2, sw) + if not use_sqrt_nnei: + invnnei = (1.0 / float(nnei)) * paddle.ones( + (nb, nloc, 1, 1), dtype=g2.dtype + ).to(device=g2.place) + else: + invnnei = paddle.rsqrt( + float(nnei) + * paddle.ones((nb, nloc, 1, 1), dtype=g2.dtype).to(device=g2.place) + ) + # nb x nloc x 3 x ng2 + h2g2 = paddle.matmul(paddle.transpose(h2, [0, 1, 3, 2]), g2) * invnnei + return h2g2 + + @staticmethod + def _cal_grrg(h2g2: paddle.Tensor, axis_neuron: int) -> paddle.Tensor: + """ + Calculate the atomic invariant rep. + + Parameters + ---------- + h2g2 + The transposed rotation matrix, with shape nb x nloc x 3 x ng2. + axis_neuron + Size of the submatrix. + + Returns + ------- + grrg + Atomic invariant rep, with shape nb x nloc x (axis_neuron x ng2) + """ + # nb x nloc x 3 x ng2 + nb, nloc, _, ng2 = h2g2.shape + # nb x nloc x 3 x axis + # h2g2m = paddle.split(h2g2, decomp.sec(h2g2.shape[-1], axis_neuron), axis=-1)[0] + h2g2m = h2g2[..., :axis_neuron] # use slice instead of split + # nb x nloc x axis x ng2 + g1_13 = paddle.matmul(paddle.transpose(h2g2m, [0, 1, 3, 2]), h2g2) / (3.0**1) + # nb x nloc x (axisxng2) + g1_13 = g1_13.reshape([nb, nloc, axis_neuron * ng2]) + return g1_13 + + def symmetrization_op( + self, + g2: paddle.Tensor, + h2: paddle.Tensor, + nlist_mask: paddle.Tensor, + sw: paddle.Tensor, + axis_neuron: int, + smooth: bool = True, + epsilon: float = 1e-4, + ) -> paddle.Tensor: + """ + Symmetrization operator to obtain atomic invariant rep. + + Parameters + ---------- + g2 + Neighbor-wise/Pair-wise invariant rep tensors, with shape nb x nloc x nnei x ng2. + h2 + Neighbor-wise/Pair-wise equivariant rep tensors, with shape nb x nloc x nnei x 3. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + axis_neuron + Size of the submatrix. + smooth + Whether to use smoothness in processes such as attention weights calculation. + epsilon + Protection of 1./nnei. 
+ + Returns + ------- + grrg + Atomic invariant rep, with shape nb x nloc x (axis_neuron x ng2) + """ + # g2: nb x nloc x nnei x ng2 + # h2: nb x nloc x nnei x 3 + # msk: nb x nloc x nnei + nb, nloc, nnei, _ = g2.shape + # nb x nloc x 3 x ng2 + h2g2 = self._cal_hg( + g2, + h2, + nlist_mask, + sw, + smooth=smooth, + epsilon=epsilon, + use_sqrt_nnei=self.use_sqrt_nnei, + ) + # nb x nloc x (axisxng2) + g1_13 = self._cal_grrg(h2g2, axis_neuron) + return g1_13 + + def _update_g2_g1g1( + self, + g1: paddle.Tensor, # nb x nloc x ng1 + gg1: paddle.Tensor, # nb x nloc x nnei x ng1 + nlist_mask: paddle.Tensor, # nb x nloc x nnei + sw: paddle.Tensor, # nb x nloc x nnei + ) -> paddle.Tensor: + """ + Update the g2 using element-wise dot g1_i * g1_j. + + Parameters + ---------- + g1 + Atomic invariant rep, with shape nb x nloc x ng1. + gg1 + Neighbor-wise atomic invariant rep, with shape nb x nloc x nnei x ng1. + nlist_mask + Neighbor list mask, where zero means no neighbor, with shape nb x nloc x nnei. + sw + The switch function, which equals 1 within the rcut_smth range, smoothly decays from 1 to 0 between rcut_smth and rcut, + and remains 0 beyond rcut, with shape nb x nloc x nnei. + """ + ret = g1.unsqueeze(-2) * gg1 + # nb x nloc x nnei x ng1 + ret = _apply_nlist_mask(ret, nlist_mask) + if self.smooth: + ret = _apply_switch(ret, sw) + return ret + + def forward( + self, + g1_ext: paddle.Tensor, # nf x nall x ng1 + g2: paddle.Tensor, # nf x nloc x nnei x ng2 + h2: paddle.Tensor, # nf x nloc x nnei x 3 + nlist: paddle.Tensor, # nf x nloc x nnei + nlist_mask: paddle.Tensor, # nf x nloc x nnei + sw: paddle.Tensor, # switch func, nf x nloc x nnei + ): + """ + Parameters + ---------- + g1_ext : nf x nall x ng1 extended single-atom channel + g2 : nf x nloc x nnei x ng2 pair-atom channel, invariant + h2 : nf x nloc x nnei x 3 pair-atom channel, equivariant + nlist : nf x nloc x nnei neighbor list (padded neis are set to 0) + nlist_mask : nf x nloc x nnei masks of the neighbor list. 
real nei 1 otherwise 0 + sw : nf x nloc x nnei switch function + + Returns + ------- + g1: nf x nloc x ng1 updated single-atom channel + g2: nf x nloc x nnei x ng2 updated pair-atom channel, invariant + h2: nf x nloc x nnei x 3 updated pair-atom channel, equivariant + """ + cal_gg1 = ( + self.update_g1_has_drrd + or self.update_g1_has_conv + or self.update_g1_has_attn + or self.update_g2_has_g1g1 + ) + + nb, nloc, nnei, _ = g2.shape + nall = g1_ext.shape[1] + g1, _ = paddle.split(g1_ext, [nloc, nall - nloc], axis=1) + if paddle.in_dynamic_mode(): + assert [nb, nloc] == g1.shape[:2] + if paddle.in_dynamic_mode(): + assert [nb, nloc, nnei] == h2.shape[:3] + + g2_update: list[paddle.Tensor] = [g2] + h2_update: list[paddle.Tensor] = [h2] + g1_update: list[paddle.Tensor] = [g1] + g1_mlp: list[paddle.Tensor] = [g1] if not self.g1_out_mlp else [] + if self.g1_out_mlp: + if paddle.in_dynamic_mode(): + assert self.g1_self_mlp is not None + g1_self_mlp = self.act(self.g1_self_mlp(g1)) + g1_update.append(g1_self_mlp) + + if cal_gg1: + gg1 = _make_nei_g1(g1_ext, nlist) + else: + gg1 = None + + if self.update_chnnl_2: + # mlp(g2) + if paddle.in_dynamic_mode(): + assert self.linear2 is not None + # nb x nloc x nnei x ng2 + g2_1 = self.act(self.linear2(g2)) + g2_update.append(g2_1) + + if self.update_g2_has_g1g1: + # linear(g1_i * g1_j) + if paddle.in_dynamic_mode(): + assert gg1 is not None + if paddle.in_dynamic_mode(): + assert self.proj_g1g1g2 is not None + g2_update.append( + self.proj_g1g1g2(self._update_g2_g1g1(g1, gg1, nlist_mask, sw)) + ) + + if self.update_g2_has_attn or self.update_h2: + # gated_attention(g2, h2) + if paddle.in_dynamic_mode(): + assert self.attn2g_map is not None + # nb x nloc x nnei x nnei x nh + AAg = self.attn2g_map(g2, h2, nlist_mask, sw) + + if self.update_g2_has_attn: + if paddle.in_dynamic_mode(): + assert self.attn2_mh_apply is not None + if paddle.in_dynamic_mode(): + assert self.attn2_lm is not None + # nb x nloc x nnei x ng2 + g2_2 = self.attn2_mh_apply(AAg, g2) + g2_2 = self.attn2_lm(g2_2) + g2_update.append(g2_2) + + if self.update_h2: + # linear_head(attention_weights * h2) + h2_update.append(self._update_h2(h2, AAg)) + + if self.update_g1_has_conv: + if paddle.in_dynamic_mode(): + assert gg1 is not None + g1_conv = self._update_g1_conv(gg1, g2, nlist_mask, sw) + if not self.g1_out_conv: + g1_mlp.append(g1_conv) + else: + g1_update.append(g1_conv) + + if self.update_g1_has_grrg: + g1_mlp.append( + self.symmetrization_op( + g2, + h2, + nlist_mask, + sw, + self.axis_neuron, + smooth=self.smooth, + epsilon=self.epsilon, + ) + ) + + if self.update_g1_has_drrd: + if paddle.in_dynamic_mode(): + assert gg1 is not None + g1_mlp.append( + self.symmetrization_op( + gg1, + h2, + nlist_mask, + sw, + self.axis_neuron, + smooth=self.smooth, + epsilon=self.epsilon, + ) + ) + + # nb x nloc x [ng1+ng2+(axisxng2)+(axisxng1)] + # conv grrg drrd + g1_1 = self.act(self.linear1(paddle.concat(g1_mlp, axis=-1))) + g1_update.append(g1_1) + + if self.update_g1_has_attn: + assert gg1 is not None + assert self.loc_attn is not None + g1_update.append(self.loc_attn(g1, gg1, nlist_mask, sw)) + + # update + if self.update_chnnl_2: + g2_new = self.list_update(g2_update, "g2") + h2_new = self.list_update(h2_update, "h2") + else: + g2_new, h2_new = g2, h2 + g1_new = self.list_update(g1_update, "g1") + return g1_new, g2_new, h2_new + + def list_update_res_avg( + self, + update_list: list[paddle.Tensor], + ) -> paddle.Tensor: + nitem = len(update_list) + uu = update_list[0] + for ii in range(1, 
nitem): + uu = uu + update_list[ii] + return uu / (float(nitem) ** 0.5) + + def list_update_res_incr(self, update_list: list[paddle.Tensor]) -> paddle.Tensor: + nitem = len(update_list) + uu = update_list[0] + scale = 1.0 / (float(nitem - 1) ** 0.5) if nitem > 1 else 0.0 + for ii in range(1, nitem): + uu = uu + scale * update_list[ii] + return uu + + def list_update_res_residual( + self, update_list: list[paddle.Tensor], update_name: str = "g1" + ) -> paddle.Tensor: + nitem = len(update_list) + uu = update_list[0] + # make jit happy + if update_name == "g1": + for ii, vv in enumerate(self.g1_residual): + uu = uu + vv * update_list[ii + 1] + elif update_name == "g2": + for ii, vv in enumerate(self.g2_residual): + uu = uu + vv * update_list[ii + 1] + elif update_name == "h2": + for ii, vv in enumerate(self.h2_residual): + uu = uu + vv * update_list[ii + 1] + else: + raise NotImplementedError + return uu + + def list_update( + self, update_list: list[paddle.Tensor], update_name: str = "g1" + ) -> paddle.Tensor: + if self.update_style == "res_avg": + return self.list_update_res_avg(update_list) + elif self.update_style == "res_incr": + return self.list_update_res_incr(update_list) + elif self.update_style == "res_residual": + return self.list_update_res_residual(update_list, update_name=update_name) + else: + raise RuntimeError(f"unknown update style {self.update_style}") + + def serialize(self) -> dict: + """Serialize the networks to a dict. + + Returns + ------- + dict + The serialized networks. + """ + data = { + "@class": "RepformerLayer", + "@version": 2, + "rcut": self.rcut, + "rcut_smth": self.rcut_smth, + "sel": self.sel, + "ntypes": self.ntypes, + "g1_dim": self.g1_dim, + "g2_dim": self.g2_dim, + "axis_neuron": self.axis_neuron, + "update_chnnl_2": self.update_chnnl_2, + "update_g1_has_conv": self.update_g1_has_conv, + "update_g1_has_drrd": self.update_g1_has_drrd, + "update_g1_has_grrg": self.update_g1_has_grrg, + "update_g1_has_attn": self.update_g1_has_attn, + "update_g2_has_g1g1": self.update_g2_has_g1g1, + "update_g2_has_attn": self.update_g2_has_attn, + "update_h2": self.update_h2, + "attn1_hidden": self.attn1_hidden, + "attn1_nhead": self.attn1_nhead, + "attn2_hidden": self.attn2_hidden, + "attn2_nhead": self.attn2_nhead, + "attn2_has_gate": self.attn2_has_gate, + "activation_function": self.activation_function, + "update_style": self.update_style, + "smooth": self.smooth, + "precision": self.precision, + "trainable_ln": self.trainable_ln, + "use_sqrt_nnei": self.use_sqrt_nnei, + "g1_out_conv": self.g1_out_conv, + "g1_out_mlp": self.g1_out_mlp, + "ln_eps": self.ln_eps, + "linear1": self.linear1.serialize(), + } + if self.update_chnnl_2: + data.update( + { + "linear2": self.linear2.serialize(), + } + ) + if self.update_g1_has_conv: + data.update( + { + "proj_g1g2": self.proj_g1g2.serialize(), + } + ) + if self.update_g2_has_g1g1: + data.update( + { + "proj_g1g1g2": self.proj_g1g1g2.serialize(), + } + ) + if self.update_g2_has_attn or self.update_h2: + data.update( + { + "attn2g_map": self.attn2g_map.serialize(), + } + ) + if self.update_g2_has_attn: + data.update( + { + "attn2_mh_apply": self.attn2_mh_apply.serialize(), + "attn2_lm": self.attn2_lm.serialize(), + } + ) + + if self.update_h2: + data.update( + { + "attn2_ev_apply": self.attn2_ev_apply.serialize(), + } + ) + if self.update_g1_has_attn: + data.update( + { + "loc_attn": self.loc_attn.serialize(), + } + ) + if self.g1_out_mlp: + data.update( + { + "g1_self_mlp": self.g1_self_mlp.serialize(), + } + ) + if 
self.update_style == "res_residual": + data.update( + { + "@variables": { + "g1_residual": [to_numpy_array(t) for t in self.g1_residual], + "g2_residual": [to_numpy_array(t) for t in self.g2_residual], + "h2_residual": [to_numpy_array(t) for t in self.h2_residual], + } + } + ) + return data + + @classmethod + def deserialize(cls, data: dict) -> "RepformerLayer": + """Deserialize the networks from a dict. + + Parameters + ---------- + data : dict + The dict to deserialize from. + """ + data = data.copy() + check_version_compatibility(data.pop("@version"), 2, 1) + data.pop("@class") + linear1 = data.pop("linear1") + update_chnnl_2 = data["update_chnnl_2"] + update_g1_has_conv = data["update_g1_has_conv"] + update_g2_has_g1g1 = data["update_g2_has_g1g1"] + update_g2_has_attn = data["update_g2_has_attn"] + update_h2 = data["update_h2"] + update_g1_has_attn = data["update_g1_has_attn"] + update_style = data["update_style"] + g1_out_mlp = data["g1_out_mlp"] + + linear2 = data.pop("linear2", None) + proj_g1g2 = data.pop("proj_g1g2", None) + proj_g1g1g2 = data.pop("proj_g1g1g2", None) + attn2g_map = data.pop("attn2g_map", None) + attn2_mh_apply = data.pop("attn2_mh_apply", None) + attn2_lm = data.pop("attn2_lm", None) + attn2_ev_apply = data.pop("attn2_ev_apply", None) + loc_attn = data.pop("loc_attn", None) + g1_self_mlp = data.pop("g1_self_mlp", None) + variables = data.pop("@variables", {}) + g1_residual = variables.get("g1_residual", data.pop("g1_residual", [])) + g2_residual = variables.get("g2_residual", data.pop("g2_residual", [])) + h2_residual = variables.get("h2_residual", data.pop("h2_residual", [])) + + obj = cls(**data) + obj.linear1 = MLPLayer.deserialize(linear1) + if update_chnnl_2: + assert isinstance(linear2, dict) + obj.linear2 = MLPLayer.deserialize(linear2) + if update_g1_has_conv: + assert isinstance(proj_g1g2, dict) + obj.proj_g1g2 = MLPLayer.deserialize(proj_g1g2) + if update_g2_has_g1g1: + assert isinstance(proj_g1g1g2, dict) + obj.proj_g1g1g2 = MLPLayer.deserialize(proj_g1g1g2) + if update_g2_has_attn or update_h2: + assert isinstance(attn2g_map, dict) + obj.attn2g_map = Atten2Map.deserialize(attn2g_map) + if update_g2_has_attn: + assert isinstance(attn2_mh_apply, dict) + assert isinstance(attn2_lm, dict) + obj.attn2_mh_apply = Atten2MultiHeadApply.deserialize(attn2_mh_apply) + obj.attn2_lm = LayerNorm.deserialize(attn2_lm) + if update_h2: + assert isinstance(attn2_ev_apply, dict) + obj.attn2_ev_apply = Atten2EquiVarApply.deserialize(attn2_ev_apply) + if update_g1_has_attn: + assert isinstance(loc_attn, dict) + obj.loc_attn = LocalAtten.deserialize(loc_attn) + if g1_out_mlp: + assert isinstance(g1_self_mlp, dict) + obj.g1_self_mlp = MLPLayer.deserialize(g1_self_mlp) + if update_style == "res_residual": + for ii, t in enumerate(obj.g1_residual): + t.data = to_paddle_tensor(g1_residual[ii]) + for ii, t in enumerate(obj.g2_residual): + t.data = to_paddle_tensor(g2_residual[ii]) + for ii, t in enumerate(obj.h2_residual): + t.data = to_paddle_tensor(h2_residual[ii]) + return obj diff --git a/deepmd/pd/model/descriptor/repformers.py b/deepmd/pd/model/descriptor/repformers.py new file mode 100644 index 0000000000..47d92317df --- /dev/null +++ b/deepmd/pd/model/descriptor/repformers.py @@ -0,0 +1,576 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor.descriptor import ( + DescriptorBlock, +) +from 
deepmd.pd.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pd.model.network.mlp import ( + MLPLayer, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pd.utils.utils import ( + ActivationFn, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.path import ( + DPPath, +) + +from .repformer_layer import ( + RepformerLayer, +) + + +@DescriptorBlock.register("se_repformer") +@DescriptorBlock.register("se_uni") +class DescrptBlockRepformers(DescriptorBlock): + def __init__( + self, + rcut, + rcut_smth, + sel: int, + ntypes: int, + nlayers: int = 3, + g1_dim=128, + g2_dim=16, + axis_neuron: int = 4, + direct_dist: bool = False, + update_g1_has_conv: bool = True, + update_g1_has_drrd: bool = True, + update_g1_has_grrg: bool = True, + update_g1_has_attn: bool = True, + update_g2_has_g1g1: bool = True, + update_g2_has_attn: bool = True, + update_h2: bool = False, + attn1_hidden: int = 64, + attn1_nhead: int = 4, + attn2_hidden: int = 16, + attn2_nhead: int = 4, + attn2_has_gate: bool = False, + activation_function: str = "tanh", + update_style: str = "res_avg", + update_residual: float = 0.001, + update_residual_init: str = "norm", + set_davg_zero: bool = True, + smooth: bool = True, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + precision: str = "float64", + trainable_ln: bool = True, + ln_eps: Optional[float] = 1e-5, + seed: Optional[Union[int, list[int]]] = None, + use_sqrt_nnei: bool = True, + g1_out_conv: bool = True, + g1_out_mlp: bool = True, + ) -> None: + r""" + The repformer descriptor block. + + Parameters + ---------- + rcut : float + The cut-off radius. + rcut_smth : float + Where to start smoothing. For example the 1/r term is smoothed from rcut to rcut_smth. + sel : int + Maximally possible number of selected neighbors. + ntypes : int + Number of element types + nlayers : int, optional + Number of repformer layers. + g1_dim : int, optional + Dimension of the first graph convolution layer. + g2_dim : int, optional + Dimension of the second graph convolution layer. + axis_neuron : int, optional + Size of the submatrix of G (embedding matrix). + direct_dist : bool, optional + Whether to use direct distance information (1/r term) in the repformer block. + update_g1_has_conv : bool, optional + Whether to update the g1 rep with convolution term. + update_g1_has_drrd : bool, optional + Whether to update the g1 rep with the drrd term. + update_g1_has_grrg : bool, optional + Whether to update the g1 rep with the grrg term. + update_g1_has_attn : bool, optional + Whether to update the g1 rep with the localized self-attention. + update_g2_has_g1g1 : bool, optional + Whether to update the g2 rep with the g1xg1 term. + update_g2_has_attn : bool, optional + Whether to update the g2 rep with the gated self-attention. + update_h2 : bool, optional + Whether to update the h2 rep. + attn1_hidden : int, optional + The hidden dimension of localized self-attention to update the g1 rep. + attn1_nhead : int, optional + The number of heads in localized self-attention to update the g1 rep. + attn2_hidden : int, optional + The hidden dimension of gated self-attention to update the g2 rep. + attn2_nhead : int, optional + The number of heads in gated self-attention to update the g2 rep. 
+        attn2_has_gate : bool, optional
+            Whether to use gate in the gated self-attention to update the g2 rep.
+        activation_function : str, optional
+            The activation function in the embedding net.
+        update_style : str, optional
+            Style to update a representation.
+            Supported options are:
+            -'res_avg': Updates a rep `u` with: u = 1/\\sqrt{n+1} (u + u_1 + u_2 + ... + u_n)
+            -'res_incr': Updates a rep `u` with: u = u + 1/\\sqrt{n} (u_1 + u_2 + ... + u_n)
+            -'res_residual': Updates a rep `u` with: u = u + (r1*u_1 + r2*u_2 + ... + rn*u_n)
+            where `r1`, `r2` ... `rn` are residual weights defined by `update_residual`
+            and `update_residual_init`.
+        update_residual : float, optional
+            When updating using residual mode, the initial std of residual vector weights.
+        update_residual_init : str, optional
+            When updating using residual mode, the initialization mode of residual vector weights.
+        set_davg_zero : bool, optional
+            Set the normalization average to zero.
+        precision : str, optional
+            The precision of the embedding net parameters.
+        smooth : bool, optional
+            Whether to use smoothness in processes such as attention weights calculation.
+        exclude_types : list[tuple[int, int]], optional
+            The excluded pairs of types which have no interaction with each other.
+            For example, `[(0, 1)]` means no interaction between type 0 and type 1.
+        env_protection : float, optional
+            Protection parameter to prevent division by zero errors during environment matrix calculations.
+            For example, when using paddings, there may be zero distances between neighbors, which would
+            cause a division-by-zero error in the environment matrix calculation without this protection.
+        trainable_ln : bool, optional
+            Whether to use trainable shift and scale weights in layer normalization.
+        use_sqrt_nnei : bool, optional
+            Whether to use the square root of the number of neighbors for symmetrization_op normalization instead of using the number of neighbors directly.
+        g1_out_conv : bool, optional
+            Whether to put the convolutional update of g1 separately outside the concatenated MLP update.
+        g1_out_mlp : bool, optional
+            Whether to put the self MLP update of g1 separately outside the concatenated MLP update.
+        ln_eps : float, optional
+            The epsilon value for layer normalization.
+        seed : int, optional
+            Random seed for parameter initialization.
+        """
+        super().__init__()
+        self.rcut = float(rcut)
+        self.rcut_smth = float(rcut_smth)
+        self.ntypes = ntypes
+        self.nlayers = nlayers
+        sel = [sel] if isinstance(sel, int) else sel
+        self.nnei = sum(sel)
+        self.ndescrpt = self.nnei * 4  # use full descriptor.
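+        # This block runs in mixed-types mode (mixed_types() returns True), so a
+        # single neighbor budget is shared by all element types: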
+ assert len(sel) == 1 + self.sel = sel + self.sec = self.sel + self.split_sel = self.sel + self.axis_neuron = axis_neuron + self.set_davg_zero = set_davg_zero + self.g1_dim = g1_dim + self.g2_dim = g2_dim + self.update_g1_has_conv = update_g1_has_conv + self.update_g1_has_drrd = update_g1_has_drrd + self.update_g1_has_grrg = update_g1_has_grrg + self.update_g1_has_attn = update_g1_has_attn + self.update_g2_has_g1g1 = update_g2_has_g1g1 + self.update_g2_has_attn = update_g2_has_attn + self.update_h2 = update_h2 + self.attn1_hidden = attn1_hidden + self.attn1_nhead = attn1_nhead + self.attn2_has_gate = attn2_has_gate + self.attn2_hidden = attn2_hidden + self.attn2_nhead = attn2_nhead + self.activation_function = activation_function + self.update_style = update_style + self.update_residual = update_residual + self.update_residual_init = update_residual_init + self.direct_dist = direct_dist + self.act = ActivationFn(activation_function) + self.smooth = smooth + self.use_sqrt_nnei = use_sqrt_nnei + self.g1_out_conv = g1_out_conv + self.g1_out_mlp = g1_out_mlp + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + self.env_protection = env_protection + self.precision = precision + self.prec = PRECISION_DICT[precision] + self.trainable_ln = trainable_ln + self.ln_eps = ln_eps + self.epsilon = 1e-4 + self.seed = seed + + self.g2_embd = MLPLayer( + 1, self.g2_dim, precision=precision, seed=child_seed(seed, 0) + ) + layers = [] + for ii in range(nlayers): + layers.append( + RepformerLayer( + self.rcut, + self.rcut_smth, + self.sel, + self.ntypes, + self.g1_dim, + self.g2_dim, + axis_neuron=self.axis_neuron, + update_chnnl_2=(ii != nlayers - 1), + update_g1_has_conv=self.update_g1_has_conv, + update_g1_has_drrd=self.update_g1_has_drrd, + update_g1_has_grrg=self.update_g1_has_grrg, + update_g1_has_attn=self.update_g1_has_attn, + update_g2_has_g1g1=self.update_g2_has_g1g1, + update_g2_has_attn=self.update_g2_has_attn, + update_h2=self.update_h2, + attn1_hidden=self.attn1_hidden, + attn1_nhead=self.attn1_nhead, + attn2_has_gate=self.attn2_has_gate, + attn2_hidden=self.attn2_hidden, + attn2_nhead=self.attn2_nhead, + activation_function=self.activation_function, + update_style=self.update_style, + update_residual=self.update_residual, + update_residual_init=self.update_residual_init, + smooth=self.smooth, + trainable_ln=self.trainable_ln, + ln_eps=self.ln_eps, + precision=precision, + use_sqrt_nnei=self.use_sqrt_nnei, + g1_out_conv=self.g1_out_conv, + g1_out_mlp=self.g1_out_mlp, + seed=child_seed(child_seed(seed, 1), ii), + ) + ) + self.layers = paddle.nn.LayerList(layers) + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = paddle.zeros(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + stddev = paddle.ones(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.stats = None + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + 
return self.ntypes + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def get_dim_emb(self) -> int: + """Returns the embedding dimension g2.""" + return self.g2_dim + + def __setitem__(self, key, value) -> None: + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.g1_dim + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.g1_dim + + @property + def dim_emb(self): + """Returns the embedding dimension g2.""" + return self.get_dim_emb() + + def reinit_exclude( + self, + exclude_types: list[tuple[int, int]] = [], + ) -> None: + self.exclude_types = exclude_types + self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types) + + def forward( + self, + nlist: paddle.Tensor, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + extended_atype_embd: Optional[paddle.Tensor] = None, + mapping: Optional[paddle.Tensor] = None, + type_embedding: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + if comm_dict is None: + assert mapping is not None + assert extended_atype_embd is not None + nframes, nloc, nnei = nlist.shape + nall = extended_coord.reshape([nframes, -1]).shape[1] // 3 + atype = extended_atype[:, :nloc] + # nb x nloc x nnei + exclude_mask = self.emask(nlist, extended_atype) + nlist = paddle.where(exclude_mask != 0, nlist, paddle.full_like(nlist, -1)) + # nb x nloc x nnei x 4, nb x nloc x nnei x 3, nb x nloc x nnei x 1 + dmatrix, diff, sw = prod_env_mat( + extended_coord, + nlist, + atype, + self.mean, + self.stddev, + self.rcut, + self.rcut_smth, + protection=self.env_protection, + ) + nlist_mask = nlist != -1 + sw = paddle.squeeze(sw, -1) + # beyond the cutoff sw should be 0.0 + sw = sw.masked_fill(~nlist_mask, 0.0) + + # [nframes, nloc, tebd_dim] + if comm_dict is None: + if paddle.in_dynamic_mode(): + assert isinstance(extended_atype_embd, paddle.Tensor) # for jit + atype_embd = extended_atype_embd[:, :nloc, :] + if paddle.in_dynamic_mode(): + assert list(atype_embd.shape) == [nframes, nloc, self.g1_dim] + else: + atype_embd = extended_atype_embd + if paddle.in_dynamic_mode(): + assert isinstance(atype_embd, paddle.Tensor) # for jit + g1 = self.act(atype_embd) + ng1 = g1.shape[-1] + # nb x nloc x nnei x 1, nb x nloc x nnei x 3 + if not self.direct_dist: + g2, h2 = paddle.split(dmatrix, [1, 3], axis=-1) + else: + # g2, h2 = paddle.linalg.norm(diff, axis=-1, keepdim=True), diff + g2, h2 = paddle.linalg.norm(diff, 
axis=-1, keepdim=True), diff + g2 = g2 / self.rcut + h2 = h2 / self.rcut + # nb x nloc x nnei x ng2 + g2 = self.act(self.g2_embd(g2)) + + # set all padding positions to index of 0 + # if the a neighbor is real or not is indicated by nlist_mask + nlist[nlist == -1] = 0 + # nb x nall x ng1 + if comm_dict is None: + assert mapping is not None + mapping = ( + mapping.reshape([nframes, nall]) + .unsqueeze(-1) + .expand([-1, -1, self.g1_dim]) + ) + for idx, ll in enumerate(self.layers): + # g1: nb x nloc x ng1 + # g1_ext: nb x nall x ng1 + if comm_dict is None: + assert mapping is not None + g1_ext = paddle.take_along_axis(g1, axis=1, indices=mapping) + else: + raise NotImplementedError("Not implemented yet") + # has_spin = "has_spin" in comm_dict + # if not has_spin: + # n_padding = nall - nloc + # g1 = paddle.nn.functional.pad( + # g1.squeeze(0), (0, 0, 0, n_padding), value=0.0 + # ) + # real_nloc = nloc + # real_nall = nall + # else: + # # for spin + # real_nloc = nloc // 2 + # real_nall = nall // 2 + # real_n_padding = real_nall - real_nloc + # g1_real, g1_virtual = paddle.split( + # g1, [real_nloc, real_nloc], axis=1 + # ) + # # mix_g1: nb x real_nloc x (ng1 * 2) + # mix_g1 = paddle.concat([g1_real, g1_virtual], axis=2) + # # nb x real_nall x (ng1 * 2) + # g1 = paddle.nn.functional.pad( + # mix_g1.squeeze(0), (0, 0, 0, real_n_padding), value=0.0 + # ) + + # assert "send_list" in comm_dict + # assert "send_proc" in comm_dict + # assert "recv_proc" in comm_dict + # assert "send_num" in comm_dict + # assert "recv_num" in comm_dict + # assert "communicator" in comm_dict + # ret = paddle.ops.deepmd.border_op( + # comm_dict["send_list"], + # comm_dict["send_proc"], + # comm_dict["recv_proc"], + # comm_dict["send_num"], + # comm_dict["recv_num"], + # g1, + # comm_dict["communicator"], + # paddle.to_tensor( + # real_nloc, + # dtype=paddle.int32, + # place=env.DEVICE, + # ), # should be int of c++ + # paddle.to_tensor( + # real_nall - real_nloc, + # dtype=paddle.int32, + # place=env.DEVICE, + # ), # should be int of c++ + # ) + # g1_ext = ret[0].unsqueeze(0) + # if has_spin: + # g1_real_ext, g1_virtual_ext = paddle.split( + # g1_ext, [ng1, ng1], axis=2 + # ) + # g1_ext = concat_switch_virtual( + # g1_real_ext, g1_virtual_ext, real_nloc + # ) + g1, g2, h2 = ll.forward( + g1_ext, + g2, + h2, + nlist, + nlist_mask, + sw, + ) + + # nb x nloc x 3 x ng2 + h2g2 = RepformerLayer._cal_hg( + g2, + h2, + nlist_mask, + sw, + smooth=self.smooth, + epsilon=self.epsilon, + use_sqrt_nnei=self.use_sqrt_nnei, + ) + # (nb x nloc) x ng2 x 3 + rot_mat = paddle.transpose(h2g2, (0, 1, 3, 2)) + + return g1, g2, h2, rot_mat.reshape([nframes, nloc, self.dim_emb, 3]), sw + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ) -> None: + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
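+
+        If `path` already contains statistics for this descriptor's hash, they
+        are loaded from disk instead of being recomputed from `merged`.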
+ + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." + ) + return self.stats + + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + return True + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + return False diff --git a/deepmd/pd/model/descriptor/se_t_tebd.py b/deepmd/pd/model/descriptor/se_t_tebd.py new file mode 100644 index 0000000000..a8b9a6a417 --- /dev/null +++ b/deepmd/pd/model/descriptor/se_t_tebd.py @@ -0,0 +1,931 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +from typing import ( + Callable, + Optional, + Union, +) + +import paddle + +from deepmd.dpmodel.utils import EnvMat as DPEnvMat +from deepmd.dpmodel.utils.seed import ( + child_seed, +) +from deepmd.pd.model.descriptor import ( + DescriptorBlock, +) +from deepmd.pd.model.descriptor.env_mat import ( + prod_env_mat, +) +from deepmd.pd.model.network.mlp import ( + EmbeddingNet, + NetworkCollection, +) +from deepmd.pd.model.network.network import ( + TypeEmbedNet, + TypeEmbedNetConsistent, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, + RESERVED_PRECISON_DICT, +) +from deepmd.pd.utils.env_mat_stat import ( + EnvMatStatSe, +) +from deepmd.pd.utils.exclude_mask import ( + PairExcludeMask, +) +from deepmd.pd.utils.update_sel import ( + UpdateSel, +) +from deepmd.utils.data_system import ( + DeepmdDataSystem, +) +from deepmd.utils.env_mat_stat import ( + StatItem, +) +from deepmd.utils.finetune import ( + get_index_between_two_maps, + map_pair_exclude_types, +) +from deepmd.utils.path import ( + DPPath, +) +from deepmd.utils.version import ( + check_version_compatibility, +) + +from .base_descriptor import ( + BaseDescriptor, +) +from .descriptor import ( + extend_descrpt_stat, +) + + +@BaseDescriptor.register("se_e3_tebd") +class DescrptSeTTebd(BaseDescriptor, paddle.nn.Layer): + r"""Construct an embedding net that takes angles between two neighboring atoms and type embeddings as input. + + Parameters + ---------- + rcut + The cut-off radius + rcut_smth + From where the environment matrix should be smoothed + sel : Union[list[int], int] + list[int]: sel[i] specifies the maxmum number of type i atoms in the cut-off radius + int: the total maxmum number of atoms in the cut-off radius + ntypes : int + Number of element types + neuron : list[int] + Number of neurons in each hidden layers of the embedding net + tebd_dim : int + Dimension of the type embedding + tebd_input_mode : str + The input mode of the type embedding. Supported modes are ["concat", "strip"]. 
+ - "concat": Concatenate the type embedding with the smoothed angular information as the union input for the embedding network. + - "strip": Use a separated embedding network for the type embedding and combine the output with the angular embedding network output. + resnet_dt + Time-step `dt` in the resnet construction: + y = x + dt * \phi (Wx + b) + set_davg_zero + Set the shift of embedding net input to zero. + activation_function + The activation function in the embedding net. Supported options are |ACTIVATION_FN| + env_protection: float + Protection parameter to prevent division by zero errors during environment matrix calculations. + exclude_types : list[tuple[int, int]] + The excluded pairs of types which have no interaction with each other. + For example, `[[0, 1]]` means no interaction between type 0 and type 1. + precision + The precision of the embedding net parameters. Supported options are |PRECISION| + trainable + If the weights of embedding net are trainable. + seed + Random seed for initializing the network parameters. + type_map: list[str], Optional + A list of strings. Give the name to each type of atoms. + concat_output_tebd: bool + Whether to concat type embedding at the output of the descriptor. + use_econf_tebd: bool, Optional + Whether to use electronic configuration type embedding. + use_tebd_bias : bool, Optional + Whether to use bias in the type embedding layer. + smooth: bool + Whether to use smooth process in calculation. + + """ + + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list = [2, 4, 8], + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + resnet_dt: bool = False, + set_davg_zero: bool = True, + activation_function: str = "tanh", + env_protection: float = 0.0, + exclude_types: list[tuple[int, int]] = [], + precision: str = "float64", + trainable: bool = True, + seed: Optional[Union[int, list[int]]] = None, + type_map: Optional[list[str]] = None, + concat_output_tebd: bool = True, + use_econf_tebd: bool = False, + use_tebd_bias=False, + smooth: bool = True, + ) -> None: + super().__init__() + self.se_ttebd = DescrptBlockSeTTebd( + rcut, + rcut_smth, + sel, + ntypes, + neuron=neuron, + tebd_dim=tebd_dim, + tebd_input_mode=tebd_input_mode, + set_davg_zero=set_davg_zero, + activation_function=activation_function, + precision=precision, + resnet_dt=resnet_dt, + exclude_types=exclude_types, + env_protection=env_protection, + smooth=smooth, + seed=child_seed(seed, 1), + ) + self.prec = PRECISION_DICT[precision] + self.use_econf_tebd = use_econf_tebd + self.type_map = type_map + self.smooth = smooth + self.type_embedding = TypeEmbedNet( + ntypes, + tebd_dim, + precision=precision, + seed=child_seed(seed, 2), + use_econf_tebd=use_econf_tebd, + type_map=type_map, + use_tebd_bias=use_tebd_bias, + ) + self.tebd_dim = tebd_dim + self.tebd_input_mode = tebd_input_mode + self.concat_output_tebd = concat_output_tebd + self.trainable = trainable + # set trainable + for param in self.parameters(): + param.stop_gradient = not trainable + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.se_ttebd.get_rcut() + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.se_ttebd.get_rcut_smth() + + def get_nsel(self) -> int: + """Returns the number of selected atoms in the cut-off radius.""" + return self.se_ttebd.get_nsel() + + def get_sel(self) -> list[int]: + """Returns the number of selected 
atoms for each type.""" + return self.se_ttebd.get_sel() + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.se_ttebd.get_ntypes() + + def get_type_map(self) -> list[str]: + """Get the name to each type of atoms.""" + return self.type_map + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + ret = self.se_ttebd.get_dim_out() + if self.concat_output_tebd: + ret += self.tebd_dim + return ret + + def get_dim_emb(self) -> int: + return self.se_ttebd.dim_emb + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return self.se_ttebd.mixed_types() + + def has_message_passing(self) -> bool: + """Returns whether the descriptor has message passing.""" + return self.se_ttebd.has_message_passing() + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor needs sorted nlist when using `forward_lower`.""" + return self.se_ttebd.need_sorted_nlist_for_lower() + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.se_ttebd.get_env_protection() + + def share_params(self, base_class, shared_level, resume=False) -> None: + """ + Share the parameters of self to the base_class with shared_level during multitask training. + If not start from checkpoint (resume is False), + some separated parameters (e.g. mean and stddev) will be re-calculated across different classes. + """ + assert ( + self.__class__ == base_class.__class__ + ), "Only descriptors of the same type can share params!" + # For DPA1 descriptors, the user-defined share-level + # shared_level: 0 + # share all parameters in both type_embedding and se_ttebd + if shared_level == 0: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + self.se_ttebd.share_params(base_class.se_ttebd, 0, resume=resume) + # shared_level: 1 + # share all parameters in type_embedding + elif shared_level == 1: + self._sub_layers["type_embedding"] = base_class._sub_layers[ + "type_embedding" + ] + # Other shared levels + else: + raise NotImplementedError + + @property + def dim_out(self): + return self.get_dim_out() + + @property + def dim_emb(self): + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ): + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. 
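+
+        Both the computation and the resulting statistics are delegated to the
+        wrapped ``se_ttebd`` block.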
+ + """ + return self.se_ttebd.compute_input_stats(merged, path) + + def set_stat_mean_and_stddev( + self, + mean: paddle.Tensor, + stddev: paddle.Tensor, + ) -> None: + """Update mean and stddev for descriptor.""" + self.se_ttebd.mean = mean + self.se_ttebd.stddev = stddev + + def get_stat_mean_and_stddev(self) -> tuple[paddle.Tensor, paddle.Tensor]: + """Get mean and stddev for descriptor.""" + return self.se_ttebd.mean, self.se_ttebd.stddev + + def change_type_map( + self, type_map: list[str], model_with_new_type_stat=None + ) -> None: + """Change the type related params to new ones, according to `type_map` and the original one in the model. + If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types. + """ + assert ( + self.type_map is not None + ), "'type_map' must be defined when performing type changing!" + remap_index, has_new_type = get_index_between_two_maps(self.type_map, type_map) + obj = self.se_ttebd + obj.ntypes = len(type_map) + self.type_map = type_map + self.type_embedding.change_type_map(type_map=type_map) + obj.reinit_exclude(map_pair_exclude_types(obj.exclude_types, remap_index)) + if has_new_type: + # the avg and std of new types need to be updated + extend_descrpt_stat( + obj, + type_map, + des_with_stat=model_with_new_type_stat.se_ttebd + if model_with_new_type_stat is not None + else None, + ) + obj["davg"] = obj["davg"][remap_index] + obj["dstd"] = obj["dstd"][remap_index] + + def serialize(self) -> dict: + obj = self.se_ttebd + data = { + "@class": "Descriptor", + "type": "se_e3_tebd", + "@version": 1, + "rcut": obj.rcut, + "rcut_smth": obj.rcut_smth, + "sel": obj.sel, + "ntypes": obj.ntypes, + "neuron": obj.neuron, + "tebd_dim": obj.tebd_dim, + "tebd_input_mode": obj.tebd_input_mode, + "set_davg_zero": obj.set_davg_zero, + "activation_function": obj.activation_function, + "resnet_dt": obj.resnet_dt, + "concat_output_tebd": self.concat_output_tebd, + "use_econf_tebd": self.use_econf_tebd, + "type_map": self.type_map, + # make deterministic + "precision": RESERVED_PRECISON_DICT[obj.prec], + "embeddings": obj.filter_layers.serialize(), + "env_mat": DPEnvMat(obj.rcut, obj.rcut_smth).serialize(), + "type_embedding": self.type_embedding.embedding.serialize(), + "exclude_types": obj.exclude_types, + "env_protection": obj.env_protection, + "smooth": self.smooth, + "@variables": { + "davg": obj["davg"].numpy(), + "dstd": obj["dstd"].numpy(), + }, + "trainable": self.trainable, + } + if obj.tebd_input_mode in ["strip"]: + data.update({"embeddings_strip": obj.filter_layers_strip.serialize()}) + return data + + @classmethod + def deserialize(cls, data: dict) -> "DescrptSeTTebd": + data = data.copy() + check_version_compatibility(data.pop("@version"), 1, 1) + data.pop("@class") + data.pop("type") + variables = data.pop("@variables") + embeddings = data.pop("embeddings") + type_embedding = data.pop("type_embedding") + env_mat = data.pop("env_mat") + tebd_input_mode = data["tebd_input_mode"] + if tebd_input_mode in ["strip"]: + embeddings_strip = data.pop("embeddings_strip") + else: + embeddings_strip = None + obj = cls(**data) + + def t_cvt(xx): + return paddle.to_tensor(xx, dtype=obj.se_ttebd.prec).to(device=env.DEVICE) + + obj.type_embedding.embedding = TypeEmbedNetConsistent.deserialize( + type_embedding + ) + obj.se_ttebd["davg"] = t_cvt(variables["davg"]) + obj.se_ttebd["dstd"] = t_cvt(variables["dstd"]) + obj.se_ttebd.filter_layers = NetworkCollection.deserialize(embeddings) + if tebd_input_mode in 
["strip"]: + obj.se_ttebd.filter_layers_strip = NetworkCollection.deserialize( + embeddings_strip + ) + return obj + + def forward( + self, + extended_coord: paddle.Tensor, + extended_atype: paddle.Tensor, + nlist: paddle.Tensor, + mapping: Optional[paddle.Tensor] = None, + comm_dict: Optional[dict[str, paddle.Tensor]] = None, + ): + """Compute the descriptor. + + Parameters + ---------- + extended_coord + The extended coordinates of atoms. shape: nf x (nallx3) + extended_atype + The extended aotm types. shape: nf x nall + nlist + The neighbor list. shape: nf x nloc x nnei + mapping + The index mapping, not required by this descriptor. + comm_dict + The data needed for communication for parallel inference. + + Returns + ------- + descriptor + The descriptor. shape: nf x nloc x (ng x axis_neuron) + gr + The rotationally equivariant and permutationally invariant single particle + representation. shape: nf x nloc x ng x 3 + g2 + The rotationally invariant pair-partical representation. + shape: nf x nloc x nnei x ng + h2 + The rotationally equivariant pair-partical representation. + shape: nf x nloc x nnei x 3 + sw + The smooth switch function. shape: nf x nloc x nnei + + """ + # cast the input to internal precsion + extended_coord = extended_coord.to(dtype=self.prec) + del mapping + nframes, nloc, nnei = nlist.shape + nall = extended_coord.reshape([nframes, -1]).shape[1] // 3 + g1_ext = self.type_embedding(extended_atype) + g1_inp = g1_ext[:, :nloc, :] + if self.tebd_input_mode in ["strip"]: + type_embedding = self.type_embedding.get_full_embedding(g1_ext.place) + else: + type_embedding = None + g1, _, _, _, sw = self.se_ttebd( + nlist, + extended_coord, + extended_atype, + g1_ext, + mapping=None, + type_embedding=type_embedding, + ) + if self.concat_output_tebd: + g1 = paddle.concat([g1, g1_inp], axis=-1) + + return ( + g1.to(dtype=env.GLOBAL_PD_FLOAT_PRECISION), + None, + None, + None, + sw.to(dtype=env.GLOBAL_PD_FLOAT_PRECISION), + ) + + @classmethod + def update_sel( + cls, + train_data: DeepmdDataSystem, + type_map: Optional[list[str]], + local_jdata: dict, + ) -> tuple[dict, Optional[float]]: + """Update the selection and perform neighbor statistics. 
+ + Parameters + ---------- + train_data : DeepmdDataSystem + data used to do neighbor statistics + type_map : list[str], optional + The name of each type of atoms + local_jdata : dict + The local data refer to the current class + + Returns + ------- + dict + The updated local data + float + The minimum distance between two atoms + """ + local_jdata_cpy = local_jdata.copy() + min_nbor_dist, sel = UpdateSel().update_one_sel( + train_data, type_map, local_jdata_cpy["rcut"], local_jdata_cpy["sel"], True + ) + local_jdata_cpy["sel"] = sel[0] + return local_jdata_cpy, min_nbor_dist + + +@DescriptorBlock.register("se_ttebd") +class DescrptBlockSeTTebd(DescriptorBlock): + def __init__( + self, + rcut: float, + rcut_smth: float, + sel: Union[list[int], int], + ntypes: int, + neuron: list = [25, 50, 100], + tebd_dim: int = 8, + tebd_input_mode: str = "concat", + set_davg_zero: bool = True, + activation_function="tanh", + precision: str = "float64", + resnet_dt: bool = False, + exclude_types: list[tuple[int, int]] = [], + env_protection: float = 0.0, + smooth: bool = True, + seed: Optional[Union[int, list[int]]] = None, + ) -> None: + super().__init__() + self.rcut = float(rcut) + self.rcut_smth = float(rcut_smth) + self.neuron = neuron + self.filter_neuron = self.neuron + self.tebd_dim = tebd_dim + self.tebd_input_mode = tebd_input_mode + self.set_davg_zero = set_davg_zero + self.activation_function = activation_function + self.precision = precision + self.prec = PRECISION_DICT[self.precision] + self.resnet_dt = resnet_dt + self.env_protection = env_protection + self.seed = seed + self.smooth = smooth + + if isinstance(sel, int): + sel = [sel] + + self.ntypes = ntypes + self.sel = sel + self.sec = self.sel + self.split_sel = self.sel + self.nnei = sum(sel) + self.ndescrpt = self.nnei * 4 + # order matters, placed after the assignment of self.ntypes + self.reinit_exclude(exclude_types) + + wanted_shape = (self.ntypes, self.nnei, 4) + mean = paddle.zeros(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + stddev = paddle.ones(wanted_shape, dtype=env.GLOBAL_PD_FLOAT_PRECISION).to( + device=env.DEVICE + ) + self.register_buffer("mean", mean) + self.register_buffer("stddev", stddev) + self.tebd_dim_input = self.tebd_dim * 2 + if self.tebd_input_mode in ["concat"]: + self.embd_input_dim = 1 + self.tebd_dim_input + else: + self.embd_input_dim = 1 + + self.filter_layers = None + self.filter_layers_strip = None + filter_layers = NetworkCollection( + ndim=0, ntypes=self.ntypes, network_type="embedding_network" + ) + filter_layers[0] = EmbeddingNet( + self.embd_input_dim, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, 1), + ) + self.filter_layers = filter_layers + if self.tebd_input_mode in ["strip"]: + filter_layers_strip = NetworkCollection( + ndim=0, ntypes=self.ntypes, network_type="embedding_network" + ) + filter_layers_strip[0] = EmbeddingNet( + self.tebd_dim_input, + self.filter_neuron, + activation_function=self.activation_function, + precision=self.precision, + resnet_dt=self.resnet_dt, + seed=child_seed(self.seed, 2), + ) + self.filter_layers_strip = filter_layers_strip + self.stats = None + + def get_rcut(self) -> float: + """Returns the cut-off radius.""" + return self.rcut + + def get_rcut_smth(self) -> float: + """Returns the radius where the neighbor information starts to smoothly decay to 0.""" + return self.rcut_smth + + def get_nsel(self) -> int: + 
"""Returns the number of selected atoms in the cut-off radius.""" + return sum(self.sel) + + def get_sel(self) -> list[int]: + """Returns the number of selected atoms for each type.""" + return self.sel + + def get_ntypes(self) -> int: + """Returns the number of element types.""" + return self.ntypes + + def get_dim_in(self) -> int: + """Returns the input dimension.""" + return self.dim_in + + def get_dim_out(self) -> int: + """Returns the output dimension.""" + return self.dim_out + + def get_dim_emb(self) -> int: + """Returns the output dimension of embedding.""" + return self.filter_neuron[-1] + + def __setitem__(self, key, value) -> None: + if key in ("avg", "data_avg", "davg"): + self.mean = value + elif key in ("std", "data_std", "dstd"): + self.stddev = value + else: + raise KeyError(key) + + def __getitem__(self, key): + if key in ("avg", "data_avg", "davg"): + return self.mean + elif key in ("std", "data_std", "dstd"): + return self.stddev + else: + raise KeyError(key) + + def mixed_types(self) -> bool: + """If true, the descriptor + 1. assumes total number of atoms aligned across frames; + 2. requires a neighbor list that does not distinguish different atomic types. + + If false, the descriptor + 1. assumes total number of atoms of each atom type aligned across frames; + 2. requires a neighbor list that distinguishes different atomic types. + + """ + return True + + def get_env_protection(self) -> float: + """Returns the protection of building environment matrix.""" + return self.env_protection + + @property + def dim_out(self): + """Returns the output dimension of this descriptor.""" + return self.filter_neuron[-1] + + @property + def dim_in(self): + """Returns the atomic input dimension of this descriptor.""" + return self.tebd_dim + + @property + def dim_emb(self): + """Returns the output dimension of embedding.""" + return self.get_dim_emb() + + def compute_input_stats( + self, + merged: Union[Callable[[], list[dict]], list[dict]], + path: Optional[DPPath] = None, + ) -> None: + """ + Compute the input statistics (e.g. mean and stddev) for the descriptors from packed data. + + Parameters + ---------- + merged : Union[Callable[[], list[dict]], list[dict]] + - list[dict]: A list of data samples from various data systems. + Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor` + originating from the `i`-th data system. + - Callable[[], list[dict]]: A lazy function that returns data samples in the above format + only when needed. Since the sampling process can be slow and memory-intensive, + the lazy function helps by only sampling once. + path : Optional[DPPath] + The path to the stat file. + + """ + env_mat_stat = EnvMatStatSe(self) + if path is not None: + path = path / env_mat_stat.get_hash() + if path is None or not path.is_dir(): + if callable(merged): + # only get data for once + sampled = merged() + else: + sampled = merged + else: + sampled = [] + env_mat_stat.load_or_compute_stats(sampled, path) + self.stats = env_mat_stat.stats + mean, stddev = env_mat_stat() + if not self.set_davg_zero: + paddle.assign(paddle.to_tensor(mean).to(device=env.DEVICE), self.mean) # pylint: disable=no-explicit-dtype + paddle.assign(paddle.to_tensor(stddev).to(device=env.DEVICE), self.stddev) # pylint: disable=no-explicit-dtype + + def get_stats(self) -> dict[str, StatItem]: + """Get the statistics of the descriptor.""" + if self.stats is None: + raise RuntimeError( + "The statistics of the descriptor has not been computed." 
+            )
+        return self.stats
+
+    def reinit_exclude(
+        self,
+        exclude_types: list[tuple[int, int]] = [],
+    ) -> None:
+        self.exclude_types = exclude_types
+        self.emask = PairExcludeMask(self.ntypes, exclude_types=exclude_types)
+
+    def forward(
+        self,
+        nlist: paddle.Tensor,
+        extended_coord: paddle.Tensor,
+        extended_atype: paddle.Tensor,
+        extended_atype_embd: Optional[paddle.Tensor] = None,
+        mapping: Optional[paddle.Tensor] = None,
+        type_embedding: Optional[paddle.Tensor] = None,
+    ):
+        """Compute the descriptor.
+
+        Parameters
+        ----------
+        nlist
+            The neighbor list. shape: nf x nloc x nnei
+        extended_coord
+            The extended coordinates of atoms. shape: nf x (nallx3)
+        extended_atype
+            The extended atom types. shape: nf x nall
+        extended_atype_embd
+            The extended type embedding of atoms. shape: nf x nall x nt
+        mapping
+            The index mapping, not required by this descriptor.
+        type_embedding
+            Full type embeddings. shape: (ntypes+1) x nt
+            Required for stripped type embeddings.
+
+        Returns
+        -------
+        result
+            The descriptor. shape: nf x nloc x ng
+        g2
+            The rotationally invariant pair-particle representation.
+            shape: nf x nloc x nnei x ng
+        h2
+            The rotationally equivariant pair-particle representation.
+            shape: nf x nloc x nnei x 3
+        gr
+            The rotationally equivariant and permutationally invariant single particle
+            representation. shape: nf x nloc x ng x 3
+        sw
+            The smooth switch function. shape: nf x nloc x nnei
+
+        """
+        del mapping
+        assert extended_atype_embd is not None
+        nframes, nloc, nnei = nlist.shape
+        atype = extended_atype[:, :nloc]
+        nb = nframes
+        nall = extended_coord.reshape([nb, -1, 3]).shape[1]
+        dmatrix, diff, sw = prod_env_mat(
+            extended_coord,
+            nlist,
+            atype,
+            self.mean,
+            self.stddev,
+            self.rcut,
+            self.rcut_smth,
+            protection=self.env_protection,
+        )
+        # nb x nloc x nnei
+        exclude_mask = self.emask(nlist, extended_atype)
+        nlist = paddle.where(exclude_mask != 0, nlist, paddle.full_like(nlist, -1))
+        nlist_mask = nlist != -1
+        nlist = paddle.where(nlist == -1, paddle.zeros_like(nlist), nlist)
+        sw = paddle.squeeze(sw, -1)
+        # nf x nall x nt
+        nt = extended_atype_embd.shape[-1]
+        # beyond the cutoff sw should be 0.0
+        sw = sw.masked_fill(~nlist_mask, 0.0)
+        # (nb x nloc) x nnei
+        exclude_mask = exclude_mask.reshape([nb * nloc, nnei])
+        assert self.filter_layers is not None
+        # nfnl x nnei x 4
+        dmatrix = dmatrix.reshape([-1, self.nnei, 4])
+        nfnl = dmatrix.shape[0]
+        # nfnl x nnei x 4
+        rr = dmatrix
+        rr = rr * exclude_mask[:, :, None].astype(rr.dtype)
+
+        # nfnl x nt_i x 3
+        rr_i = rr[:, :, 1:]
+        # nfnl x nt_j x 3
+        rr_j = rr[:, :, 1:]
+        # nfnl x nt_i x nt_j
+        # env_ij = paddle.einsum("ijm,ikm->ijk", rr_i, rr_j)
+        env_ij = (
+            # ij1m x i1km -> ijkm -> ijk
+            rr_i.unsqueeze(2) * rr_j.unsqueeze(1)
+        ).sum(-1)
+        # nfnl x nt_i x nt_j x 1
+        ss = env_ij.unsqueeze(-1)
+        if self.tebd_input_mode in ["concat"]:
+            atype_tebd_ext = extended_atype_embd
+            # nb x (nloc x nnei) x nt
+            index = nlist.reshape([nb, nloc * nnei]).unsqueeze(-1).expand([-1, -1, nt])
+            # nb x (nloc x nnei) x nt
+            # atype_tebd_nlist = paddle.take_along_axis(atype_tebd_ext, axis=1, index=index)
+            atype_tebd_nlist = paddle.take_along_axis(
+                atype_tebd_ext, axis=1, indices=index
+            )
+            # nb x nloc x nnei x nt
+            atype_tebd_nlist = atype_tebd_nlist.reshape([nb, nloc, nnei, nt])
+            # nfnl x nnei x tebd_dim
+            nlist_tebd = atype_tebd_nlist.reshape([nfnl, nnei, self.tebd_dim])
+            # nfnl x nt_i x nt_j x tebd_dim
+            nlist_tebd_i = nlist_tebd.unsqueeze(2).expand([-1, -1, self.nnei, -1])
+            nlist_tebd_j =
nlist_tebd.unsqueeze(1).expand([-1, self.nnei, -1, -1]) + # nfnl x nt_i x nt_j x (1 + tebd_dim * 2) + ss = paddle.concat([ss, nlist_tebd_i, nlist_tebd_j], axis=-1) + # nfnl x nt_i x nt_j x ng + gg = self.filter_layers.networks[0](ss) + elif self.tebd_input_mode in ["strip"]: + # nfnl x nt_i x nt_j x ng + gg_s = self.filter_layers.networks[0](ss) + assert self.filter_layers_strip is not None + assert type_embedding is not None + ng = self.filter_neuron[-1] + ntypes_with_padding = type_embedding.shape[0] + # nf x (nl x nnei) + nlist_index = nlist.reshape([nb, nloc * nnei]) + # nf x (nl x nnei) + nei_type = paddle.take_along_axis( + extended_atype, indices=nlist_index, axis=1 + ) + # nfnl x nnei + nei_type = nei_type.reshape([nfnl, nnei]) + # nfnl x nnei x nnei + nei_type_i = nei_type.unsqueeze(2).expand([-1, -1, nnei]) + nei_type_j = nei_type.unsqueeze(1).expand([-1, nnei, -1]) + idx_i = nei_type_i * ntypes_with_padding + idx_j = nei_type_j + # (nf x nl x nt_i x nt_j) x ng + idx = ( + (idx_i + idx_j) + .reshape([-1, 1]) + .expand([-1, ng]) + .astype(paddle.int64) + .to(paddle.int64) + ) + # ntypes * (ntypes) * nt + type_embedding_i = paddle.tile( + type_embedding.reshape([ntypes_with_padding, 1, nt]), + [1, ntypes_with_padding, 1], + ) + # (ntypes) * ntypes * nt + type_embedding_j = paddle.tile( + type_embedding.reshape([1, ntypes_with_padding, nt]), + [ntypes_with_padding, 1, 1], + ) + # (ntypes * ntypes) * (nt+nt) + two_side_type_embedding = paddle.concat( + [type_embedding_i, type_embedding_j], -1 + ).reshape([-1, nt * 2]) + tt_full = self.filter_layers_strip.networks[0](two_side_type_embedding) + # (nfnl x nt_i x nt_j) x ng + gg_t = paddle.take_along_axis(tt_full, indices=idx, axis=0) + # (nfnl x nt_i x nt_j) x ng + gg_t = gg_t.reshape([nfnl, nnei, nnei, ng]) + if self.smooth: + gg_t = ( + gg_t + * sw.reshape([nfnl, self.nnei, 1, 1]) + * sw.reshape([nfnl, 1, self.nnei, 1]) + ) + # nfnl x nt_i x nt_j x ng + gg = gg_s * gg_t + gg_s + else: + raise NotImplementedError + + # nfnl x ng + # res_ij = paddle.einsum("ijk,ijkm->im", env_ij, gg) + res_ij = ( + # ijk1 x ijkm -> ijkm -> im + env_ij.unsqueeze(-1) * gg + ).sum([1, 2]) + res_ij = res_ij * (1.0 / float(self.nnei) / float(self.nnei)) + # nf x nl x ng + result = res_ij.reshape([nframes, nloc, self.filter_neuron[-1]]) + return ( + result, + None, + None, + None, + sw, + ) + + def has_message_passing(self) -> bool: + """Returns whether the descriptor block has message passing.""" + return False + + def need_sorted_nlist_for_lower(self) -> bool: + """Returns whether the descriptor block needs sorted nlist when using `forward_lower`.""" + return False diff --git a/deepmd/pd/model/task/fitting.py b/deepmd/pd/model/task/fitting.py index d9db44aff5..6e96b7b081 100644 --- a/deepmd/pd/model/task/fitting.py +++ b/deepmd/pd/model/task/fitting.py @@ -211,8 +211,8 @@ def __init__( if self.dim_case_embd > 0: self.register_buffer( "case_embd", - paddle.zeros(self.dim_case_embd, dtype=self.prec, place=device), - # paddle.eye(self.dim_case_embd, dtype=self.prec, place=device)[0], + paddle.zeros(self.dim_case_embd, dtype=self.prec).to(device=device), + # paddle.eye(self.dim_case_embd, dtype=self.prec).to(device=device)[0], ) else: self.case_embd = None diff --git a/deepmd/pd/utils/auto_batch_size.py b/deepmd/pd/utils/auto_batch_size.py index 8cdb5ddea2..0eb5e46d5f 100644 --- a/deepmd/pd/utils/auto_batch_size.py +++ b/deepmd/pd/utils/auto_batch_size.py @@ -49,12 +49,8 @@ def is_oom_error(self, e: Exception) -> bool: # several sources think 
CUSOLVER_STATUS_INTERNAL_ERROR is another out-of-memory error, # such as https://github.com/JuliaGPU/CUDA.jl/issues/1924 # (the meaningless error message should be considered as a bug in cusolver) - if isinstance(e, RuntimeError) and ( - "CUDA out of memory." in e.args[0] - or "CUDA driver error: out of memory" in e.args[0] - or "cusolver error: CUSOLVER_STATUS_INTERNAL_ERROR" in e.args[0] - ): + if isinstance(e, MemoryError) and ("ResourceExhaustedError" in e.args[0]): # Release all unoccupied cached memory - # paddle.device.cuda.empty_cache() + paddle.device.cuda.empty_cache() return True return False diff --git a/deepmd/pd/utils/dataloader.py b/deepmd/pd/utils/dataloader.py index 9d59ea0da7..80b3e7cb8b 100644 --- a/deepmd/pd/utils/dataloader.py +++ b/deepmd/pd/utils/dataloader.py @@ -36,6 +36,10 @@ from deepmd.pd.utils.dataset import ( DeepmdDataSetForLoader, ) +from deepmd.pt.utils.utils import ( + mix_entropy, +) +from deepmd.utils import random as dp_random from deepmd.utils.data import ( DataRequirementItem, ) @@ -50,8 +54,13 @@ def setup_seed(seed): - paddle.seed(seed) + if isinstance(seed, (list, tuple)): + mixed_seed = mix_entropy(seed) + else: + mixed_seed = seed + paddle.seed(mixed_seed) os.environ["FLAGS_cudnn_deterministic"] = "True" + dp_random.seed(seed) class DpLoaderSet(Dataset): diff --git a/deepmd/pd/utils/multi_task.py b/deepmd/pd/utils/multi_task.py index 680dc53c79..321883c12e 100644 --- a/deepmd/pd/utils/multi_task.py +++ b/deepmd/pd/utils/multi_task.py @@ -96,7 +96,9 @@ def preprocess_shared_params(model_config): shared_links = {} type_map_keys = [] - def replace_one_item(params_dict, key_type, key_in_dict, suffix="", index=None): + def replace_one_item( + params_dict, key_type, key_in_dict, suffix="", index=None + ) -> None: shared_type = key_type shared_key = key_in_dict shared_level = 0 diff --git a/deepmd/pd/utils/region.py b/deepmd/pd/utils/region.py index f3e3eaa52d..d2600ef16e 100644 --- a/deepmd/pd/utils/region.py +++ b/deepmd/pd/utils/region.py @@ -108,5 +108,5 @@ def normalize_coord( """ icoord = phys2inter(coord, cell) - icoord = paddle.remainder(icoord, paddle.full([], 1.0)) + icoord = paddle.remainder(icoord, paddle.full([], 1.0, dtype=icoord.dtype)) return inter2phys(icoord, cell) diff --git a/deepmd/pd/utils/spin.py b/deepmd/pd/utils/spin.py new file mode 100644 index 0000000000..934fb3762a --- /dev/null +++ b/deepmd/pd/utils/spin.py @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later + +import paddle + + +def concat_switch_virtual( + extended_tensor, + extended_tensor_virtual, + nloc: int, +): + """ + Concat real and virtual extended tensors, and switch all the local ones to the first nloc * 2 atoms. + - [:, :nloc]: original nloc real atoms. + - [:, nloc: nloc + nloc]: virtual atoms corresponding to nloc real atoms. + - [:, nloc + nloc: nloc + nall]: ghost real atoms. + - [:, nloc + nall: nall + nall]: virtual atoms corresponding to ghost real atoms. 
+ """ + nframes, nall = extended_tensor.shape[:2] + out_shape = list(extended_tensor.shape) + out_shape[1] *= 2 + extended_tensor_updated = paddle.zeros( + out_shape, + dtype=extended_tensor.dtype, + device=extended_tensor.place, + ) + extended_tensor_updated[:, :nloc] = extended_tensor[:, :nloc] + extended_tensor_updated[:, nloc : nloc + nloc] = extended_tensor_virtual[:, :nloc] + extended_tensor_updated[:, nloc + nloc : nloc + nall] = extended_tensor[:, nloc:] + extended_tensor_updated[:, nloc + nall :] = extended_tensor_virtual[:, nloc:] + return extended_tensor_updated.reshape(out_shape) diff --git a/deepmd/pt/entrypoints/main.py b/deepmd/pt/entrypoints/main.py index 1e5314a821..fd4be73e84 100644 --- a/deepmd/pt/entrypoints/main.py +++ b/deepmd/pt/entrypoints/main.py @@ -138,7 +138,7 @@ def prepare_trainer_input_single( # validation and training data # avoid the same batch sequence among devices - rank_seed = (seed + rank) % (2**32) if seed is not None else None + rank_seed = [rank, seed % (2**32)] if seed is not None else None validation_data_single = ( DpLoaderSet( validation_systems, diff --git a/deepmd/pt/infer/deep_eval.py b/deepmd/pt/infer/deep_eval.py index 59b833d34c..facead838e 100644 --- a/deepmd/pt/infer/deep_eval.py +++ b/deepmd/pt/infer/deep_eval.py @@ -184,6 +184,15 @@ def get_dim_aparam(self) -> int: def get_intensive(self) -> bool: return self.dp.model["Default"].get_intensive() + def get_var_name(self) -> str: + """Get the name of the property.""" + if hasattr(self.dp.model["Default"], "get_var_name") and callable( + getattr(self.dp.model["Default"], "get_var_name") + ): + return self.dp.model["Default"].get_var_name() + else: + raise NotImplementedError + @property def model_type(self) -> type["DeepEvalWrapper"]: """The the evaluator of the model type.""" @@ -200,7 +209,7 @@ def model_type(self) -> type["DeepEvalWrapper"]: return DeepGlobalPolar elif "wfc" in model_output_type: return DeepWFC - elif "property" in model_output_type: + elif self.get_var_name() in model_output_type: return DeepProperty else: raise RuntimeError("Unknown model type") diff --git a/deepmd/pt/loss/property.py b/deepmd/pt/loss/property.py index 07e394650a..9d42c81b45 100644 --- a/deepmd/pt/loss/property.py +++ b/deepmd/pt/loss/property.py @@ -1,5 +1,8 @@ # SPDX-License-Identifier: LGPL-3.0-or-later import logging +from typing import ( + Union, +) import torch import torch.nn.functional as F @@ -21,9 +24,13 @@ class PropertyLoss(TaskLoss): def __init__( self, task_dim, + var_name: str, loss_func: str = "smooth_mae", metric: list = ["mae"], beta: float = 1.00, + out_bias: Union[list, None] = None, + out_std: Union[list, None] = None, + intensive: bool = False, **kwargs, ) -> None: r"""Construct a layer to compute loss on property. @@ -32,18 +39,32 @@ def __init__( ---------- task_dim : float The output dimension of property fitting net. + var_name : str + The atomic property to fit, 'energy', 'dipole', and 'polar'. loss_func : str The loss function, such as "smooth_mae", "mae", "rmse". metric : list The metric such as mae, rmse which will be printed. - beta: + beta : float The 'beta' parameter in 'smooth_mae' loss. + out_bias : Union[list, None] + It is the average value of the label. The shape is nkeys * ntypes * task_dim. + In property fitting, nkeys = 1, so the shape is 1 * ntypes * task_dim. + out_std : Union[list, None] + It is the standard deviation of the label. The shape is nkeys * ntypes * task_dim. + In property fitting, nkeys = 1, so the shape is 1 * ntypes * task_dim. 
+ intensive : bool + Whether the property is intensive. """ super().__init__() self.task_dim = task_dim self.loss_func = loss_func self.metric = metric self.beta = beta + self.out_bias = out_bias + self.out_std = out_std + self.intensive = intensive + self.var_name = var_name def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False): """Return loss on properties . @@ -69,34 +90,64 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False Other losses for display. """ model_pred = model(**input_dict) - assert label["property"].shape[-1] == self.task_dim - assert model_pred["property"].shape[-1] == self.task_dim + var_name = self.var_name + nbz = model_pred[var_name].shape[0] + assert model_pred[var_name].shape == (nbz, self.task_dim) + assert label[var_name].shape == (nbz, self.task_dim) + if not self.intensive: + model_pred[var_name] = model_pred[var_name] / natoms + label[var_name] = label[var_name] / natoms + + if self.out_std is None: + out_std = model.atomic_model.out_std[0][0] + else: + out_std = torch.tensor( + self.out_std, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ) + if out_std.shape != (self.task_dim,): + raise ValueError( + f"Expected out_std to have shape ({self.task_dim},), but got {out_std.shape}" + ) + + if self.out_bias is None: + out_bias = model.atomic_model.out_bias[0][0] + else: + out_bias = torch.tensor( + self.out_bias, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE + ) + if out_bias.shape != (self.task_dim,): + raise ValueError( + f"Expected out_bias to have shape ({self.task_dim},), but got {out_bias.shape}" + ) + loss = torch.zeros(1, dtype=env.GLOBAL_PT_FLOAT_PRECISION, device=env.DEVICE)[0] more_loss = {} # loss if self.loss_func == "smooth_mae": loss += F.smooth_l1_loss( - label["property"], - model_pred["property"], + (label[var_name] - out_bias) / out_std, + (model_pred[var_name] - out_bias) / out_std, reduction="sum", beta=self.beta, ) elif self.loss_func == "mae": loss += F.l1_loss( - label["property"], model_pred["property"], reduction="sum" + (label[var_name] - out_bias) / out_std, + (model_pred[var_name] - out_bias) / out_std, + reduction="sum", ) elif self.loss_func == "mse": loss += F.mse_loss( - label["property"], - model_pred["property"], + (label[var_name] - out_bias) / out_std, + (model_pred[var_name] - out_bias) / out_std, reduction="sum", ) elif self.loss_func == "rmse": loss += torch.sqrt( F.mse_loss( - label["property"], - model_pred["property"], + (label[var_name] - out_bias) / out_std, + (model_pred[var_name] - out_bias) / out_std, reduction="mean", ) ) @@ -106,28 +157,28 @@ def forward(self, input_dict, model, label, natoms, learning_rate=0.0, mae=False # more loss if "smooth_mae" in self.metric: more_loss["smooth_mae"] = F.smooth_l1_loss( - label["property"], - model_pred["property"], + label[var_name], + model_pred[var_name], reduction="mean", beta=self.beta, ).detach() if "mae" in self.metric: more_loss["mae"] = F.l1_loss( - label["property"], - model_pred["property"], + label[var_name], + model_pred[var_name], reduction="mean", ).detach() if "mse" in self.metric: more_loss["mse"] = F.mse_loss( - label["property"], - model_pred["property"], + label[var_name], + model_pred[var_name], reduction="mean", ).detach() if "rmse" in self.metric: more_loss["rmse"] = torch.sqrt( F.mse_loss( - label["property"], - model_pred["property"], + label[var_name], + model_pred[var_name], reduction="mean", ) ).detach() @@ -140,10 +191,10 @@ def label_requirement(self) -> 
list[DataRequirementItem]:
         label_requirement = []
         label_requirement.append(
             DataRequirementItem(
-                "property",
+                self.var_name,
                 ndof=self.task_dim,
                 atomic=False,
-                must=False,
+                must=True,
                 high_prec=True,
             )
         )
diff --git a/deepmd/pt/model/atomic_model/base_atomic_model.py b/deepmd/pt/model/atomic_model/base_atomic_model.py
index a64eca0fe9..c83e35dab3 100644
--- a/deepmd/pt/model/atomic_model/base_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/base_atomic_model.py
@@ -125,6 +125,14 @@ def get_type_map(self) -> list[str]:
         """Get the type map."""
         return self.type_map

+    def get_compute_stats_distinguish_types(self) -> bool:
+        """Whether the output statistics of the fitting net are computed distinguishing different atom types."""
+        return True
+
+    def get_intensive(self) -> bool:
+        """Whether the fitting property is intensive."""
+        return False
+
     def reinit_atom_exclude(
         self,
         exclude_types: list[int] = [],
@@ -456,7 +464,6 @@ def change_out_bias(
                 model_forward=self._get_forward_wrapper_func(),
                 rcond=self.rcond,
                 preset_bias=self.preset_out_bias,
-                atomic_output=self.atomic_output_def(),
             )
             self._store_out_stat(delta_bias, out_std, add=True)
         elif bias_adjust_mode == "set-by-statistic":
@@ -467,7 +474,8 @@
                 stat_file_path=stat_file_path,
                 rcond=self.rcond,
                 preset_bias=self.preset_out_bias,
-                atomic_output=self.atomic_output_def(),
+                stats_distinguish_types=self.get_compute_stats_distinguish_types(),
+                intensive=self.get_intensive(),
             )
             self._store_out_stat(bias_out, std_out)
         else:
diff --git a/deepmd/pt/model/atomic_model/property_atomic_model.py b/deepmd/pt/model/atomic_model/property_atomic_model.py
index 1fdc72b2b6..3622c9f476 100644
--- a/deepmd/pt/model/atomic_model/property_atomic_model.py
+++ b/deepmd/pt/model/atomic_model/property_atomic_model.py
@@ -19,31 +19,31 @@ def __init__(self, descriptor, fitting, type_map, **kwargs):
             )
         super().__init__(descriptor, fitting, type_map, **kwargs)

+    def get_compute_stats_distinguish_types(self) -> bool:
+        """Whether the output statistics of the fitting net are computed distinguishing different atom types."""
+        return False
+
+    def get_intensive(self) -> bool:
+        """Whether the fitting property is intensive."""
+        return self.fitting_net.get_intensive()
+
     def apply_out_stat(
         self,
         ret: dict[str, torch.Tensor],
         atype: torch.Tensor,
     ):
         """Apply the stat to each atomic output.

-        This function defines how the bias is applied to the atomic output of the model.
+        In property fitting, each output will be multiplied by label std and then plus the label average value.

         Parameters
         ----------
         ret
             The returned dict by the forward_atomic method
         atype
-            The atom types. nf x nloc
+            The atom types. nf x nloc. It is useless in property fitting.

         """
-        if self.fitting_net.get_bias_method() == "normal":
-            out_bias, out_std = self._fetch_out_stat(self.bias_keys)
-            for kk in self.bias_keys:
-                # nf x nloc x odims, out_bias: ntypes x odims
-                ret[kk] = ret[kk] + out_bias[kk][atype]
-            return ret
-        elif self.fitting_net.get_bias_method() == "no_bias":
-            return ret
-        else:
-            raise NotImplementedError(
-                "Only 'normal' and 'no_bias' is supported for parameter 'bias_method'."
- ) + out_bias, out_std = self._fetch_out_stat(self.bias_keys) + for kk in self.bias_keys: + ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0] + return ret diff --git a/deepmd/pt/model/descriptor/repformer_layer.py b/deepmd/pt/model/descriptor/repformer_layer.py index 86b09e9b40..1e2cba66d6 100644 --- a/deepmd/pt/model/descriptor/repformer_layer.py +++ b/deepmd/pt/model/descriptor/repformer_layer.py @@ -1003,7 +1003,7 @@ def _cal_grrg(h2g2: torch.Tensor, axis_neuron: int) -> torch.Tensor: # nb x nloc x 3 x ng2 nb, nloc, _, ng2 = h2g2.shape # nb x nloc x 3 x axis - h2g2m = torch.split(h2g2, axis_neuron, dim=-1)[0] + h2g2m = h2g2[..., :axis_neuron] # nb x nloc x axis x ng2 g1_13 = torch.matmul(torch.transpose(h2g2m, -1, -2), h2g2) / (3.0**1) # nb x nloc x (axisxng2) diff --git a/deepmd/pt/model/model/property_model.py b/deepmd/pt/model/model/property_model.py index 4581a2bc3e..7c50c75ff1 100644 --- a/deepmd/pt/model/model/property_model.py +++ b/deepmd/pt/model/model/property_model.py @@ -37,8 +37,8 @@ def __init__( def translated_output_def(self): out_def_data = self.model_output_def().get_data() output_def = { - "atom_property": out_def_data["property"], - "property": out_def_data["property_redu"], + f"atom_{self.get_var_name()}": out_def_data[self.get_var_name()], + self.get_var_name(): out_def_data[f"{self.get_var_name()}_redu"], } if "mask" in out_def_data: output_def["mask"] = out_def_data["mask"] @@ -62,8 +62,8 @@ def forward( do_atomic_virial=do_atomic_virial, ) model_predict = {} - model_predict["atom_property"] = model_ret["property"] - model_predict["property"] = model_ret["property_redu"] + model_predict[f"atom_{self.get_var_name()}"] = model_ret[self.get_var_name()] + model_predict[self.get_var_name()] = model_ret[f"{self.get_var_name()}_redu"] if "mask" in model_ret: model_predict["mask"] = model_ret["mask"] return model_predict @@ -76,7 +76,12 @@ def get_task_dim(self) -> int: @torch.jit.export def get_intensive(self) -> bool: """Get whether the property is intensive.""" - return self.model_output_def()["property"].intensive + return self.model_output_def()[self.get_var_name()].intensive + + @torch.jit.export + def get_var_name(self) -> str: + """Get the name of the property.""" + return self.get_fitting_net().var_name @torch.jit.export def forward_lower( @@ -102,8 +107,8 @@ def forward_lower( extra_nlist_sort=self.need_sorted_nlist_for_lower(), ) model_predict = {} - model_predict["atom_property"] = model_ret["property"] - model_predict["property"] = model_ret["property_redu"] + model_predict[f"atom_{self.get_var_name()}"] = model_ret[self.get_var_name()] + model_predict[self.get_var_name()] = model_ret[f"{self.get_var_name()}_redu"] if "mask" in model_ret: model_predict["mask"] = model_ret["mask"] return model_predict diff --git a/deepmd/pt/model/task/property.py b/deepmd/pt/model/task/property.py index dec0f1447b..c15e60fe04 100644 --- a/deepmd/pt/model/task/property.py +++ b/deepmd/pt/model/task/property.py @@ -43,17 +43,16 @@ class PropertyFittingNet(InvarFitting): dim_descrpt : int Embedding width per atom. task_dim : int - The dimension of outputs of fitting net. + The dimension of outputs of fitting net. + property_name: + The name of fitting property, which should be consistent with the property name in the dataset. + If the data file is named `humo.npy`, this parameter should be "humo". neuron : list[int] Number of neurons in each hidden layers of the fitting net. bias_atom_p : torch.Tensor, optional Average property per atom for each element. 
intensive : bool, optional Whether the fitting property is intensive. - bias_method : str, optional - The method of applying the bias to each atomic output, user can select 'normal' or 'no_bias'. - If 'normal' is used, the computed bias will be added to the atomic output. - If 'no_bias' is used, no bias will be added to the atomic output. resnet_dt : bool Using time-step in the ResNet construction. numb_fparam : int @@ -77,11 +76,11 @@ def __init__( self, ntypes: int, dim_descrpt: int, + property_name: str, task_dim: int = 1, neuron: list[int] = [128, 128, 128], bias_atom_p: Optional[torch.Tensor] = None, intensive: bool = False, - bias_method: str = "normal", resnet_dt: bool = True, numb_fparam: int = 0, numb_aparam: int = 0, @@ -94,9 +93,8 @@ def __init__( ) -> None: self.task_dim = task_dim self.intensive = intensive - self.bias_method = bias_method super().__init__( - var_name="property", + var_name=property_name, ntypes=ntypes, dim_descrpt=dim_descrpt, dim_out=task_dim, @@ -113,9 +111,6 @@ def __init__( **kwargs, ) - def get_bias_method(self) -> str: - return self.bias_method - def output_def(self) -> FittingOutputDef: return FittingOutputDef( [ @@ -130,12 +125,16 @@ def output_def(self) -> FittingOutputDef: ] ) + def get_intensive(self) -> bool: + """Whether the fitting property is intensive.""" + return self.intensive + @classmethod def deserialize(cls, data: dict) -> "PropertyFittingNet": data = data.copy() - check_version_compatibility(data.pop("@version", 1), 3, 1) + check_version_compatibility(data.pop("@version", 1), 4, 1) data.pop("dim_out") - data.pop("var_name") + data["property_name"] = data.pop("var_name") obj = super().deserialize(data) return obj @@ -146,7 +145,9 @@ def serialize(self) -> dict: **InvarFitting.serialize(self), "type": "property", "task_dim": self.task_dim, + "intensive": self.intensive, } + dd["@version"] = 4 return dd diff --git a/deepmd/pt/train/training.py b/deepmd/pt/train/training.py index 8ca510492c..eca952d7f8 100644 --- a/deepmd/pt/train/training.py +++ b/deepmd/pt/train/training.py @@ -1240,7 +1240,11 @@ def get_loss(loss_params, start_lr, _ntypes, _model): return TensorLoss(**loss_params) elif loss_type == "property": task_dim = _model.get_task_dim() + var_name = _model.get_var_name() + intensive = _model.get_intensive() loss_params["task_dim"] = task_dim + loss_params["var_name"] = var_name + loss_params["intensive"] = intensive return PropertyLoss(**loss_params) else: loss_params["starter_learning_rate"] = start_lr diff --git a/deepmd/pt/utils/dataloader.py b/deepmd/pt/utils/dataloader.py index 67e5195f6d..12681a304d 100644 --- a/deepmd/pt/utils/dataloader.py +++ b/deepmd/pt/utils/dataloader.py @@ -36,6 +36,9 @@ from deepmd.pt.utils.dataset import ( DeepmdDataSetForLoader, ) +from deepmd.pt.utils.utils import ( + mix_entropy, +) from deepmd.utils.data import ( DataRequirementItem, ) @@ -50,8 +53,12 @@ def setup_seed(seed) -> None: - torch.manual_seed(seed) - torch.cuda.manual_seed_all(seed) + if isinstance(seed, (list, tuple)): + mixed_seed = mix_entropy(seed) + else: + mixed_seed = seed + torch.manual_seed(mixed_seed) + torch.cuda.manual_seed_all(mixed_seed) torch.backends.cudnn.deterministic = True dp_random.seed(seed) diff --git a/deepmd/pt/utils/nlist.py b/deepmd/pt/utils/nlist.py index db1e87785b..ec94e8cd60 100644 --- a/deepmd/pt/utils/nlist.py +++ b/deepmd/pt/utils/nlist.py @@ -310,7 +310,7 @@ def nlist_distinguish_types( inlist = torch.gather(nlist, 2, imap) inlist = inlist.masked_fill(~(pick_mask.to(torch.bool)), -1) # nloc x nsel[ii] 
- ret_nlist.append(torch.split(inlist, [ss, snsel - ss], dim=-1)[0]) + ret_nlist.append(inlist[..., :ss]) return torch.concat(ret_nlist, dim=-1) diff --git a/deepmd/pt/utils/stat.py b/deepmd/pt/utils/stat.py index 1c5e3f1c52..710d392ac3 100644 --- a/deepmd/pt/utils/stat.py +++ b/deepmd/pt/utils/stat.py @@ -12,9 +12,6 @@ import numpy as np import torch -from deepmd.dpmodel.output_def import ( - FittingOutputDef, -) from deepmd.pt.utils import ( AtomExcludeMask, ) @@ -27,6 +24,7 @@ to_torch_tensor, ) from deepmd.utils.out_stat import ( + compute_stats_do_not_distinguish_types, compute_stats_from_atomic, compute_stats_from_redu, ) @@ -136,11 +134,16 @@ def _post_process_stat( For global statistics, we do not have the std for each type of atoms, thus fake the output std by ones for all the types. + If the shape of out_std is already the same as out_bias, + we do not need to do anything. """ new_std = {} for kk, vv in out_bias.items(): - new_std[kk] = np.ones_like(vv) + if vv.shape == out_std[kk].shape: + new_std[kk] = out_std[kk] + else: + new_std[kk] = np.ones_like(vv) return out_bias, new_std @@ -242,7 +245,8 @@ def compute_output_stats( rcond: Optional[float] = None, preset_bias: Optional[dict[str, list[Optional[np.ndarray]]]] = None, model_forward: Optional[Callable[..., torch.Tensor]] = None, - atomic_output: Optional[FittingOutputDef] = None, + stats_distinguish_types: bool = True, + intensive: bool = False, ): """ Compute the output statistics (e.g. energy bias) for the fitting net from packed data. @@ -272,8 +276,10 @@ def compute_output_stats( If not None, the model will be utilized to generate the original energy prediction, which will be subtracted from the energy label of the data. The difference will then be used to calculate the delta complement energy bias for each type. - atomic_output : FittingOutputDef, optional - The output of atomic model. + stats_distinguish_types : bool, optional + Whether to distinguish different element types in the statistics. + intensive : bool, optional + Whether the fitting target is intensive. 
""" # try to restore the bias from stat file bias_atom_e, std_atom_e = _restore_from_file(stat_file_path, keys) @@ -362,7 +368,8 @@ def compute_output_stats( rcond, preset_bias, model_pred_g, - atomic_output, + stats_distinguish_types, + intensive, ) bias_atom_a, std_atom_a = compute_output_stats_atomic( sampled, @@ -405,7 +412,8 @@ def compute_output_stats_global( rcond: Optional[float] = None, preset_bias: Optional[dict[str, list[Optional[np.ndarray]]]] = None, model_pred: Optional[dict[str, np.ndarray]] = None, - atomic_output: Optional[FittingOutputDef] = None, + stats_distinguish_types: bool = True, + intensive: bool = False, ): """This function only handle stat computation from reduced global labels.""" # return directly if model predict is empty for global @@ -476,19 +484,22 @@ def compute_output_stats_global( std_atom_e = {} for kk in keys: if kk in stats_input: - if atomic_output is not None and atomic_output.get_data()[kk].intensive: - task_dim = stats_input[kk].shape[1] - assert merged_natoms[kk].shape == (nf[kk], ntypes) - stats_input[kk] = ( - merged_natoms[kk].sum(axis=1).reshape(-1, 1) * stats_input[kk] + if not stats_distinguish_types: + bias_atom_e[kk], std_atom_e[kk] = ( + compute_stats_do_not_distinguish_types( + stats_input[kk], + merged_natoms[kk], + assigned_bias=assigned_atom_ener[kk], + intensive=intensive, + ) + ) + else: + bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_redu( + stats_input[kk], + merged_natoms[kk], + assigned_bias=assigned_atom_ener[kk], + rcond=rcond, ) - assert stats_input[kk].shape == (nf[kk], task_dim) - bias_atom_e[kk], std_atom_e[kk] = compute_stats_from_redu( - stats_input[kk], - merged_natoms[kk], - assigned_bias=assigned_atom_ener[kk], - rcond=rcond, - ) else: # this key does not have global labels, skip it. continue diff --git a/deepmd/tf/entrypoints/train.py b/deepmd/tf/entrypoints/train.py index 590fa04635..1762f1049a 100755 --- a/deepmd/tf/entrypoints/train.py +++ b/deepmd/tf/entrypoints/train.py @@ -220,9 +220,10 @@ def _do_work( seed = jdata["training"].get("seed", None) if seed is not None: # avoid the same batch sequence among workers - seed += run_opt.my_rank seed = seed % (2**32) - dp_random.seed(seed) + dp_random.seed([run_opt.my_rank, seed]) + else: + dp_random.seed(seed) # setup data modifier modifier = get_modifier(jdata["model"].get("modifier", None)) diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py index 9eac0e804d..50ef07b2af 100644 --- a/deepmd/utils/argcheck.py +++ b/deepmd/utils/argcheck.py @@ -1580,7 +1580,7 @@ def fitting_property(): doc_seed = "Random seed for parameter initialization of the fitting net" doc_task_dim = "The dimension of outputs of fitting net" doc_intensive = "Whether the fitting property is intensive" - doc_bias_method = "The method of applying the bias to each atomic output, user can select 'normal' or 'no_bias'. If 'no_bias' is used, no bias will be added to the atomic output." + doc_property_name = "The names of fitting property, which should be consistent with the property name in the dataset." 
return [
        Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
        Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
@@ -1612,7 +1612,10 @@ def fitting_property():
        Argument("task_dim", int, optional=True, default=1, doc=doc_task_dim),
        Argument("intensive", bool, optional=True, default=False, doc=doc_intensive),
        Argument(
-            "bias_method", str, optional=True, default="normal", doc=doc_bias_method
+            "property_name",
+            str,
+            optional=False,
+            doc=doc_property_name,
        ),
    ]
diff --git a/deepmd/utils/out_stat.py b/deepmd/utils/out_stat.py
index 4d0d788f8b..ecbd379e2d 100644
--- a/deepmd/utils/out_stat.py
+++ b/deepmd/utils/out_stat.py
@@ -130,3 +130,64 @@ def compute_stats_from_atomic(
             output[mask].std(axis=0) if output[mask].size > 0 else np.nan
         )
     return output_bias, output_std
+
+
+def compute_stats_do_not_distinguish_types(
+    output_redu: np.ndarray,
+    natoms: np.ndarray,
+    assigned_bias: Optional[np.ndarray] = None,
+    intensive: bool = False,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Compute element-independent statistics for property fitting.
+
+    Computes mean and standard deviation of the output, treating all elements equally.
+    For extensive properties, the output is normalized by the total number of atoms
+    before computing statistics.
+
+    Parameters
+    ----------
+    output_redu
+        The reduced output value, shape is [nframes, *(odim0, odim1, ...)].
+    natoms
+        The number of atoms of each type for each frame, shape is [nframes, ntypes].
+        Used for normalization of extensive properties and generating uniform bias.
+    assigned_bias
+        The assigned output bias, shape is [ntypes, *(odim0, odim1, ...)].
+        Set to a tensor of shape (odim0, odim1, ...) filled with nan if the bias
+        of the type is not assigned.
+    intensive
+        Whether the output is intensive or extensive.
+        If False, the output will be normalized by the total number of atoms before computing statistics.
+
+    Returns
+    -------
+    np.ndarray
+        The computed output mean (fake bias), shape is [ntypes, *(odim0, odim1, ...)].
+        The same bias is used for all atom types.
+    np.ndarray
+        The computed output standard deviation, shape is [ntypes, *(odim0, odim1, ...)].
+        The same standard deviation is used for all atom types.
+    """
+    natoms = np.array(natoms)  # [nf, ntypes]
+    nf, ntypes = natoms.shape
+    output_redu = np.array(output_redu)
+    var_shape = list(output_redu.shape[1:])
+    output_redu = output_redu.reshape(nf, -1)
+    if not intensive:
+        total_atoms = natoms.sum(axis=1)
+        output_redu = output_redu / total_atoms[:, np.newaxis]
+    # check shape
+    assert output_redu.ndim == 2
+    assert natoms.ndim == 2
+    assert output_redu.shape[0] == natoms.shape[0]  # [nf,1]
+
+    computed_output_bias = np.repeat(
+        np.mean(output_redu, axis=0)[np.newaxis, :], ntypes, axis=0
+    )
+    output_std = np.std(output_redu, axis=0)
+
+    computed_output_bias = computed_output_bias.reshape([natoms.shape[1]] + var_shape)  # noqa: RUF005
+    output_std = output_std.reshape(var_shape)
+    output_std = np.tile(output_std, (computed_output_bias.shape[0], 1))
+
+    return computed_output_bias, output_std
diff --git a/deepmd/utils/random.py b/deepmd/utils/random.py
index b0c75600fb..10ebdf0790 100644
--- a/deepmd/utils/random.py
+++ b/deepmd/utils/random.py
@@ -56,7 +56,7 @@ def random(size=None):
     return _RANDOM_GENERATOR.random_sample(size)


-def seed(val: Optional[int] = None) -> None:
+def seed(val: Optional[Union[int, list[int]]] = None) -> None:
     """Seed the generator.
    Parameters
diff --git a/doc/model/index.rst b/doc/model/index.rst
index c067ea4207..5e7ba32486 100644
--- a/doc/model/index.rst
+++ b/doc/model/index.rst
@@ -16,6 +16,7 @@ Model
    train-energy-spin
    train-fitting-tensor
    train-fitting-dos
+   train-fitting-property
    train-se-e2-a-tebd
    train-se-a-mask
    train-se-e3-tebd
diff --git a/doc/model/train-fitting-property.md b/doc/model/train-fitting-property.md
new file mode 100644
index 0000000000..be1b63bf6f
--- /dev/null
+++ b/doc/model/train-fitting-property.md
@@ -0,0 +1,194 @@
+# Fit other properties {{ pytorch_icon }} {{ jax_icon }} {{ dpmodel_icon }}
+
+:::{note}
+**Supported backends**: PyTorch {{ pytorch_icon }}, JAX {{ jax_icon }}, DP {{ dpmodel_icon }}
+:::
+
+Here we present an API to the DeepProperty model, which can be used to fit other properties like band gap, bulk modulus, critical temperature, etc.
+
+In this example, we will show you how to train a model to fit the properties `humo`, `lumo` and `band gap`. A complete training input script of the examples can be found in
+
+```bash
+$deepmd_source_dir/examples/property/train
+```
+
+The training and validation data are also provided in our examples. But note that **the data provided along with the examples are of limited amount, and should not be used to train a production model.**
+
+Similar to the `input.json` used in `ener` mode, the training JSON is also divided into {ref}`model <model>`, {ref}`learning_rate <learning_rate>`, {ref}`loss <loss>` and {ref}`training <training>`. Most keywords remain the same as in `ener` mode, and their meaning can be found [here](train-se-atten.md). To fit the `property`, one needs to modify {ref}`model[standard]/fitting_net <model[standard]/fitting_net>` and {ref}`loss <loss>`.
+
+## The fitting Network
+
+The {ref}`fitting_net <model[standard]/fitting_net>` section tells DP which fitting net to use.
+
+The JSON of `property` type should be provided like
+
+```json
+"fitting_net" : {
+    "type": "property",
+    "intensive": true,
+    "property_name": "band_prop",
+    "task_dim": 3,
+    "neuron": [240,240,240],
+    "resnet_dt": true,
+    "seed": 1
+},
+```
+
+- `type` specifies which type of fitting net should be used. It should be `property`.
+- `intensive` indicates whether the fitting property is intensive. If `intensive` is `true`, the model output is the average of the property contribution of each atom. If `intensive` is `false`, the model output is the sum of the property contribution of each atom.
+- `property_name` is the name of the property to be predicted. It should be consistent with the property name in the dataset. In each system, the code will read the `set.*/{property_name}.npy` file as the prediction label if you use NumPy format data.
+- `fitting_net/task_dim` is the dimension of the model output. It should be consistent with the property dimension in the dataset, which means that if the shape of the data stored in `set.*/{property_name}.npy` is `batch size * 3`, `fitting_net/task_dim` should be set to 3.
+- The rest of the arguments have the same meaning as they do in `ener` mode.
+
+## Loss
+
+DeepProperty supports training with global labels (one or more global labels are provided for each frame). For example, when fitting `property`, each frame will provide a `1 x task_dim` vector which gives the fitting properties.
+
+The loss section should be provided like
+
+```json
+"loss" : {
+    "type": "property",
+    "metric": ["mae"],
+    "loss_func": "smooth_mae"
+},
+```
+
+- {ref}`type <loss/type>` should be written as `property` as a distinction from `ener` mode.
+- `metric`: The metric for display, which will be printed in `lcurve.out`. This list can include 'smooth_mae', 'mae', 'mse' and 'rmse'.
+- `loss_func`: The loss function to minimize; you can use 'mae', 'smooth_mae', 'mse' and 'rmse' (see the sketch below).
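+
+For reference, `smooth_mae` is the standard Huber-style interpolation between MSE and MAE. The per-component form sketched below is given for illustration only and mirrors the usual smooth-L1 definition, with $\beta$ being the `beta` parameter of the loss:
+
+$$\ell_\beta(x, \hat{x}) = \begin{cases} (x - \hat{x})^2 / (2\beta), & |x - \hat{x}| < \beta \\ |x - \hat{x}| - \beta / 2, & \text{otherwise} \end{cases}$$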
+
+## Training Data Preparation
+
+The label should be named `{property_name}.npy/raw`, where `property_name` is defined by `fitting_net/property_name` in `input.json`.
+
+To prepare the data, you can use `dpdata` tools, for example:
+
+```py
+import dpdata
+import numpy as np
+from dpdata.data_type import (
+    Axis,
+    DataType,
+)
+
+property_name = "band_prop"  # fitting_net/property_name
+task_dim = 3  # fitting_net/task_dim
+
+# register datatype
+datatypes = [
+    DataType(
+        property_name,
+        np.ndarray,
+        shape=(Axis.NFRAMES, task_dim),
+        required=False,
+    ),
+]
+datatypes.extend(
+    [
+        DataType(
+            "energies",
+            np.ndarray,
+            shape=(Axis.NFRAMES, 1),
+            required=False,
+        ),
+        DataType(
+            "forces",
+            np.ndarray,
+            shape=(Axis.NFRAMES, Axis.NATOMS, 3),
+            required=False,
+        ),
+    ]
+)
+
+for datatype in datatypes:
+    dpdata.System.register_data_type(datatype)
+    dpdata.LabeledSystem.register_data_type(datatype)
+
+ls = dpdata.MultiSystems()
+frame = dpdata.System("POSCAR", fmt="vasp/poscar")
+labelframe = dpdata.LabeledSystem()
+labelframe.append(frame)
+labelframe.data[property_name] = np.array([[-0.236, 0.056, 0.292]], dtype=np.float32)
+ls.append(labelframe)
+ls.to_deepmd_npy_mixed("deepmd")
+```
+
+## Train the Model
+
+The training command is the same as `ener` mode, i.e.
+
+::::{tab-set}
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```bash
+dp --pt train input.json
+```
+
+:::
+
+::::
+
+The detailed loss can be found in `lcurve.out`:
+
+```
+# step   mae_val   mae_trn   lr
+# If there is no available reference data, rmse_*_{val,trn} will print nan
+      1  2.72e-02  2.40e-02  2.0e-04
+    100  1.79e-02  1.34e-02  2.0e-04
+    200  1.45e-02  1.86e-02  2.0e-04
+    300  1.61e-02  4.90e-03  2.0e-04
+    400  2.04e-02  1.05e-02  2.0e-04
+    500  9.09e-03  1.85e-02  2.0e-04
+    600  1.01e-02  5.63e-03  2.0e-04
+    700  1.10e-02  1.76e-02  2.0e-04
+    800  1.14e-02  1.50e-02  2.0e-04
+    900  9.54e-03  2.70e-02  2.0e-04
+   1000  1.00e-02  2.73e-02  2.0e-04
+```
+
+## Test the Model
+
+We can use `dp test` to infer the properties for given frames.
+
+::::{tab-set}
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```bash
+dp --pt freeze -o frozen_model.pth
+
+dp --pt test -m frozen_model.pth -s ../data/data_0/ -d ${output_prefix} -n 100
+```
+
+:::
+
+::::
+
+If `dp test -d ${output_prefix}` is specified, the predicted properties for each frame are output in the working directory:
+
+```
+${output_prefix}.property.out.0   ${output_prefix}.property.out.1   ${output_prefix}.property.out.2   ${output_prefix}.property.out.3
+```
+
+Each `*.property.out.*` file contains a matrix of shape `(task_dim, 2)`, the first column being the data label and the second the prediction:
+
+```
+# ../data/data_0 - 0: data_property pred_property
+-2.449000030755996704e-01 -2.315840660495154801e-01
+6.400000303983688354e-02 5.810663314446311983e-02
+3.088999986648559570e-01 2.917143316092784544e-01
+```
+
+## Data Normalization
+
+When `fitting_net/type` is `ener`, the energy bias layer $e_{\mathrm{bias}}$ adds a constant bias to the atomic energy contribution according to the atomic number, i.e.,
+
+$$e_{\mathrm{bias}}(Z_i)\left(\mathrm{MLP}(D_i)\right) = \mathrm{MLP}(D_i) + e_{\mathrm{bias}}(Z_i)$$
+
+But when `fitting_net/type` is `property`, the property bias layer is used to normalize the property output of the model, i.e.,
+
+$$p_{\mathrm{bias}}\left(\mathrm{MLP}(D_i)\right) = \mathrm{MLP}(D_i) \times \mathrm{std} + \mathrm{mean}$$
+
+1. `std`: The standard deviation of the property label.
+2. `mean`: The average value of the property label.
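+
+A minimal NumPy sketch of this normalization is given below for illustration only (the variable names are hypothetical; it mirrors what `compute_stats_do_not_distinguish_types` in `deepmd/utils/out_stat.py` computes over the training frames, and for extensive properties the label is first divided by the number of atoms):
+
+```py
+import numpy as np
+
+# hypothetical global labels, shape (nframes, task_dim)
+labels = np.array([[-0.236, 0.056, 0.292], [-0.241, 0.049, 0.288]])
+
+mean = labels.mean(axis=0)  # average value of the property label
+std = labels.std(axis=0)  # standard deviation of the property label
+
+# the network is trained on the standardized target ...
+normalized = (labels - mean) / std
+# ... and its output is de-normalized when predicting
+restored = normalized * std + mean
+assert np.allclose(restored, labels)
+```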
diff --git a/examples/property/data/data_0/set.000000/property.npy b/examples/property/data/data_0/set.000000/band_prop.npy
similarity index 100%
rename from examples/property/data/data_0/set.000000/property.npy
rename to examples/property/data/data_0/set.000000/band_prop.npy
diff --git a/examples/property/data/data_1/set.000000/property.npy b/examples/property/data/data_1/set.000000/band_prop.npy
similarity index 100%
rename from examples/property/data/data_1/set.000000/property.npy
rename to examples/property/data/data_1/set.000000/band_prop.npy
diff --git a/examples/property/data/data_2/set.000000/property.npy b/examples/property/data/data_2/set.000000/band_prop.npy
similarity index 100%
rename from examples/property/data/data_2/set.000000/property.npy
rename to examples/property/data/data_2/set.000000/band_prop.npy
diff --git a/examples/property/train/README.md b/examples/property/train/README.md
new file mode 100644
index 0000000000..e4dc9ed704
--- /dev/null
+++ b/examples/property/train/README.md
@@ -0,0 +1,5 @@
+Some explanations of the parameters in `input.json`:
+
+1. `fitting_net/property_name` is the name of the property to be predicted. It should be consistent with the property name in the dataset. In each system, the code will read the `set.*/{property_name}.npy` file as the prediction label if you use NumPy format data (see the snippet below).
+2. `fitting_net/task_dim` is the dimension of the model output. It should be consistent with the property dimension in the dataset, which means that if the shape of the data stored in `set.*/{property_name}.npy` is `batch size * 3`, `fitting_net/task_dim` should be set to 3.
+3. `fitting_net/intensive` indicates whether the fitting property is intensive. If `intensive` is `true`, the model output is the average of the property contribution of each atom. If `intensive` is `false`, the model output is the sum of the property contribution of each atom.
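+
+As an illustration (a hypothetical minimal snippet, assuming `property_name` is set to `band_prop` and `task_dim` to 3 as in `input.json`), the label file for one frame can be written with NumPy:
+
+```py
+import numpy as np
+
+# one global label of dimension task_dim = 3 for a single frame
+band_prop = np.array([[-0.236, 0.056, 0.292]])
+np.save("set.000000/band_prop.npy", band_prop)
+```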
diff --git a/examples/property/train/input_torch.json b/examples/property/train/input_torch.json
index 33eaa28a07..1e6ce00048 100644
--- a/examples/property/train/input_torch.json
+++ b/examples/property/train/input_torch.json
@@ -33,6 +33,7 @@
             "type": "property",
             "intensive": true,
             "task_dim": 3,
+            "property_name": "band_prop",
             "neuron": [
                 240,
                 240,
@@ -53,6 +54,11 @@
     },
     "loss": {
         "type": "property",
+        "metric": [
+            "mae"
+        ],
+        "loss_func": "smooth_mae",
+        "beta": 1.0,
         "_comment": " that's all"
     },
     "training": {
diff --git a/source/tests/common/test_out_stat.py b/source/tests/common/test_out_stat.py
index c175d7c643..0236c39f22 100644
--- a/source/tests/common/test_out_stat.py
+++ b/source/tests/common/test_out_stat.py
@@ -4,6 +4,7 @@
 import numpy as np

 from deepmd.utils.out_stat import (
+    compute_stats_do_not_distinguish_types,
     compute_stats_from_atomic,
     compute_stats_from_redu,
 )
@@ -89,6 +90,58 @@ def test_compute_stats_from_redu_with_assigned_bias(self) -> None:
             rtol=1e-7,
         )

+    def test_compute_stats_do_not_distinguish_types_intensive(self) -> None:
+        """Test compute_stats_do_not_distinguish_types with the intensive scenario."""
+        bias, std = compute_stats_do_not_distinguish_types(
+            self.output_redu, self.natoms, intensive=True
+        )
+        # Test shapes
+        assert bias.shape == (len(self.mean), self.output_redu.shape[1])
+        assert std.shape == (len(self.mean), self.output_redu.shape[1])
+
+        # Test values
+        for fake_atom_bias in bias:
+            np.testing.assert_allclose(
+                fake_atom_bias, np.mean(self.output_redu, axis=0), rtol=1e-7
+            )
+        for fake_atom_std in std:
+            np.testing.assert_allclose(
+                fake_atom_std, np.std(self.output_redu, axis=0), rtol=1e-7
+            )
+
+    def test_compute_stats_do_not_distinguish_types_extensive(self) -> None:
+        """Test compute_stats_do_not_distinguish_types with the extensive scenario."""
+        bias, std = compute_stats_do_not_distinguish_types(
+            self.output_redu, self.natoms
+        )
+        # Test shapes
+        assert bias.shape == (len(self.mean), self.output_redu.shape[1])
+        assert std.shape == (len(self.mean), self.output_redu.shape[1])
+
+        # Test values
+        for fake_atom_bias in bias:
+            np.testing.assert_allclose(
+                fake_atom_bias,
+                np.array(
+                    [
+                        6218.91610282,
+                        7183.82275736,
+                        4445.23155934,
+                        5748.23644722,
+                        5362.8519454,
+                    ]
+                ),
+                rtol=1e-7,
+            )
+        for fake_atom_std in std:
+            np.testing.assert_allclose(
+                fake_atom_std,
+                np.array(
+                    [128.78691576, 36.53743668, 105.82372405, 96.43642486, 33.68885327]
+                ),
+                rtol=1e-7,
+            )
+
     def test_compute_stats_from_atomic(self) -> None:
         bias, std = compute_stats_from_atomic(self.output, self.atype)
         np.testing.assert_allclose(bias, self.mean)
diff --git a/source/tests/consistent/descriptor/test_dpa2.py b/source/tests/consistent/descriptor/test_dpa2.py
index 72c0967a78..ef840bf9d7 100644
--- a/source/tests/consistent/descriptor/test_dpa2.py
+++ b/source/tests/consistent/descriptor/test_dpa2.py
@@ -17,6 +17,7 @@
 from ..common import (
     INSTALLED_ARRAY_API_STRICT,
     INSTALLED_JAX,
+    INSTALLED_PD,
     INSTALLED_PT,
     CommonTest,
     parameterized,
@@ -34,6 +35,12 @@
     from deepmd.jax.descriptor.dpa2 import DescrptDPA2 as DescrptDPA2JAX
 else:
     DescrptDPA2JAX = None
+
+if INSTALLED_PD:
+    from deepmd.pd.model.descriptor.dpa2 import DescrptDPA2 as DescrptDPA2PD
+else:
+    DescrptDPA2PD = None
+
 if INSTALLED_ARRAY_API_STRICT:
     from ...array_api_strict.descriptor.dpa2 import DescrptDPA2 as DescrptDPA2Strict
 else:
@@ -214,6 +221,39 @@ def skip_pt(self) -> bool:
         ) = self.param
         return CommonTest.skip_pt

+    @property
+    def skip_pd(self) -> bool:
+        (
+            repinit_tebd_input_mode,
+            repinit_set_davg_zero,
+
repinit_type_one_side, + repinit_use_three_body, + repformer_update_g1_has_conv, + repformer_direct_dist, + repformer_update_g1_has_drrd, + repformer_update_g1_has_grrg, + repformer_update_g1_has_attn, + repformer_update_g2_has_g1g1, + repformer_update_g2_has_attn, + repformer_update_h2, + repformer_attn2_has_gate, + repformer_update_style, + repformer_update_residual_init, + repformer_set_davg_zero, + repformer_trainable_ln, + repformer_ln_eps, + repformer_use_sqrt_nnei, + repformer_g1_out_conv, + repformer_g1_out_mlp, + smooth, + exclude_types, + precision, + add_tebd_to_repinit_out, + use_econf_tebd, + use_tebd_bias, + ) = self.param + return not INSTALLED_PD or precision == "bfloat16" + @property def skip_dp(self) -> bool: ( @@ -286,6 +326,7 @@ def skip_tf(self) -> bool: tf_class = DescrptDPA2TF dp_class = DescrptDPA2DP pt_class = DescrptDPA2PT + pd_class = DescrptDPA2PD jax_class = DescrptDPA2JAX array_api_strict_class = DescrptDPA2Strict args = descrpt_dpa2_args().append(Argument("ntypes", int, optional=False)) @@ -383,6 +424,16 @@ def eval_pt(self, pt_obj: Any) -> Any: mixed_types=True, ) + def eval_pd(self, pd_obj: Any) -> Any: + return self.eval_pd_descriptor( + pd_obj, + self.natoms, + self.coords, + self.atype, + self.box, + mixed_types=True, + ) + def eval_jax(self, jax_obj: Any) -> Any: return self.eval_jax_descriptor( jax_obj, diff --git a/source/tests/consistent/descriptor/test_se_t_tebd.py b/source/tests/consistent/descriptor/test_se_t_tebd.py index bb4a5db6e7..9cdca9bde3 100644 --- a/source/tests/consistent/descriptor/test_se_t_tebd.py +++ b/source/tests/consistent/descriptor/test_se_t_tebd.py @@ -17,6 +17,7 @@ from ..common import ( INSTALLED_ARRAY_API_STRICT, INSTALLED_JAX, + INSTALLED_PD, INSTALLED_PT, CommonTest, parameterized, @@ -34,6 +35,10 @@ from deepmd.jax.descriptor.se_t_tebd import DescrptSeTTebd as DescrptSeTTebdJAX else: DescrptSeTTebdJAX = None +if INSTALLED_PD: + from deepmd.pd.model.descriptor.se_t_tebd import DescrptSeTTebd as DescrptSeTTebdPD +else: + DescrptSeTTebdPD = None if INSTALLED_ARRAY_API_STRICT: from ...array_api_strict.descriptor.se_t_tebd import ( DescrptSeTTebd as DescrptSeTTebdStrict, @@ -146,12 +151,14 @@ def skip_tf(self) -> bool: ) = self.param return True + skip_pd = not INSTALLED_PD skip_jax = not INSTALLED_JAX skip_array_api_strict = not INSTALLED_ARRAY_API_STRICT tf_class = DescrptSeTTebdTF dp_class = DescrptSeTTebdDP pt_class = DescrptSeTTebdPT + pd_class = DescrptSeTTebdPD jax_class = DescrptSeTTebdJAX array_api_strict_class = DescrptSeTTebdStrict args = descrpt_se_e3_tebd_args().append(Argument("ntypes", int, optional=False)) @@ -243,6 +250,16 @@ def eval_jax(self, jax_obj: Any) -> Any: mixed_types=True, ) + def eval_pd(self, pd_obj: Any) -> Any: + return self.eval_pd_descriptor( + pd_obj, + self.natoms, + self.coords, + self.atype, + self.box, + mixed_types=True, + ) + def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: return self.eval_array_api_strict_descriptor( array_api_strict_obj, diff --git a/source/tests/consistent/fitting/test_property.py b/source/tests/consistent/fitting/test_property.py index 3abd672c88..4c359026c7 100644 --- a/source/tests/consistent/fitting/test_property.py +++ b/source/tests/consistent/fitting/test_property.py @@ -86,6 +86,7 @@ def data(self) -> dict: "seed": 20240217, "task_dim": task_dim, "intensive": intensive, + "property_name": "foo", } @property @@ -186,7 +187,7 @@ def eval_pt(self, pt_obj: Any) -> Any: aparam=torch.from_numpy(self.aparam).to(device=PT_DEVICE) if numb_aparam 
else None, - )["property"] + )[pt_obj.var_name] .detach() .cpu() .numpy() @@ -207,7 +208,7 @@ def eval_dp(self, dp_obj: Any) -> Any: self.atype.reshape(1, -1), fparam=self.fparam if numb_fparam else None, aparam=self.aparam if numb_aparam else None, - )["property"] + )[dp_obj.var_name] def eval_jax(self, jax_obj: Any) -> Any: ( @@ -225,7 +226,7 @@ def eval_jax(self, jax_obj: Any) -> Any: jnp.asarray(self.atype.reshape(1, -1)), fparam=jnp.asarray(self.fparam) if numb_fparam else None, aparam=jnp.asarray(self.aparam) if numb_aparam else None, - )["property"] + )[jax_obj.var_name] ) def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: @@ -244,7 +245,7 @@ def eval_array_api_strict(self, array_api_strict_obj: Any) -> Any: array_api_strict.asarray(self.atype.reshape(1, -1)), fparam=array_api_strict.asarray(self.fparam) if numb_fparam else None, aparam=array_api_strict.asarray(self.aparam) if numb_aparam else None, - )["property"] + )[array_api_strict_obj.var_name] ) def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: diff --git a/source/tests/consistent/model/test_property.py b/source/tests/consistent/model/test_property.py index 29786fb247..75aded98fd 100644 --- a/source/tests/consistent/model/test_property.py +++ b/source/tests/consistent/model/test_property.py @@ -56,6 +56,7 @@ def data(self) -> dict: "fitting_net": { "type": "property", "neuron": [4, 4, 4], + "property_name": "foo", "resnet_dt": True, "numb_fparam": 0, "precision": "float64", @@ -182,14 +183,15 @@ def eval_jax(self, jax_obj: Any) -> Any: def extract_ret(self, ret: Any, backend) -> tuple[np.ndarray, ...]: # shape not matched. ravel... + property_name = self.data["fitting_net"]["property_name"] if backend in {self.RefBackend.DP, self.RefBackend.JAX}: return ( - ret["property_redu"].ravel(), - ret["property"].ravel(), + ret[f"{property_name}_redu"].ravel(), + ret[property_name].ravel(), ) elif backend is self.RefBackend.PT: return ( - ret["property"].ravel(), - ret["atom_property"].ravel(), + ret[property_name].ravel(), + ret[f"atom_{property_name}"].ravel(), ) raise ValueError(f"Unknown backend: {backend}") diff --git a/source/tests/pd/model/models/dpa2.json b/source/tests/pd/model/models/dpa2.json new file mode 100644 index 0000000000..f83e319de3 --- /dev/null +++ b/source/tests/pd/model/models/dpa2.json @@ -0,0 +1,57 @@ +{ + "type_map": [ + "O", + "H" + ], + "descriptor": { + "type": "dpa2", + "repinit": { + "rcut": 6.0, + "rcut_smth": 2.0, + "nsel": 30, + "neuron": [ + 2, + 4, + 8 + ], + "axis_neuron": 4, + "activation_function": "tanh" + + }, + "repformer": { + "rcut": 4.0, + "rcut_smth": 0.5, + "nsel": 10, + "nlayers": 12, + "g1_dim": 8, + "g2_dim": 5, + "attn2_hidden": 3, + "attn2_nhead": 1, + "attn1_hidden": 5, + "attn1_nhead": 1, + "axis_neuron": 4, + "update_h2": false, + "update_g1_has_conv": true, + "update_g1_has_grrg": true, + "update_g1_has_drrd": true, + "update_g1_has_attn": true, + "update_g2_has_g1g1": true, + "update_g2_has_attn": true, + "attn2_has_gate": true, + "use_sqrt_nnei": false, + "g1_out_conv": false, + "g1_out_mlp": false + }, + "seed": 1, + "add_tebd_to_repinit_out": false + }, + "fitting_net": { + "neuron": [ + 240, + 240, + 240 + ], + "resnet_dt": true, + "seed": 1 + } +} diff --git a/source/tests/pd/model/models/dpa2.pd b/source/tests/pd/model/models/dpa2.pd new file mode 100644 index 0000000000..650f0c144e Binary files /dev/null and b/source/tests/pd/model/models/dpa2.pd differ diff --git a/source/tests/pd/model/test_autodiff.py 
b/source/tests/pd/model/test_autodiff.py index 1bd9dd0d0f..8442844a24 100644 --- a/source/tests/pd/model/test_autodiff.py +++ b/source/tests/pd/model/test_autodiff.py @@ -60,7 +60,7 @@ def stretch_box(old_coord, old_box, new_box): class ForceTest: def test( self, - ): + ) -> None: env.enable_prim(True) places = 5 delta = 1e-5 @@ -86,10 +86,10 @@ def np_infer_coord( ): result = eval_model( self.model, - paddle.to_tensor(coord).to(device=env.DEVICE).unsqueeze(0), + paddle.to_tensor(coord, place=env.DEVICE).unsqueeze(0), cell.unsqueeze(0), atype, - spins=paddle.to_tensor(spin).to(device=env.DEVICE).unsqueeze(0), + spins=paddle.to_tensor(spin, place=env.DEVICE).unsqueeze(0), ) # detach ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys} @@ -100,10 +100,10 @@ def np_infer_spin( ): result = eval_model( self.model, - paddle.to_tensor(coord).to(device=env.DEVICE).unsqueeze(0), + paddle.to_tensor(coord, place=env.DEVICE).unsqueeze(0), cell.unsqueeze(0), atype, - spins=paddle.to_tensor(spin).to(device=env.DEVICE).unsqueeze(0), + spins=paddle.to_tensor(spin, place=env.DEVICE).unsqueeze(0), ) # detach ret = {key: to_numpy_array(result[key].squeeze(0)) for key in test_keys} @@ -133,7 +133,7 @@ def ff_spin(_spin): class VirialTest: def test( self, - ): + ) -> None: places = 5 delta = 1e-4 natoms = 5 @@ -153,10 +153,10 @@ def np_infer( ): result = eval_model( self.model, - paddle.to_tensor(stretch_box(coord, cell, new_cell)) - .to(device="cpu") - .unsqueeze(0), - paddle.to_tensor(new_cell).to(device="cpu").unsqueeze(0), + paddle.to_tensor( + stretch_box(coord, cell, new_cell), place="cpu" + ).unsqueeze(0), + paddle.to_tensor(new_cell, place="cpu").unsqueeze(0), atype, ) # detach @@ -177,36 +177,35 @@ def ff(bb): class TestEnergyModelSeAForce(unittest.TestCase, ForceTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_se_e2_a) self.type_split = False self.model = get_model(model_params).to(env.DEVICE) class TestEnergyModelSeAVirial(unittest.TestCase, VirialTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_se_e2_a) self.type_split = False self.model = get_model(model_params).to(env.DEVICE) class TestEnergyModelDPA1Force(unittest.TestCase, ForceTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_dpa1) self.type_split = True self.model = get_model(model_params).to(env.DEVICE) class TestEnergyModelDPA1Virial(unittest.TestCase, VirialTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_dpa1) self.type_split = True self.model = get_model(model_params).to(env.DEVICE) -@unittest.skip("Skip for not implemented yet") class TestEnergyModelDPA2Force(unittest.TestCase, ForceTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_dpa2) self.type_split = True self.model = get_model(model_params).to(env.DEVICE) @@ -214,7 +213,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelDPAUniVirial(unittest.TestCase, VirialTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_dpa2) self.type_split = True self.model = get_model(model_params).to(env.DEVICE) @@ -222,7 +221,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelHybridForce(unittest.TestCase, ForceTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_hybrid) self.type_split = True self.model = get_model(model_params).to(env.DEVICE) @@ 
-230,7 +229,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelHybridVirial(unittest.TestCase, VirialTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_hybrid) self.type_split = True self.model = get_model(model_params).to(env.DEVICE) @@ -238,7 +237,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelZBLForce(unittest.TestCase, ForceTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_zbl) self.type_split = False self.model = get_model(model_params).to(env.DEVICE) @@ -246,7 +245,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelZBLVirial(unittest.TestCase, VirialTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_zbl) self.type_split = False self.model = get_model(model_params).to(env.DEVICE) @@ -254,7 +253,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelSpinSeAForce(unittest.TestCase, ForceTest): - def setUp(self): + def setUp(self) -> None: model_params = copy.deepcopy(model_spin) self.type_split = False self.test_spin = True diff --git a/source/tests/pd/model/test_descriptor_dpa2.py b/source/tests/pd/model/test_descriptor_dpa2.py new file mode 100644 index 0000000000..12017bb840 --- /dev/null +++ b/source/tests/pd/model/test_descriptor_dpa2.py @@ -0,0 +1,208 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import json +import os +import unittest +from pathlib import ( + Path, +) + +import numpy as np +import paddle + +from deepmd.pd.model.descriptor import ( + DescrptDPA2, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.nlist import ( + extend_input_and_build_neighbor_list, +) + +CUR_DIR = os.path.dirname(__file__) + + +class TestDPA2(unittest.TestCase): + def setUp(self): + cell = [ + 5.122106549439247480e00, + 4.016537340154059388e-01, + 6.951654033828678081e-01, + 4.016537340154059388e-01, + 6.112136112297989143e00, + 8.178091365465004481e-01, + 6.951654033828678081e-01, + 8.178091365465004481e-01, + 6.159552512682983760e00, + ] + self.cell = ( + paddle.to_tensor(cell, dtype=env.GLOBAL_PD_FLOAT_PRECISION) + .reshape([1, 3, 3]) + .to(device=env.DEVICE) + ) + coord = [ + 2.978060152121375648e00, + 3.588469695887098077e00, + 2.792459820604495491e00, + 3.895592322591093115e00, + 2.712091020667753760e00, + 1.366836847133650501e00, + 9.955616170888935690e-01, + 4.121324820711413039e00, + 1.817239061889086571e00, + 3.553661462345699906e00, + 5.313046969500791583e00, + 6.635182659098815883e00, + 6.088601018589653080e00, + 6.575011420004332585e00, + 6.825240650611076099e00, + ] + self.coord = ( + paddle.to_tensor(coord, dtype=env.GLOBAL_PD_FLOAT_PRECISION) + .reshape([1, -1, 3]) + .to(device=env.DEVICE) + ) + self.atype = ( + paddle.to_tensor([0, 0, 0, 1, 1], dtype=paddle.int32) + .reshape([1, -1]) + .to(device=env.DEVICE) + ) + self.ref_d = paddle.to_tensor( + [ + 8.435412613327306630e-01, + -4.717109614540972440e-01, + -1.812643456954206256e00, + -2.315248767961955167e-01, + -7.112973006771171613e-01, + -4.162041919507591392e-01, + -1.505159810095323181e00, + -1.191652416985768403e-01, + 8.439214937875325617e-01, + -4.712976890460106594e-01, + -1.812605149396642856e00, + -2.307222236291133766e-01, + -7.115427800870099961e-01, + -4.164729253167227530e-01, + -1.505483119125936797e00, + -1.191288524278367872e-01, + 8.286420823261241297e-01, + -4.535033763979030574e-01, + -1.787877160970498425e00, + 
-1.961763875645104460e-01, + -7.475459187804838201e-01, + -5.231446874663764346e-01, + -1.488399984491664219e00, + -3.974117581747104583e-02, + 8.283793431613817315e-01, + -4.551551577556525729e-01, + -1.789253136645859943e00, + -1.977673627726055372e-01, + -7.448826048241211639e-01, + -5.161350182531234676e-01, + -1.487589463573479209e00, + -4.377376017839779143e-02, + 8.295404560710329944e-01, + -4.492219258475603216e-01, + -1.784484611185287450e00, + -1.901182059718481143e-01, + -7.537407667483000395e-01, + -5.384371277650709109e-01, + -1.490368056268364549e00, + -3.073744832541754762e-02, + ], + dtype=env.GLOBAL_PD_FLOAT_PRECISION, + place=env.DEVICE, + ) + self.file_model_param = Path(CUR_DIR) / "models" / "dpa2.pd" + self.file_type_embed = Path(CUR_DIR) / "models" / "dpa2_tebd.pd" + + def test_descriptor(self) -> None: + with open(Path(CUR_DIR) / "models" / "dpa2.json") as fp: + self.model_json = json.load(fp) + model_dpa2 = self.model_json + ntypes = len(model_dpa2["type_map"]) + dparams = model_dpa2["descriptor"] + dparams["ntypes"] = ntypes + assert dparams["type"] == "dpa2" + dparams.pop("type") + dparams["concat_output_tebd"] = False + dparams["use_tebd_bias"] = True + des = DescrptDPA2( + **dparams, + ).to(env.DEVICE) + target_dict = des.state_dict() + source_dict = paddle.load(str(self.file_model_param)) + # type_embd of repformer is removed + source_dict.pop("type_embedding.embedding.embedding_net.layers.0.bias") + type_embd_dict = paddle.load(str(self.file_type_embed)) + target_dict = translate_type_embd_dicts_to_dpa2( + target_dict, + source_dict, + type_embd_dict, + ) + des.set_state_dict(target_dict) + + coord = self.coord + atype = self.atype + box = self.cell + ( + extended_coord, + extended_atype, + mapping, + nlist, + ) = extend_input_and_build_neighbor_list( + coord, + atype, + des.get_rcut(), + des.get_sel(), + mixed_types=des.mixed_types(), + box=box, + ) + descriptor, env_mat, diff, rot_mat, sw = des( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + self.assertEqual(descriptor.shape[-1], des.get_dim_out()) + self.assertAlmostEqual(6.0, des.get_rcut()) + self.assertEqual(30, des.get_nsel()) + self.assertEqual(2, des.get_ntypes()) + np.testing.assert_allclose( + descriptor.reshape([-1]).numpy(), self.ref_d.numpy(), atol=1e-10, rtol=1e-10 + ) + + dparams["concat_output_tebd"] = True + des = DescrptDPA2( + **dparams, + ).to(env.DEVICE) + descriptor, env_mat, diff, rot_mat, sw = des( + extended_coord, + extended_atype, + nlist, + mapping=mapping, + ) + self.assertEqual(descriptor.shape[-1], des.get_dim_out()) + + +def translate_type_embd_dicts_to_dpa2( + target_dict, + source_dict, + type_embd_dict, +): + all_keys = list(target_dict.keys()) + record = [False for ii in all_keys] + for kk, vv in source_dict.items(): + record[all_keys.index(kk)] = True + target_dict[kk] = vv + assert len(type_embd_dict.keys()) == 2 + it = iter(type_embd_dict.keys()) + for _ in range(2): + kk = next(it) + tk = "type_embedding." 
+ kk + record[all_keys.index(tk)] = True + target_dict[tk] = type_embd_dict[kk] + record[all_keys.index("repinit.compress_data.0")] = True + record[all_keys.index("repinit.compress_info.0")] = True + assert all(record) + return target_dict diff --git a/source/tests/pd/model/test_dpa2.py b/source/tests/pd/model/test_dpa2.py new file mode 100644 index 0000000000..f441007cad --- /dev/null +++ b/source/tests/pd/model/test_dpa2.py @@ -0,0 +1,333 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import itertools +import unittest + +import numpy as np +import paddle + +from deepmd.dpmodel.descriptor.dpa2 import DescrptDPA2 as DPDescrptDPA2 +from deepmd.dpmodel.descriptor.dpa2 import ( + RepformerArgs, + RepinitArgs, +) +from deepmd.pd.model.descriptor.dpa2 import ( + DescrptDPA2, +) +from deepmd.pd.utils import ( + env, +) +from deepmd.pd.utils.env import ( + PRECISION_DICT, +) + +from ...seed import ( + GLOBAL_SEED, +) +from .test_env_mat import ( + TestCaseSingleFrameWithNlist, +) +from .test_mlp import ( + get_tols, +) + +dtype = env.GLOBAL_PD_FLOAT_PRECISION + + +class TestDescrptDPA2(unittest.TestCase, TestCaseSingleFrameWithNlist): + def setUp(self) -> None: + TestCaseSingleFrameWithNlist.setUp(self) + + def test_consistency( + self, + ) -> None: + rng = np.random.default_rng(100) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + davg_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd = 0.1 + np.abs(dstd) + dstd_2 = 0.1 + np.abs(dstd_2) + + for ( + riti, + riz, + rp1c, + rp1d, + rp1g, + rp1a, + rp2g, + rp2a, + rph, + rp2gate, + rus, + rpz, + sm, + prec, + ect, + ns, + ) in itertools.product( + ["concat", "strip"], # repinit_tebd_input_mode + [ + True, + ], # repinit_set_davg_zero + [True, False], # repformer_update_g1_has_conv + [True, False], # repformer_update_g1_has_drrd + [True, False], # repformer_update_g1_has_grrg + [ + False, + ], # repformer_update_g1_has_attn + [ + False, + ], # repformer_update_g2_has_g1g1 + [True, False], # repformer_update_g2_has_attn + [ + False, + ], # repformer_update_h2 + [ + True, + ], # repformer_attn2_has_gate + ["res_avg", "res_residual"], # repformer_update_style + [ + True, + ], # repformer_set_davg_zero + [ + True, + ], # smooth + ["float64"], # precision + [False, True], # use_econf_tebd + [ + False, + True, + ], # new sub-structures (use_sqrt_nnei, g1_out_conv, g1_out_mlp) + ): + if ns and not rp1d and not rp1g: + continue + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + if prec == "float64": + atol = 1e-8 # marginal GPU test cases... 
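+ # The repinit stage below keeps the full cutoff and neighbor selection, while the repformer stage runs at half the cutoff with nnei // 2 neighbors, matching the davg_2 / dstd_2 statistics prepared above.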
+ + repinit = RepinitArgs( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + nsel=self.sel_mix, + tebd_input_mode=riti, + set_davg_zero=riz, + ) + repformer = RepformerArgs( + rcut=self.rcut / 2, + rcut_smth=self.rcut_smth, + nsel=nnei // 2, + nlayers=3, + g1_dim=20, + g2_dim=10, + axis_neuron=4, + update_g1_has_conv=rp1c, + update_g1_has_drrd=rp1d, + update_g1_has_grrg=rp1g, + update_g1_has_attn=rp1a, + update_g2_has_g1g1=rp2g, + update_g2_has_attn=rp2a, + update_h2=rph, + attn1_hidden=20, + attn1_nhead=2, + attn2_hidden=10, + attn2_nhead=2, + attn2_has_gate=rp2gate, + update_style=rus, + set_davg_zero=rpz, + use_sqrt_nnei=ns, + g1_out_conv=ns, + g1_out_mlp=ns, + ) + + # dpa2 new impl + dd0 = DescrptDPA2( + self.nt, + repinit=repinit, + repformer=repformer, + # kwargs for descriptor + smooth=sm, + exclude_types=[], + add_tebd_to_repinit_out=False, + precision=prec, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + + dd0.repinit.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.repinit.stddev = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.mean = paddle.to_tensor(davg_2, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.stddev = paddle.to_tensor(dstd_2, dtype=dtype).to( + device=env.DEVICE + ) + rd0, _, _, _, _ = dd0( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.mapping, dtype="int64").to(device=env.DEVICE), + ) + # serialization + dd1 = DescrptDPA2.deserialize(dd0.serialize()) + rd1, _, _, _, _ = dd1( + paddle.to_tensor(self.coord_ext, dtype=dtype).to(device=env.DEVICE), + paddle.to_tensor(self.atype_ext, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.nlist, dtype="int64").to(device=env.DEVICE), + paddle.to_tensor(self.mapping, dtype="int64").to(device=env.DEVICE), + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd1.detach().cpu().numpy(), + rtol=rtol, + atol=atol, + ) + # dp impl + dd2 = DPDescrptDPA2.deserialize(dd0.serialize()) + rd2, _, _, _, _ = dd2.call( + self.coord_ext, self.atype_ext, self.nlist, self.mapping + ) + np.testing.assert_allclose( + rd0.detach().cpu().numpy(), + rd2, + rtol=rtol, + atol=atol, + ) + + @unittest.skip("skip jit in paddle temporarily") + def test_jit( + self, + ) -> None: + rng = np.random.default_rng(100) + nf, nloc, nnei = self.nlist.shape + davg = rng.normal(size=(self.nt, nnei, 4)) + dstd = rng.normal(size=(self.nt, nnei, 4)) + davg_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd_2 = rng.normal(size=(self.nt, nnei // 2, 4)) + dstd = 0.1 + np.abs(dstd) + dstd_2 = 0.1 + np.abs(dstd_2) + + for ( + riti, + riz, + rp1c, + rp1d, + rp1g, + rp1a, + rp2g, + rp2a, + rph, + rp2gate, + rus, + rpz, + sm, + prec, + ect, + ns, + ) in itertools.product( + ["concat", "strip"], # repinit_tebd_input_mode + [ + True, + ], # repinit_set_davg_zero + [ + True, + ], # repformer_update_g1_has_conv + [ + True, + ], # repformer_update_g1_has_drrd + [ + True, + ], # repformer_update_g1_has_grrg + [ + True, + ], # repformer_update_g1_has_attn + [ + True, + ], # repformer_update_g2_has_g1g1 + [ + True, + ], # repformer_update_g2_has_attn + [ + False, + ], # repformer_update_h2 + [ + True, + ], # repformer_attn2_has_gate + ["res_avg", "res_residual"], # repformer_update_style + [ + True, + ], # repformer_set_davg_zero + [ + True, + ], # smooth + ["float64"], # precision + 
[False, True], # use_econf_tebd + [True], # new sub-structures (use_sqrt_nnei, g1_out_conv, g1_out_mlp) + ): + dtype = PRECISION_DICT[prec] + rtol, atol = get_tols(prec) + + repinit = RepinitArgs( + rcut=self.rcut, + rcut_smth=self.rcut_smth, + nsel=self.sel_mix, + tebd_input_mode=riti, + set_davg_zero=riz, + ) + repformer = RepformerArgs( + rcut=self.rcut / 2, + rcut_smth=self.rcut_smth, + nsel=nnei // 2, + nlayers=3, + g1_dim=20, + g2_dim=10, + axis_neuron=4, + update_g1_has_conv=rp1c, + update_g1_has_drrd=rp1d, + update_g1_has_grrg=rp1g, + update_g1_has_attn=rp1a, + update_g2_has_g1g1=rp2g, + update_g2_has_attn=rp2a, + update_h2=rph, + attn1_hidden=20, + attn1_nhead=2, + attn2_hidden=10, + attn2_nhead=2, + attn2_has_gate=rp2gate, + update_style=rus, + set_davg_zero=rpz, + use_sqrt_nnei=ns, + g1_out_conv=ns, + g1_out_mlp=ns, + ) + + # dpa2 new impl + dd0 = DescrptDPA2( + self.nt, + repinit=repinit, + repformer=repformer, + # kwargs for descriptor + smooth=sm, + exclude_types=[], + add_tebd_to_repinit_out=False, + precision=prec, + use_econf_tebd=ect, + type_map=["O", "H"] if ect else None, + seed=GLOBAL_SEED, + ).to(env.DEVICE) + + dd0.repinit.mean = paddle.to_tensor(davg, dtype=dtype).to(device=env.DEVICE) + dd0.repinit.stddev = paddle.to_tensor(dstd, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.mean = paddle.to_tensor(davg_2, dtype=dtype).to( + device=env.DEVICE + ) + dd0.repformers.stddev = paddle.to_tensor(dstd_2, dtype=dtype).to( + device=env.DEVICE + ) + model = paddle.jit.to_static(dd0) diff --git a/source/tests/pd/model/test_forward_lower.py b/source/tests/pd/model/test_forward_lower.py index db6497b605..1d924e2d3d 100644 --- a/source/tests/pd/model/test_forward_lower.py +++ b/source/tests/pd/model/test_forward_lower.py @@ -140,22 +140,21 @@ def test( class TestEnergyModelSeA(unittest.TestCase, ForwardLowerTest): - def setUp(self): + def setUp(self) -> None: self.prec = 1e-10 model_params = copy.deepcopy(model_se_e2_a) self.model = get_model(model_params).to(env.DEVICE) class TestEnergyModelDPA1(unittest.TestCase, ForwardLowerTest): - def setUp(self): + def setUp(self) -> None: self.prec = 1e-10 model_params = copy.deepcopy(model_dpa1) self.model = get_model(model_params).to(env.DEVICE) -@unittest.skip("Skip for not implemented yet") class TestEnergyModelDPA2(unittest.TestCase, ForwardLowerTest): - def setUp(self): + def setUp(self) -> None: self.prec = 1e-10 model_params = copy.deepcopy(model_dpa2) self.model = get_model(model_params).to(env.DEVICE) @@ -163,7 +162,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelZBL(unittest.TestCase, ForwardLowerTest): - def setUp(self): + def setUp(self) -> None: self.prec = 1e-10 model_params = copy.deepcopy(model_zbl) self.model = get_model(model_params).to(env.DEVICE) @@ -171,7 +170,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelSpinSeA(unittest.TestCase, ForwardLowerTest): - def setUp(self): + def setUp(self) -> None: self.prec = 1e-10 model_params = copy.deepcopy(model_spin) self.test_spin = True @@ -180,7 +179,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyModelSpinDPA1(unittest.TestCase, ForwardLowerTest): - def setUp(self): + def setUp(self) -> None: self.prec = 1e-10 model_params = copy.deepcopy(model_spin) model_params["descriptor"] = copy.deepcopy(model_dpa1)["descriptor"] @@ -192,7 +191,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class 
TestEnergyModelSpinDPA2(unittest.TestCase, ForwardLowerTest): - def setUp(self): + def setUp(self) -> None: self.prec = 1e-10 model_params = copy.deepcopy(model_spin) model_params["descriptor"] = copy.deepcopy(model_dpa2)["descriptor"] diff --git a/source/tests/pd/model/test_null_input.py b/source/tests/pd/model/test_null_input.py index 5d67491943..29d2f84eea 100644 --- a/source/tests/pd/model/test_null_input.py +++ b/source/tests/pd/model/test_null_input.py @@ -23,6 +23,7 @@ ) from .test_permutation import ( model_dpa1, + model_dpa2, model_se_e2_a, ) @@ -32,7 +33,7 @@ class NullTest: def test_nloc_1( self, - ): + ) -> None: natoms = 1 generator = paddle.seed(GLOBAL_SEED) # paddle.seed(1000) @@ -60,7 +61,7 @@ def test_nloc_1( def test_nloc_2_far( self, - ): + ) -> None: natoms = 2 generator = paddle.seed(GLOBAL_SEED) cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) @@ -100,3 +101,10 @@ def setUp(self): model_params = copy.deepcopy(model_dpa1) self.type_split = True self.model = get_model(model_params).to(env.DEVICE) + + +class TestEnergyModelDPA2(unittest.TestCase, NullTest): + def setUp(self) -> None: + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) diff --git a/source/tests/pd/model/test_permutation.py b/source/tests/pd/model/test_permutation.py index 4543348d3b..88672457a9 100644 --- a/source/tests/pd/model/test_permutation.py +++ b/source/tests/pd/model/test_permutation.py @@ -331,10 +331,10 @@ }, "fitting_net": { "type": "property", + "property_name": "band_property", "task_dim": 3, "neuron": [24, 24, 24], "resnet_dt": True, - "bias_method": "normal", "intensive": True, "seed": 1, }, @@ -416,7 +416,6 @@ def setUp(self) -> None: self.model = get_model(model_params).to(env.DEVICE) -@unittest.skip("Skip for not implemented yet") class TestEnergyModelDPA2(unittest.TestCase, PermutationTest): def setUp(self) -> None: model_params = copy.deepcopy(model_dpa2) diff --git a/source/tests/pd/model/test_rot.py b/source/tests/pd/model/test_rot.py index 85c90dc60f..84a0d3d724 100644 --- a/source/tests/pd/model/test_rot.py +++ b/source/tests/pd/model/test_rot.py @@ -176,7 +176,6 @@ def setUp(self): self.model = get_model(model_params).to(env.DEVICE) -@unittest.skip("Skip for not implemented yet") class TestEnergyModelDPA2(unittest.TestCase, RotTest): def setUp(self): model_params = copy.deepcopy(model_dpa2) diff --git a/source/tests/pd/model/test_rot_denoise.py b/source/tests/pd/model/test_rot_denoise.py index 74d5d41791..4a1841d10b 100644 --- a/source/tests/pd/model/test_rot_denoise.py +++ b/source/tests/pd/model/test_rot_denoise.py @@ -18,8 +18,9 @@ from ..common import ( eval_model, ) -from .test_permutation_denoise import ( # model_dpa2, +from .test_permutation_denoise import ( model_dpa1, + model_dpa2, ) dtype = paddle.float64 @@ -112,6 +113,14 @@ def setUp(self): self.model = get_model(model_params).to(env.DEVICE) +@unittest.skip("support of the denoise is temporarily disabled") +class TestDenoiseModelDPA2(unittest.TestCase, RotDenoiseTest): + def setUp(self) -> None: + model_params = copy.deepcopy(model_dpa2) + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + + # @unittest.skip("hybrid not supported at the moment") # class TestEnergyModelHybrid(unittest.TestCase, TestRotDenoise): # def setUp(self): diff --git a/source/tests/pd/model/test_smooth.py b/source/tests/pd/model/test_smooth.py index cc50043ad8..f907e6f4ee 100644 --- a/source/tests/pd/model/test_smooth.py +++ 
b/source/tests/pd/model/test_smooth.py @@ -20,6 +20,7 @@ ) from .test_permutation import ( # model_dpau, model_dpa1, + model_dpa2, model_se_e2_a, ) @@ -189,6 +190,36 @@ def setUp(self): self.aprec = 1e-5 + +class TestEnergyModelDPA2(unittest.TestCase, SmoothTest): + def setUp(self) -> None: + model_params = copy.deepcopy(model_dpa2) + model_params["descriptor"]["repinit"]["rcut"] = 8 + model_params["descriptor"]["repinit"]["rcut_smth"] = 3.5 + self.type_split = True + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = 1e-5, 1e-4 + + +class TestEnergyModelDPA2_1(unittest.TestCase, SmoothTest): + def setUp(self) -> None: + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + +class TestEnergyModelDPA2_2(unittest.TestCase, SmoothTest): + def setUp(self) -> None: + model_params = copy.deepcopy(model_dpa2) + model_params["fitting_net"]["type"] = "ener" + self.type_split = True + self.test_virial = False + self.model = get_model(model_params).to(env.DEVICE) + self.epsilon, self.aprec = None, None + + # class TestEnergyFoo(unittest.TestCase): # def test(self): # model_params = model_dpau diff --git a/source/tests/pd/model/test_trans.py b/source/tests/pd/model/test_trans.py index 3fae49d598..f050596996 100644 --- a/source/tests/pd/model/test_trans.py +++ b/source/tests/pd/model/test_trans.py @@ -110,7 +110,6 @@ def setUp(self): self.model = get_model(model_params).to(env.DEVICE) -@unittest.skip("Skip for not implemented yet") class TestEnergyModelDPA2(unittest.TestCase, TransTest): def setUp(self): model_params = copy.deepcopy(model_dpa2) diff --git a/source/tests/pd/model/test_unused_params.py b/source/tests/pd/model/test_unused_params.py new file mode 100644 index 0000000000..bf92171da1 --- /dev/null +++ b/source/tests/pd/model/test_unused_params.py @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: LGPL-3.0-or-later +import copy +import unittest + +import paddle + +from deepmd.pd.model.model import ( + get_model, +) + +from deepmd.pd.utils import ( + env, +) + +from ...seed import ( + GLOBAL_SEED, +) + +from ..common import ( + eval_model, +) + +from .test_permutation import ( + model_dpa2, +) + +dtype = paddle.float64 + + +@unittest.skip("paddle does not support unpacking grad_fn.next_functions") +class TestUnusedParamsDPA2(unittest.TestCase): + def test_unused(self): + import itertools + + for conv, drrd, grrg, attn1, g1g1, attn2, h2 in itertools.product( + [True], + [True], + [True], + [True], + [True], + [True], + [True], + ): + if (not drrd) and (not grrg) and h2: + # skip the case where h2 is not involved + continue + if (not grrg) and (not conv): + # skip the case where g2 is not involved + continue + model = copy.deepcopy(model_dpa2) + model["descriptor"]["repformer"]["nlayers"] = 2 + # model["descriptor"]["combine_grrg"] = cmbg2 + model["descriptor"]["repformer"]["update_g1_has_conv"] = conv + model["descriptor"]["repformer"]["update_g1_has_drrd"] = drrd + model["descriptor"]["repformer"]["update_g1_has_grrg"] = grrg + model["descriptor"]["repformer"]["update_g1_has_attn"] = attn1 + model["descriptor"]["repformer"]["update_g2_has_g1g1"] = g1g1 + model["descriptor"]["repformer"]["update_g2_has_attn"] = attn2 + model["descriptor"]["repformer"]["update_h2"] = h2 + model["fitting_net"]["neuron"] = [12, 12, 12] + self._test_unused(model) + + def _test_unused(self, model_params): + self.model = 
get_model(model_params).to(env.DEVICE) + natoms = 5 + generator = paddle.seed(GLOBAL_SEED) + cell = paddle.rand([3, 3], dtype=dtype).to(device=env.DEVICE) + cell = (cell + cell.T) + 5.0 * paddle.eye(3).to(device=env.DEVICE) + coord = paddle.rand([natoms, 3], dtype=dtype).to(device=env.DEVICE) + coord = paddle.matmul(coord, cell) + atype = paddle.to_tensor([0, 0, 0, 1, 1]).to(env.DEVICE) + idx_perm = [1, 0, 4, 3, 2] + result_0 = eval_model(self.model, coord.unsqueeze(0), cell.unsqueeze(0), atype) + test_keys = ["energy", "force", "virial"] + ret0 = {key: result_0[key].squeeze(0) for key in test_keys} + + # use computation graph to find all contributing tensors + def get_contributing_params(y, top_level=True): + nf = y.grad_fn.next_functions if top_level else y.next_functions + for f, _ in nf: + try: + yield f.variable + except AttributeError: + pass # node has no tensor + if f is not None: + yield from get_contributing_params(f, top_level=False) + + contributing_parameters = set(get_contributing_params(ret0["energy"])) + all_parameters = set(self.model.parameters()) + non_contributing = all_parameters - contributing_parameters + self.assertEqual(len(non_contributing), 0) + + +if __name__ == "__main__": + unittest.main() diff --git a/source/tests/pd/model/water/multitask.json b/source/tests/pd/model/water/multitask.json index 83524a8b77..2786afca59 100644 --- a/source/tests/pd/model/water/multitask.json +++ b/source/tests/pd/model/water/multitask.json @@ -10,7 +10,8 @@ "type": "se_e2_a", "sel": [ 46, - 92 + 92, + 4 ], "rcut_smth": 0.50, "rcut": 6.00, diff --git a/source/tests/pd/model/water/multitask_sharefit.json b/source/tests/pd/model/water/multitask_sharefit.json index 246b5992f7..934ef04998 100644 --- a/source/tests/pd/model/water/multitask_sharefit.json +++ b/source/tests/pd/model/water/multitask_sharefit.json @@ -91,14 +91,14 @@ "stat_file": "./stat_files/model_1.hdf5", "training_data": { "systems": [ - "pt/water/data/data_0" + "pd/water/data/data_0" ], "batch_size": 1, "_comment": "that's all" }, "validation_data": { "systems": [ - "pt/water/data/data_0" + "pd/water/data/data_0" ], "batch_size": 1, "_comment": "that's all" @@ -108,14 +108,14 @@ "stat_file": "./stat_files/model_2.hdf5", "training_data": { "systems": [ - "pt/water/data/data_0" + "pd/water/data/data_0" ], "batch_size": 1, "_comment": "that's all" }, "validation_data": { "systems": [ - "pt/water/data/data_0" + "pd/water/data/data_0" ], "batch_size": 1, "_comment": "that's all" diff --git a/source/tests/pd/test_finetune.py b/source/tests/pd/test_finetune.py index f82f7a8cd0..769ea6f6d3 100644 --- a/source/tests/pd/test_finetune.py +++ b/source/tests/pd/test_finetune.py @@ -197,7 +197,7 @@ def test_finetune_change_out_bias(self): self.tearDown() - def test_finetune_change_type(self): + def test_finetune_change_type(self) -> None: if not self.mixed_types: # skip when not mixed_types return @@ -284,7 +284,7 @@ def test_finetune_change_type(self): self.tearDown() - def tearDown(self): + def tearDown(self) -> None: for f in os.listdir("."): if f.startswith("model") and f.endswith(".pd"): os.remove(f) @@ -295,7 +295,7 @@ def tearDown(self): class TestEnergyModelSeA(FinetuneTest, unittest.TestCase): - def setUp(self): + def setUp(self) -> None: input_json = str(Path(__file__).parent / "water/se_atten.json") with open(input_json) as f: self.config = json.load(f) @@ -311,7 +311,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyZBLModelSeA(FinetuneTest, unittest.TestCase): - def setUp(self): + def 
setUp(self) -> None: input_json = str(Path(__file__).parent / "water/se_atten.json") with open(input_json) as f: self.config = json.load(f) @@ -327,7 +327,7 @@ def setUp(self): @unittest.skip("Skip for not implemented yet") class TestEnergyDOSModelSeA(FinetuneTest, unittest.TestCase): - def setUp(self): + def setUp(self) -> None: input_json = str(Path(__file__).parent / "dos/input.json") with open(input_json) as f: self.config = json.load(f) @@ -342,7 +342,7 @@ def setUp(self): class TestEnergyModelDPA1(FinetuneTest, unittest.TestCase): - def setUp(self): + def setUp(self) -> None: input_json = str(Path(__file__).parent / "water/se_atten.json") with open(input_json) as f: self.config = json.load(f) @@ -356,9 +356,8 @@ def setUp(self): self.testkey = None -@unittest.skip("Skip for not implemented yet") class TestEnergyModelDPA2(FinetuneTest, unittest.TestCase): - def setUp(self): + def setUp(self) -> None: input_json = str(Path(__file__).parent / "water/se_atten.json") with open(input_json) as f: self.config = json.load(f) diff --git a/source/tests/pd/test_multitask.py b/source/tests/pd/test_multitask.py index d59990dcca..72ad251068 100644 --- a/source/tests/pd/test_multitask.py +++ b/source/tests/pd/test_multitask.py @@ -30,6 +30,8 @@ from .model.test_permutation import ( model_dpa1, + model_dpa2, + model_dpa2tebd, model_se_e2_a, ) @@ -40,6 +42,13 @@ def setUpModule() -> None: with open(multitask_template_json) as f: multitask_template = json.load(f) + global multitask_sharefit_template + multitask_sharefit_template_json = str( + Path(__file__).parent / "water/multitask_sharefit.json" + ) + with open(multitask_sharefit_template_json) as f: + multitask_sharefit_template = json.load(f) + class MultiTaskTrainTest: def test_multitask_train(self) -> None: @@ -227,6 +236,46 @@ def tearDown(self) -> None: MultiTaskTrainTest.tearDown(self) +class TestMultiTaskSeASharefit(unittest.TestCase, MultiTaskTrainTest): + def setUp(self) -> None: + multitask_se_e2_a = deepcopy(multitask_sharefit_template) + multitask_se_e2_a["model"]["shared_dict"]["my_descriptor"] = model_se_e2_a[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "se_e2_a_share_fit" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_se_e2_a + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) + self.config["model"], self.shared_links = preprocess_shared_params( + self.config["model"] + ) + self.share_fitting = True + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + class TestMultiTaskDPA1(unittest.TestCase, MultiTaskTrainTest): def setUp(self) -> None: multitask_DPA1 = deepcopy(multitask_template) @@ -266,5 +315,83 @@ def tearDown(self) -> None: MultiTaskTrainTest.tearDown(self) +class TestMultiTaskDPA2(unittest.TestCase, MultiTaskTrainTest): + def setUp(self) -> None: + 
multitask_DPA2 = deepcopy(multitask_template) + multitask_DPA2["model"]["shared_dict"]["my_descriptor"] = model_dpa2[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "DPA2" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_DPA2 + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) + self.config["model"], self.shared_links = preprocess_shared_params( + self.config["model"] + ) + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + +class TestMultiTaskDPA2Tebd(unittest.TestCase, MultiTaskTrainTest): + def setUp(self) -> None: + multitask_DPA2 = deepcopy(multitask_template) + multitask_DPA2["model"]["shared_dict"]["my_descriptor"] = model_dpa2tebd[ + "descriptor" + ] + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.stat_files = "DPA2Tebd" + os.makedirs(self.stat_files, exist_ok=True) + self.config = multitask_DPA2 + self.config["training"]["data_dict"]["model_1"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_1"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_1"]["stat_file"] = ( + f"{self.stat_files}/model_1" + ) + self.config["training"]["data_dict"]["model_2"]["training_data"]["systems"] = ( + data_file + ) + self.config["training"]["data_dict"]["model_2"]["validation_data"][ + "systems" + ] = data_file + self.config["training"]["data_dict"]["model_2"]["stat_file"] = ( + f"{self.stat_files}/model_2" + ) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + self.origin_config = deepcopy(self.config) + self.config["model"], self.shared_links = preprocess_shared_params( + self.config["model"] + ) + + def tearDown(self) -> None: + MultiTaskTrainTest.tearDown(self) + + if __name__ == "__main__": unittest.main() diff --git a/source/tests/pd/test_training.py b/source/tests/pd/test_training.py index c3d65c09df..8958dcb165 100644 --- a/source/tests/pd/test_training.py +++ b/source/tests/pd/test_training.py @@ -24,6 +24,7 @@ from .model.test_permutation import ( model_dpa1, + model_dpa2, model_se_e2_a, ) @@ -195,5 +196,21 @@ def tearDown(self) -> None: DPTrainTest.tearDown(self) +class TestEnergyModelDPA2(unittest.TestCase, DPTrainTest): + def setUp(self) -> None: + input_json = str(Path(__file__).parent / "water/se_atten.json") + with open(input_json) as f: + self.config = json.load(f) + data_file = [str(Path(__file__).parent / "water/data/data_0")] + self.config["training"]["training_data"]["systems"] = data_file + self.config["training"]["validation_data"]["systems"] = data_file + self.config["model"] = deepcopy(model_dpa2) + self.config["training"]["numb_steps"] = 1 + self.config["training"]["save_freq"] = 1 + + def tearDown(self) -> None: + DPTrainTest.tearDown(self) + + if __name__ == 
"__main__": unittest.main() diff --git a/source/tests/pd/test_update_sel.py b/source/tests/pd/test_update_sel.py index e7b1acf6ff..10342357c6 100644 --- a/source/tests/pd/test_update_sel.py +++ b/source/tests/pd/test_update_sel.py @@ -31,7 +31,7 @@ def setUp(self) -> None: return super().setUp() @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") - def test_update_one_sel(self, sel_mock): + def test_update_one_sel(self, sel_mock) -> None: sel_mock.return_value = self.mock_min_nbor_dist, [10, 20] min_nbor_dist, sel = self.update_sel.update_one_sel(None, None, 6, "auto") @@ -45,7 +45,7 @@ def test_update_one_sel(self, sel_mock): @unittest.skip("Skip for not implemented yet") @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") - def test_update_sel_hybrid(self, sel_mock): + def test_update_sel_hybrid(self, sel_mock) -> None: sel_mock.return_value = self.mock_min_nbor_dist, [10, 20] jdata = { @@ -76,7 +76,7 @@ def test_update_sel_hybrid(self, sel_mock): self.assertEqual(jdata, expected_out) @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") - def test_update_sel(self, sel_mock): + def test_update_sel(self, sel_mock) -> None: sel_mock.return_value = self.mock_min_nbor_dist, [10, 20] jdata = { @@ -90,9 +90,8 @@ def test_update_sel(self, sel_mock): jdata = update_sel(jdata) self.assertEqual(jdata, expected_out) - @unittest.skip("Skip for not implemented yet") @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") - def test_update_sel_atten_auto(self, sel_mock): + def test_update_sel_atten_auto(self, sel_mock) -> None: sel_mock.return_value = self.mock_min_nbor_dist, [25] jdata = { @@ -118,9 +117,8 @@ def test_update_sel_atten_auto(self, sel_mock): jdata = update_sel(jdata) self.assertEqual(jdata, expected_out) - @unittest.skip("Skip for not implemented yet") @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") - def test_update_sel_atten_int(self, sel_mock): + def test_update_sel_atten_int(self, sel_mock) -> None: sel_mock.return_value = self.mock_min_nbor_dist, [25] jdata = { @@ -146,9 +144,8 @@ def test_update_sel_atten_int(self, sel_mock): jdata = update_sel(jdata) self.assertEqual(jdata, expected_out) - @unittest.skip("Skip for not implemented yet") @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") - def test_update_sel_atten_list(self, sel_mock): + def test_update_sel_atten_list(self, sel_mock) -> None: sel_mock.return_value = self.mock_min_nbor_dist, [25] jdata = { @@ -174,7 +171,50 @@ def test_update_sel_atten_list(self, sel_mock): jdata = update_sel(jdata) self.assertEqual(jdata, expected_out) - def test_skip_frozen(self): + @patch("deepmd.pd.utils.update_sel.UpdateSel.get_nbor_stat") + def test_update_sel_dpa2_auto(self, sel_mock) -> None: + sel_mock.return_value = self.mock_min_nbor_dist, [25] + + jdata = { + "model": { + "descriptor": { + "type": "dpa2", + "repinit": { + "rcut": 6.0, + "nsel": "auto", + "three_body_rcut": 4.0, + "three_body_sel": "auto", + }, + "repformer": { + "rcut": 4.0, + "nsel": "auto", + }, + } + }, + "training": {"training_data": {}}, + } + expected_out = { + "model": { + "descriptor": { + "type": "dpa2", + "repinit": { + "rcut": 6.0, + "nsel": 28, + "three_body_rcut": 4.0, + "three_body_sel": 28, + }, + "repformer": { + "rcut": 4.0, + "nsel": 28, + }, + } + }, + "training": {"training_data": {}}, + } + jdata = update_sel(jdata) + self.assertEqual(jdata, expected_out) + + def test_skip_frozen(self) -> None: jdata = { "model": { "type": "frozen", @@ -185,7 +225,7 @@ def test_skip_frozen(self): jdata = 
update_sel(jdata) self.assertEqual(jdata, expected_out) - def test_wrap_up_4(self): + def test_wrap_up_4(self) -> None: self.assertEqual(self.update_sel.wrap_up_4(12), 3 * 4) self.assertEqual(self.update_sel.wrap_up_4(13), 4 * 4) self.assertEqual(self.update_sel.wrap_up_4(14), 4 * 4) diff --git a/source/tests/pt/model/test_permutation.py b/source/tests/pt/model/test_permutation.py index 5c7b8db9a4..e4eb47a540 100644 --- a/source/tests/pt/model/test_permutation.py +++ b/source/tests/pt/model/test_permutation.py @@ -331,9 +331,9 @@ "fitting_net": { "type": "property", "task_dim": 3, + "property_name": "band_property", "neuron": [24, 24, 24], "resnet_dt": True, - "bias_method": "normal", "intensive": True, "seed": 1, }, diff --git a/source/tests/pt/model/test_property_fitting.py b/source/tests/pt/model/test_property_fitting.py index 305d1be951..6825924bc1 100644 --- a/source/tests/pt/model/test_property_fitting.py +++ b/source/tests/pt/model/test_property_fitting.py @@ -61,7 +61,7 @@ def test_consistency( self.atype_ext[:, : self.nloc], dtype=int, device=env.DEVICE ) - for nfp, nap, bias_atom_p, intensive, bias_method in itertools.product( + for nfp, nap, bias_atom_p, intensive in itertools.product( [0, 3], [0, 4], [ @@ -69,18 +69,17 @@ def test_consistency( np.array([[11, 12, 13, 4, 15], [16, 17, 18, 9, 20]]), ], [True, False], - ["normal", "no_bias"], ): ft0 = PropertyFittingNet( self.nt, self.dd0.dim_out, task_dim=5, + property_name="foo", numb_fparam=nfp, numb_aparam=nap, mixed_types=self.dd0.mixed_types(), bias_atom_p=bias_atom_p, intensive=intensive, - bias_method=bias_method, seed=GLOBAL_SEED, ).to(env.DEVICE) @@ -120,36 +119,35 @@ def test_consistency( aparam=to_numpy_array(iap), ) np.testing.assert_allclose( - to_numpy_array(ret0["property"]), - ret1["property"], + to_numpy_array(ret0[ft0.var_name]), + ret1[ft1.var_name], ) np.testing.assert_allclose( - to_numpy_array(ret0["property"]), - to_numpy_array(ret2["property"]), + to_numpy_array(ret0[ft0.var_name]), + to_numpy_array(ret2[ft2.var_name]), ) np.testing.assert_allclose( - to_numpy_array(ret0["property"]), - ret3["property"], + to_numpy_array(ret0[ft0.var_name]), + ret3[ft3.var_name], ) def test_jit( self, ) -> None: - for nfp, nap, intensive, bias_method in itertools.product( + for nfp, nap, intensive in itertools.product( [0, 3], [0, 4], [True, False], - ["normal", "no_bias"], ): ft0 = PropertyFittingNet( self.nt, self.dd0.dim_out, task_dim=5, + property_name="foo", numb_fparam=nfp, numb_aparam=nap, mixed_types=self.dd0.mixed_types(), intensive=intensive, - bias_method=bias_method, seed=GLOBAL_SEED, ).to(env.DEVICE) torch.jit.script(ft0) @@ -201,6 +199,7 @@ def test_trans(self) -> None: self.nt, self.dd0.dim_out, task_dim=11, + property_name="bar", numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), @@ -229,7 +228,7 @@ def test_trans(self) -> None: ) ret0 = ft0(rd0, atype, gr0, fparam=None, aparam=None) - res.append(ret0["property"]) + res.append(ret0[ft0.var_name]) np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1])) @@ -257,21 +256,20 @@ def test_rot(self) -> None: # use larger cell to rotate only coord and shift to the center of cell cell_rot = 10.0 * torch.eye(3, dtype=dtype, device=env.DEVICE) - for nfp, nap, intensive, bias_method in itertools.product( + for nfp, nap, intensive in itertools.product( [0, 3], [0, 4], [True, False], - ["normal", "no_bias"], ): ft0 = PropertyFittingNet( self.nt, self.dd0.dim_out, # dim_descrpt - task_dim=9, + task_dim=5, + property_name="bar", 
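+ # arbitrary label key for this test; outputs are fetched via ft0.var_name below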
numb_fparam=nfp, numb_aparam=nap, mixed_types=self.dd0.mixed_types(), intensive=intensive, - bias_method=bias_method, seed=GLOBAL_SEED, ).to(env.DEVICE) if nfp > 0: @@ -312,7 +310,7 @@ def test_rot(self) -> None: ) ret0 = ft0(rd0, atype, gr0, fparam=ifp, aparam=iap) - res.append(ret0["property"]) + res.append(ret0[ft0.var_name]) np.testing.assert_allclose( to_numpy_array(res[1]), to_numpy_array(res[0]), @@ -324,6 +322,7 @@ def test_permu(self) -> None: self.nt, self.dd0.dim_out, task_dim=8, + property_name="abc", numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), @@ -353,7 +352,7 @@ def test_permu(self) -> None: ) ret0 = ft0(rd0, atype, gr0, fparam=None, aparam=None) - res.append(ret0["property"]) + res.append(ret0[ft0.var_name]) np.testing.assert_allclose( to_numpy_array(res[0][:, idx_perm]), @@ -372,6 +371,7 @@ def test_trans(self) -> None: self.nt, self.dd0.dim_out, task_dim=11, + property_name="foo", numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), @@ -400,7 +400,7 @@ def test_trans(self) -> None: ) ret0 = ft0(rd0, atype, gr0, fparam=None, aparam=None) - res.append(ret0["property"]) + res.append(ret0[ft0.var_name]) np.testing.assert_allclose(to_numpy_array(res[0]), to_numpy_array(res[1])) @@ -422,6 +422,7 @@ def setUp(self) -> None: self.nt, self.dd0.dim_out, task_dim=3, + property_name="bar", numb_fparam=0, numb_aparam=0, mixed_types=self.dd0.mixed_types(), diff --git a/source/tests/pt/property/double/nopbc b/source/tests/pt/property/double/nopbc new file mode 100644 index 0000000000..e69de29bb2 diff --git a/source/tests/pt/property/double/set.000000/band_property.npy b/source/tests/pt/property/double/set.000000/band_property.npy new file mode 100644 index 0000000000..042c1a8b0d Binary files /dev/null and b/source/tests/pt/property/double/set.000000/band_property.npy differ diff --git a/source/tests/pt/property/double/set.000000/coord.npy b/source/tests/pt/property/double/set.000000/coord.npy new file mode 100644 index 0000000000..9c781a81f3 Binary files /dev/null and b/source/tests/pt/property/double/set.000000/coord.npy differ diff --git a/source/tests/pt/property/double/set.000000/real_atom_types.npy b/source/tests/pt/property/double/set.000000/real_atom_types.npy new file mode 100644 index 0000000000..3bfe0abd94 Binary files /dev/null and b/source/tests/pt/property/double/set.000000/real_atom_types.npy differ diff --git a/source/tests/pt/property/double/type.raw b/source/tests/pt/property/double/type.raw new file mode 100644 index 0000000000..d677b495ec --- /dev/null +++ b/source/tests/pt/property/double/type.raw @@ -0,0 +1,20 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 diff --git a/source/tests/pt/property/double/type_map.raw b/source/tests/pt/property/double/type_map.raw new file mode 100644 index 0000000000..c8a39f3a9e --- /dev/null +++ b/source/tests/pt/property/double/type_map.raw @@ -0,0 +1,4 @@ +H +C +N +O diff --git a/source/tests/pt/property/input.json b/source/tests/pt/property/input.json index 4e005f8277..44bc1e6005 100644 --- a/source/tests/pt/property/input.json +++ b/source/tests/pt/property/input.json @@ -27,6 +27,7 @@ "fitting_net": { "type": "property", "intensive": true, + "property_name": "band_property", "task_dim": 3, "neuron": [ 100, diff --git a/source/tests/pt/property/single/set.000000/property.npy b/source/tests/pt/property/single/set.000000/band_property.npy similarity index 100% rename from source/tests/pt/property/single/set.000000/property.npy rename to 
source/tests/pt/property/single/set.000000/band_property.npy diff --git a/source/tests/pt/test_dp_test.py b/source/tests/pt/test_dp_test.py index dbec472cc0..c2915c7ee7 100644 --- a/source/tests/pt/test_dp_test.py +++ b/source/tests/pt/test_dp_test.py @@ -183,7 +183,7 @@ def test_dp_test_1_frame(self) -> None: pred_property = np.loadtxt(self.detail_file + ".property.out.0")[:, 1] np.testing.assert_almost_equal( pred_property, - to_numpy_array(result["property"])[0], + to_numpy_array(result[model.get_var_name()])[0], ) def tearDown(self) -> None: diff --git a/source/tests/pt/test_training.py b/source/tests/pt/test_training.py index 1fbd01c39f..ad52c5db16 100644 --- a/source/tests/pt/test_training.py +++ b/source/tests/pt/test_training.py @@ -464,7 +464,7 @@ def setUp(self) -> None: property_input = str(Path(__file__).parent / "property/input.json") with open(property_input) as f: self.config_property = json.load(f) - prop_data_file = [str(Path(__file__).parent / "property/single")] + prop_data_file = [str(Path(__file__).parent / "property/double")] self.config_property["training"]["training_data"]["systems"] = prop_data_file self.config_property["training"]["validation_data"]["systems"] = prop_data_file self.config_property["model"]["descriptor"] = deepcopy(model_dpa1["descriptor"]) diff --git a/source/tests/universal/common/cases/model/model.py b/source/tests/universal/common/cases/model/model.py index cee69d9d6c..06ddd90970 100644 --- a/source/tests/universal/common/cases/model/model.py +++ b/source/tests/universal/common/cases/model/model.py @@ -165,7 +165,7 @@ def setUpClass(cls) -> None: cls.expected_dim_aparam = 0 cls.expected_sel_type = [0, 1] cls.expected_aparam_nall = False - cls.expected_model_output_type = ["property", "mask"] + cls.expected_model_output_type = ["band_prop", "mask"] cls.model_output_equivariant = [] cls.expected_sel = [46, 92] cls.expected_sel_mix = sum(cls.expected_sel) diff --git a/source/tests/universal/dpmodel/fitting/test_fitting.py b/source/tests/universal/dpmodel/fitting/test_fitting.py index db199c02a3..2fe0060003 100644 --- a/source/tests/universal/dpmodel/fitting/test_fitting.py +++ b/source/tests/universal/dpmodel/fitting/test_fitting.py @@ -208,6 +208,8 @@ def FittingParamProperty( "dim_descrpt": dim_descrpt, "mixed_types": mixed_types, "type_map": type_map, + "task_dim": 3, + "property_name": "band_prop", "exclude_types": exclude_types, "seed": GLOBAL_SEED, "precision": precision, diff --git a/source/tests/universal/dpmodel/loss/test_loss.py b/source/tests/universal/dpmodel/loss/test_loss.py index 6473c159da..79c67cdba4 100644 --- a/source/tests/universal/dpmodel/loss/test_loss.py +++ b/source/tests/universal/dpmodel/loss/test_loss.py @@ -189,11 +189,14 @@ def LossParamTensor( def LossParamProperty(): key_to_pref_map = { - "property": 1.0, + "foo": 1.0, } input_dict = { "key_to_pref_map": key_to_pref_map, - "task_dim": 2, + "var_name": "foo", + "out_bias": [0.1, 0.5, 1.2, -0.1, -10], + "out_std": [8, 10, 0.001, -0.2, -10], + "task_dim": 5, } return input_dict
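
A minimal sketch (not part of the diff; it assumes execution from the repository root) of the data-layout convention the renamed tests above rely on: the fitting config's "property_name" (here "band_property", with "task_dim": 3 in source/tests/pt/property/input.json) selects both the label file stem inside each set directory and the key under which predictions are returned.

from pathlib import Path

import numpy as np

# The label file stem follows the configured property_name ("band_property").
system = Path("source/tests/pt/property/double")
labels = np.load(system / "set.000000" / "band_property.npy")
# Expected layout: one row of task_dim values per frame.
nframes = labels.shape[0]
assert labels.reshape(nframes, -1).shape[1] == 3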