diff --git a/src/force/dftd3.cu b/src/force/dftd3.cu
index b9fe6e027..a25033a7c 100644
--- a/src/force/dftd3.cu
+++ b/src/force/dftd3.cu
@@ -42,13 +42,12 @@ namespace
 {
 const int MN = 10000; // maximum number of neighbors for one atom
 const std::string ELEMENTS[NUM_ELEMENTS] = {
-  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",
-  "S",  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
-  "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh",
-  "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
-  "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re",
-  "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
-  "Pa", "U",  "Np", "Pu"};
+  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",  "S",
+  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
+  "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
+  "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
+  "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt", "Au", "Hg",
+  "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu"};
 
 void __global__ find_dftd3_coordination_number_small_box(
   DFTD3::DFTD3_Para dftd3_para,
diff --git a/src/force/nep3.cu b/src/force/nep3.cu
index a4b340257..347fd5c2c 100644
--- a/src/force/nep3.cu
+++ b/src/force/nep3.cu
@@ -32,13 +32,12 @@ heat transport, Phys. Rev. B. 104, 104309 (2021).
 #include <vector>
 
 const std::string ELEMENTS[NUM_ELEMENTS] = {
-  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",
-  "S",  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
-  "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh",
-  "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
-  "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re",
-  "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
-  "Pa", "U",  "Np", "Pu"};
+  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",  "S",
+  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
+  "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
+  "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
+  "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt", "Au", "Hg",
+  "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu"};
 
 void NEP3::initialize_dftd3()
 {
@@ -123,7 +122,9 @@ NEP3::NEP3(const char* file_potential, const int num_atoms)
     paramb.version = 4;
     paramb.model_type = 2;
   } else {
-    std::cout << tokens[0] << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." << std::endl;
+    std::cout << tokens[0]
+              << " is an unsupported NEP model. We only support NEP3 and NEP4 models now."
+              << std::endl;
     exit(1);
   }
   paramb.num_types = get_int_from_token(tokens[1], __FILE__, __LINE__);
@@ -175,7 +176,8 @@ NEP3::NEP3(const char* file_potential, const int num_atoms)
   // cutoff 4.2 3.7 80 47 1
   tokens = get_tokens(input);
   if (tokens.size() != 5 && tokens.size() != 8) {
-    std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular [radial_factor] [angular_factor] [zbl_factor].\n";
+    std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular "
+                 "[radial_factor] [angular_factor] [zbl_factor].\n";
     exit(1);
   }
   paramb.rc_radial = get_float_from_token(tokens[1], __FILE__, __LINE__);
@@ -224,7 +226,7 @@ NEP3::NEP3(const char* file_potential, const int num_atoms)
   tokens = get_tokens(input);
   if (tokens.size() != 3) {
     std::cout << "This line should be basis_size basis_size_radial basis_size_angular."
-      << std::endl;
+              << std::endl;
     exit(1);
   }
   paramb.basis_size_radial = get_int_from_token(tokens[1], __FILE__, __LINE__);
@@ -518,8 +520,12 @@ static __global__ void find_neighbor_list_large_box(
           if (paramb.use_typewise_cutoff) {
             int z1 = paramb.atomic_numbers[t1];
             int z2 = paramb.atomic_numbers[t2];
-            rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial);
-            rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular);
+            rc_radial = min(
+              (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor,
+              rc_radial);
+            rc_angular = min(
+              (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor,
+              rc_angular);
           }
 
           if (d12_square >= rc_radial * rc_radial) {
@@ -601,13 +607,16 @@ static __global__ void find_descriptor(
       int t2 = g_type[n2];
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc(rc, rcinv, d12, fc12);
       float fn12[MAX_NUM_N];
-      
+
       find_fn(paramb.basis_size_radial, rcinv, d12, fc12, fn12);
       for (int n = 0; n <= paramb.n_max_radial; ++n) {
         float gn12 = 0.0f;
@@ -649,8 +658,11 @@ static __global__ void find_descriptor(
         int t2 = g_type[n2];
         float rc = paramb.rc_angular;
         if (paramb.use_typewise_cutoff) {
-          rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-          + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+          rc = min(
+            (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+             COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+              paramb.typewise_cutoff_angular_factor,
+            rc);
         }
         float rcinv = 1.0f / rc;
         find_fc(rc, rcinv, d12, fc12);
@@ -803,8 +815,11 @@ static __global__ void find_force_radial(
       float fc12, fcp12;
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
@@ -946,16 +961,18 @@ static __global__ void find_partial_force_angular(
       int t2 = g_type[n2];
       float rc = paramb.rc_angular;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_angular_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
 
       float fn12[MAX_NUM_N];
       float fnp12[MAX_NUM_N];
-      find_fn_and_fnp(
-        paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12);
+      find_fn_and_fnp(paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12);
       for (int n = 0; n <= paramb.n_max_angular; ++n) {
         float gn12 = 0.0f;
         float gnp12 = 0.0f;
@@ -1057,7 +1074,9 @@ static __global__ void find_force_ZBL(
         float rc_outer = zbl.rc_outer;
         if (paramb.use_typewise_cutoff_zbl) {
           // zi and zj start from 1, so need to minus 1 here
-          rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer);
+          rc_outer = min(
+            (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor,
+            rc_outer);
           rc_inner = rc_outer * 0.5f;
         }
         find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp);
@@ -1583,8 +1602,11 @@ static __global__ void find_descriptor(
       int t2 = g_type[n2];
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc(rc, rcinv, d12, fc12);
@@ -1630,8 +1652,11 @@ static __global__ void find_descriptor(
         int t2 = g_type[n2];
         float rc = paramb.rc_angular;
         if (paramb.use_typewise_cutoff) {
-          rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-          + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+          rc = min(
+            (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+             COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+              paramb.typewise_cutoff_angular_factor,
+            rc);
         }
         float rcinv = 1.0f / rc;
         find_fc(rc, rcinv, d12, fc12);
diff --git a/src/force/nep3.cuh b/src/force/nep3.cuh
index 602b28f9a..43c85b6ca 100644
--- a/src/force/nep3.cuh
+++ b/src/force/nep3.cuh
@@ -76,13 +76,13 @@ public:
   };
 
   struct ANN {
-    int dim = 0;                 // dimension of the descriptor
-    int num_neurons1 = 0;        // number of neurons in the 1st hidden layer
-    int num_para = 0;            // number of parameters
+    int dim = 0;                   // dimension of the descriptor
+    int num_neurons1 = 0;          // number of neurons in the 1st hidden layer
+    int num_para = 0;              // number of parameters
     const float* w0[NUM_ELEMENTS]; // weight from the input layer to the hidden layer
     const float* b0[NUM_ELEMENTS]; // bias for the hidden layer
     const float* w1[NUM_ELEMENTS]; // weight from the hidden layer to the output layer
-    const float* b1;             // bias for the output layer
+    const float* b1;               // bias for the output layer
     const float* c;
     // for the scalar part of polarizability
     const float* w0_pol[10];
diff --git a/src/force/nep3_multigpu.cu b/src/force/nep3_multigpu.cu
index d70eb867f..efcd95036 100644
--- a/src/force/nep3_multigpu.cu
+++ b/src/force/nep3_multigpu.cu
@@ -34,13 +34,12 @@ when there is NVlink, but is also not very bad when there is only PCI-E.
 #include <vector>
 
 const std::string ELEMENTS[NUM_ELEMENTS] = {
-  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",
-  "S",  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
-  "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh",
-  "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
-  "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re",
-  "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
-  "Pa", "U",  "Np", "Pu"};
+  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",  "S",
+  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
+  "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
+  "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
+  "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt", "Au", "Hg",
+  "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu"};
 
 void NEP3_MULTIGPU::initialize_dftd3()
 {
@@ -125,7 +124,9 @@ NEP3_MULTIGPU::NEP3_MULTIGPU(
     paramb.version = 4;
     paramb.model_type = 2;
   } else {
-    std::cout << tokens[0] << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." << std::endl;
+    std::cout << tokens[0]
+              << " is an unsupported NEP model. We only support NEP3 and NEP4 models now."
+              << std::endl;
     exit(1);
   }
   paramb.num_types = get_int_from_token(tokens[1], __FILE__, __LINE__);
@@ -177,7 +178,8 @@ NEP3_MULTIGPU::NEP3_MULTIGPU(
   // cutoff 4.2 3.7 80 47 1
   tokens = get_tokens(input);
   if (tokens.size() != 5 && tokens.size() != 8) {
-    std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular [radial_factor] [angular_factor] [zbl_factor].\n";
+    std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular "
+                 "[radial_factor] [angular_factor] [zbl_factor].\n";
     exit(1);
   }
   paramb.rc_radial = get_float_from_token(tokens[1], __FILE__, __LINE__);
@@ -792,8 +794,12 @@ static __global__ void find_neighbor_list_large_box(
           if (paramb.use_typewise_cutoff) {
             int z1 = paramb.atomic_numbers[t1];
             int z2 = paramb.atomic_numbers[t2];
-            rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial);
-            rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular);
+            rc_radial = min(
+              (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor,
+              rc_radial);
+            rc_angular = min(
+              (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor,
+              rc_angular);
           }
 
           if (d12_square >= rc_radial * rc_radial) {
@@ -874,8 +880,11 @@ static __global__ void find_descriptor(
       int t2 = g_type[n2];
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc(rc, rcinv, d12, fc12);
@@ -919,10 +928,13 @@ static __global__ void find_descriptor(
 #else
         float fc12;
         int t2 = g_type[n2];
-         float rc = paramb.rc_angular;
+        float rc = paramb.rc_angular;
         if (paramb.use_typewise_cutoff) {
-          rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-          + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+          rc = min(
+            (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+             COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+              paramb.typewise_cutoff_angular_factor,
+            rc);
         }
         float rcinv = 1.0f / rc;
         find_fc(rc, rcinv, d12, fc12);
@@ -1071,8 +1083,11 @@ static __global__ void find_force_radial(
       float fc12, fcp12;
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
@@ -1214,8 +1229,11 @@ static __global__ void find_partial_force_angular(
       int t2 = g_type[n2];
       float rc = paramb.rc_angular;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_angular_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
@@ -1324,7 +1342,9 @@ static __global__ void find_force_ZBL(
         float rc_outer = zbl.rc_outer;
         if (paramb.use_typewise_cutoff_zbl) {
           // zi and zj start from 1, so need to minus 1 here
-          rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer);
+          rc_outer = min(
+            (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor,
+            rc_outer);
           rc_inner = rc_outer * 0.5f;
         }
         find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp);
@@ -1984,8 +2004,11 @@ static __global__ void find_descriptor(
       int t2 = g_type[n2];
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc(rc, rcinv, d12, fc12);
@@ -2031,8 +2054,11 @@ static __global__ void find_descriptor(
         int t2 = g_type[n2];
         float rc = paramb.rc_angular;
         if (paramb.use_typewise_cutoff) {
-          rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-          + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+          rc = min(
+            (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+             COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+              paramb.typewise_cutoff_angular_factor,
+            rc);
         }
         float rcinv = 1.0f / rc;
         find_fc(rc, rcinv, d12, fc12);
diff --git a/src/force/nep3_multigpu.cuh b/src/force/nep3_multigpu.cuh
index cd34d0196..c6ae4bbf3 100644
--- a/src/force/nep3_multigpu.cuh
+++ b/src/force/nep3_multigpu.cuh
@@ -107,13 +107,13 @@ public:
   };
 
   struct ANN {
-    int dim = 0;          // dimension of the descriptor
-    int num_neurons1 = 0; // number of neurons in the 1st hidden layer
-    int num_para = 0;     // number of parameters
+    int dim = 0;                   // dimension of the descriptor
+    int num_neurons1 = 0;          // number of neurons in the 1st hidden layer
+    int num_para = 0;              // number of parameters
     const float* w0[NUM_ELEMENTS]; // weight from the input layer to the hidden layer
     const float* b0[NUM_ELEMENTS]; // bias for the hidden layer
     const float* w1[NUM_ELEMENTS]; // weight from the hidden layer to the output layer
-    const float* b1;      // bias for the output layer
+    const float* b1;               // bias for the output layer
     const float* c;
     // for the scalar part of polarizability
     const float* w0_pol[10];
diff --git a/src/force/nep3_small_box.cuh b/src/force/nep3_small_box.cuh
index 6680ffac7..9aac87735 100644
--- a/src/force/nep3_small_box.cuh
+++ b/src/force/nep3_small_box.cuh
@@ -131,8 +131,12 @@ static __global__ void find_neighbor_list_small_box(
             if (paramb.use_typewise_cutoff) {
               int z1 = paramb.atomic_numbers[t1];
               int z2 = paramb.atomic_numbers[t2];
-              rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial);
-              rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular);
+              rc_radial = min(
+                (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor,
+                rc_radial);
+              rc_angular = min(
+                (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor,
+                rc_angular);
             }
 
             if (distance_square < rc_radial * rc_radial) {
@@ -214,8 +218,11 @@ static __global__ void find_descriptor_small_box(
       int t2 = g_type[n2];
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc(rc, rcinv, d12, fc12);
@@ -258,8 +265,11 @@ static __global__ void find_descriptor_small_box(
         int t2 = g_type[n2];
         float rc = paramb.rc_angular;
         if (paramb.use_typewise_cutoff) {
-          rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-          + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+          rc = min(
+            (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+             COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+              paramb.typewise_cutoff_angular_factor,
+            rc);
         }
         float rcinv = 1.0f / rc;
         find_fc(rc, rcinv, d12, fc12);
@@ -382,8 +392,11 @@ static __global__ void find_descriptor_small_box(
       int t2 = g_type[n2];
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc(rc, rcinv, d12, fc12);
@@ -426,8 +439,11 @@ static __global__ void find_descriptor_small_box(
         int t2 = g_type[n2];
         float rc = paramb.rc_angular;
         if (paramb.use_typewise_cutoff) {
-          rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-          + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+          rc = min(
+            (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+             COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+              paramb.typewise_cutoff_angular_factor,
+            rc);
         }
         float rcinv = 1.0f / rc;
         find_fc(rc, rcinv, d12, fc12);
@@ -528,15 +544,17 @@ static __global__ void find_force_radial_small_box(
       float fc12, fcp12;
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
       float fn12[MAX_NUM_N];
       float fnp12[MAX_NUM_N];
-      find_fn_and_fnp(
-        paramb.basis_size_radial, rcinv, d12, fc12, fcp12, fn12, fnp12);
+      find_fn_and_fnp(paramb.basis_size_radial, rcinv, d12, fc12, fcp12, fn12, fnp12);
       for (int n = 0; n <= paramb.n_max_radial; ++n) {
         float gnp12 = 0.0f;
         for (int k = 0; k <= paramb.basis_size_radial; ++k) {
@@ -674,15 +692,17 @@ static __global__ void find_force_angular_small_box(
       int t2 = g_type[n2];
       float rc = paramb.rc_angular;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_angular_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
       float fn12[MAX_NUM_N];
       float fnp12[MAX_NUM_N];
-      find_fn_and_fnp(
-        paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12);
+      find_fn_and_fnp(paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12);
       for (int n = 0; n <= paramb.n_max_angular; ++n) {
         float gn12 = 0.0f;
         float gnp12 = 0.0f;
@@ -807,7 +827,9 @@ static __global__ void find_force_ZBL_small_box(
         float rc_outer = zbl.rc_outer;
         if (paramb.use_typewise_cutoff_zbl) {
           // zi and zj start from 1, so need to minus 1 here
-          rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer);
+          rc_outer = min(
+            (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor,
+            rc_outer);
           rc_inner = rc_outer * 0.5f;
         }
         find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp);
diff --git a/src/integrate/integrate.cuh b/src/integrate/integrate.cuh
index 7b7fa66a7..b488cfdfd 100644
--- a/src/integrate/integrate.cuh
+++ b/src/integrate/integrate.cuh
@@ -34,7 +34,7 @@ public:
     Box& box,
     std::vector<Group>& group,
     GPU_Vector<double>& thermo,
-    int &total_steps);
+    int& total_steps);
 
   void finalize();
 
diff --git a/src/main_gpumd/add_efield.cu b/src/main_gpumd/add_efield.cu
index 6eb3a9f44..01bb595f5 100644
--- a/src/main_gpumd/add_efield.cu
+++ b/src/main_gpumd/add_efield.cu
@@ -24,12 +24,11 @@ Add electric field to a group of atoms.
 #include <iostream>
 #include <vector>
 
-static void __global__
-add_efield(
+static void __global__ add_efield(
   const int group_size,
   const int group_size_sum,
   const int* g_group_contents,
-  const double Ex, 
+  const double Ex,
   const double Ey,
   const double Ez,
   const double* g_charge,
@@ -67,8 +66,7 @@ void Add_Efield::compute(const int step, const std::vector<Group>& groups, Atom&
       atom.charge.data(),
       atom.force_per_atom.data(),
       atom.force_per_atom.data() + num_atoms_total,
-      atom.force_per_atom.data() + num_atoms_total * 2
-    );
+      atom.force_per_atom.data() + num_atoms_total * 2);
     CUDA_CHECK_KERNEL
   }
 }
@@ -105,10 +103,9 @@ void Add_Efield::parse(const char** param, int num_param, const std::vector<Grou
   }
 
   printf(
-    "    for atoms in group %d of grouping method %d.\n", 
-    group_id_[num_calls_], 
-    grouping_method_[num_calls_]
-  );
+    "    for atoms in group %d of grouping method %d.\n",
+    group_id_[num_calls_],
+    grouping_method_[num_calls_]);
 
   if (num_param == 6) {
     table_length_[num_calls_] = 1;
@@ -150,7 +147,8 @@ void Add_Efield::parse(const char** param, int num_param, const std::vector<Grou
         PRINT_INPUT_ERROR("Number of electric field components at each step should be 3.");
       }
       for (int t = 0; t < 3; ++t) {
-        efield_table_[num_calls_][t * table_length_[num_calls_] + n] = get_double_from_token(tokens[t], __FILE__, __LINE__);
+        efield_table_[num_calls_][t * table_length_[num_calls_] + n] =
+          get_double_from_token(tokens[t], __FILE__, __LINE__);
       }
     }
   }
@@ -162,7 +160,4 @@ void Add_Efield::parse(const char** param, int num_param, const std::vector<Grou
   }
 }
 
-void Add_Efield::finalize() 
-{ 
-  num_calls_ = 0;
-}
+void Add_Efield::finalize() { num_calls_ = 0; }
diff --git a/src/main_gpumd/add_efield.cuh b/src/main_gpumd/add_efield.cuh
index 89824e924..f62677540 100644
--- a/src/main_gpumd/add_efield.cuh
+++ b/src/main_gpumd/add_efield.cuh
@@ -23,13 +23,11 @@ class Group;
 class Add_Efield
 {
 public:
-
   void parse(const char** param, int num_param, const std::vector<Group>& group);
   void compute(const int step, const std::vector<Group>& groups, Atom& atom);
   void finalize();
 
 private:
-
   int num_calls_ = 0;
   int table_length_[10];
   std::vector<double> efield_table_[10];
diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu
index a56c6b2cd..92bb952da 100644
--- a/src/main_gpumd/add_force.cu
+++ b/src/main_gpumd/add_force.cu
@@ -24,12 +24,11 @@ Add force to a group of atoms.
 #include <iostream>
 #include <vector>
 
-static void __global__
-add_force(
+static void __global__ add_force(
   const int group_size,
   const int group_size_sum,
   const int* g_group_contents,
-  const double added_fx, 
+  const double added_fx,
   const double added_fy,
   const double added_fz,
   double* g_fx,
@@ -64,8 +63,7 @@ void Add_Force::compute(const int step, const std::vector<Group>& groups, Atom&
       added_fz,
       atom.force_per_atom.data(),
       atom.force_per_atom.data() + num_atoms_total,
-      atom.force_per_atom.data() + num_atoms_total * 2
-    );
+      atom.force_per_atom.data() + num_atoms_total * 2);
     CUDA_CHECK_KERNEL
   }
 }
@@ -102,10 +100,9 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
   }
 
   printf(
-    "    for atoms in group %d of grouping method %d.\n", 
-    group_id_[num_calls_], 
-    grouping_method_[num_calls_]
-  );
+    "    for atoms in group %d of grouping method %d.\n",
+    group_id_[num_calls_],
+    grouping_method_[num_calls_]);
 
   if (num_param == 6) {
     table_length_[num_calls_] = 1;
@@ -147,7 +144,8 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
         PRINT_INPUT_ERROR("Number of force components at each step should be 3.");
       }
       for (int t = 0; t < 3; ++t) {
-        force_table_[num_calls_][t * table_length_[num_calls_] + n] = get_double_from_token(tokens[t], __FILE__, __LINE__);
+        force_table_[num_calls_][t * table_length_[num_calls_] + n] =
+          get_double_from_token(tokens[t], __FILE__, __LINE__);
       }
     }
   }
@@ -159,7 +157,4 @@ void Add_Force::parse(const char** param, int num_param, const std::vector<Group
   }
 }
 
-void Add_Force::finalize() 
-{ 
-  num_calls_ = 0;
-}
+void Add_Force::finalize() { num_calls_ = 0; }
diff --git a/src/main_gpumd/add_force.cuh b/src/main_gpumd/add_force.cuh
index 76081ae40..8b92187d8 100644
--- a/src/main_gpumd/add_force.cuh
+++ b/src/main_gpumd/add_force.cuh
@@ -23,13 +23,11 @@ class Group;
 class Add_Force
 {
 public:
-
   void parse(const char** param, int num_param, const std::vector<Group>& group);
   void compute(const int step, const std::vector<Group>& groups, Atom& atom);
   void finalize();
 
 private:
-
   int num_calls_ = 0;
   int table_length_[10];
   std::vector<double> force_table_[10];
diff --git a/src/main_gpumd/add_random_force.cu b/src/main_gpumd/add_random_force.cu
index 42b0562b8..662d1d242 100644
--- a/src/main_gpumd/add_random_force.cu
+++ b/src/main_gpumd/add_random_force.cu
@@ -128,23 +128,22 @@ void Add_Random_Force::compute(const int step, Atom& atom)
       curand_states_.data(),
       atom.force_per_atom.data(),
       atom.force_per_atom.data() + atom.number_of_atoms,
-      atom.force_per_atom.data() + atom.number_of_atoms * 2
-    );
+      atom.force_per_atom.data() + atom.number_of_atoms * 2);
     CUDA_CHECK_KERNEL
 
     gpu_sum_force<<<3, 1024>>>(
-        atom.number_of_atoms,
-        atom.force_per_atom.data(),
-        atom.force_per_atom.data() + atom.number_of_atoms,
-        atom.force_per_atom.data() + 2 * atom.number_of_atoms);
+      atom.number_of_atoms,
+      atom.force_per_atom.data(),
+      atom.force_per_atom.data() + atom.number_of_atoms,
+      atom.force_per_atom.data() + 2 * atom.number_of_atoms);
     CUDA_CHECK_KERNEL
 
     gpu_correct_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>(
-        atom.number_of_atoms,
-        1.0 / atom.number_of_atoms,
-        atom.force_per_atom.data(),
-        atom.force_per_atom.data() + atom.number_of_atoms,
-        atom.force_per_atom.data() + 2 * atom.number_of_atoms);
+      atom.number_of_atoms,
+      1.0 / atom.number_of_atoms,
+      atom.force_per_atom.data(),
+      atom.force_per_atom.data() + atom.number_of_atoms,
+      atom.force_per_atom.data() + 2 * atom.number_of_atoms);
     CUDA_CHECK_KERNEL
   }
 }
@@ -178,7 +177,4 @@ void Add_Random_Force::parse(const char** param, int num_param, int number_of_at
   CUDA_CHECK_KERNEL
 }
 
-void Add_Random_Force::finalize() 
-{ 
-  num_calls_ = 0;
-}
+void Add_Random_Force::finalize() { num_calls_ = 0; }
diff --git a/src/main_gpumd/add_random_force.cuh b/src/main_gpumd/add_random_force.cuh
index 71300667c..30fc6aaef 100644
--- a/src/main_gpumd/add_random_force.cuh
+++ b/src/main_gpumd/add_random_force.cuh
@@ -22,13 +22,11 @@ class Atom;
 class Add_Random_Force
 {
 public:
-
   void parse(const char** param, int num_param, int number_of_atoms);
   void compute(const int step, Atom& atom);
   void finalize();
 
 private:
-
   GPU_Vector<curandState> curand_states_;
   int num_calls_ = 0;
   double force_variance_ = 0.0;
diff --git a/src/main_gpumd/electron_stop.cu b/src/main_gpumd/electron_stop.cu
index 5734790f4..487f0d6a4 100644
--- a/src/main_gpumd/electron_stop.cu
+++ b/src/main_gpumd/electron_stop.cu
@@ -69,7 +69,7 @@ static void __global__ find_stopping_force(
     g_force[1 * num_atoms + i] = vy * factor;
     g_force[2 * num_atoms + i] = vz * factor;
 
-    g_power_loss[i] = stopping_power * sqrt(v2) * time_step; 
+    g_power_loss[i] = stopping_power * sqrt(v2) * time_step;
   }
 }
 
@@ -136,26 +136,25 @@ static __global__ void find_power_loss(int num_atoms, double* g_power_loss)
   double f = 0.0;
 
   for (int batch = 0; batch < number_of_batches; ++batch) {
-      int idx = tid + batch * block_size;
-      if (idx < num_atoms) {
-          f += g_power_loss[idx];
-      }
+    int idx = tid + batch * block_size;
+    if (idx < num_atoms) {
+      f += g_power_loss[idx];
+    }
   }
 
   s_f[tid] = f;
   __syncthreads();
 
   for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) {
-      if (tid < offset) {
-          s_f[tid] += s_f[tid + offset];
-      }
-      __syncthreads(); 
+    if (tid < offset) {
+      s_f[tid] += s_f[tid + offset];
+    }
+    __syncthreads();
   }
 
   if (tid == 0) {
-      device_power_loss = s_f[0];
+    device_power_loss = s_f[0];
   }
-
 }
 
 void Electron_Stop::compute(double time_step, Atom& atom)
@@ -190,8 +189,9 @@ void Electron_Stop::compute(double time_step, Atom& atom)
   find_power_loss<<<1, 1024>>>(atom.number_of_atoms, stopping_loss.data());
   CUDA_CHECK_KERNEL
 
-  double power_loss_host;  
-  CHECK(cudaMemcpyFromSymbol(&power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost));
+  double power_loss_host;
+  CHECK(cudaMemcpyFromSymbol(
+    &power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost));
   stopping_power_loss += power_loss_host;
 }
 
@@ -256,11 +256,11 @@ void Electron_Stop::parse(
   do_electron_stop = true;
 }
 
-void Electron_Stop::finalize() 
-{ 
-  if (do_electron_stop) { 
+void Electron_Stop::finalize()
+{
+  if (do_electron_stop) {
     printf("Total electron stopping power loss = %g eV.\n", stopping_power_loss);
   }
-  do_electron_stop = false; 
+  do_electron_stop = false;
   stopping_power_loss = 0.0;
 }
diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu
index 21a8f4274..17db0bcbb 100644
--- a/src/main_gpumd/run.cu
+++ b/src/main_gpumd/run.cu
@@ -18,8 +18,8 @@ Run simulation according to the inputs in the run.in file.
 ------------------------------------------------------------------------------*/
 
 #include "add_efield.cuh"
-#include "add_random_force.cuh"
 #include "add_force.cuh"
+#include "add_random_force.cuh"
 #include "cohesive.cuh"
 #include "electron_stop.cuh"
 #include "force/force.cuh"
@@ -554,9 +554,10 @@ void Run::parse_correct_velocity(const char** param, int num_param, const std::v
   if (velocity.velocity_correction_group_method < 0) {
     printf("    for the whole system.\n");
   } else {
-    printf("    for individual groups in group method %d.\n", velocity.velocity_correction_group_method);
+    printf(
+      "    for individual groups in group method %d.\n", velocity.velocity_correction_group_method);
   }
-  
+
   velocity.do_velocity_correction = true;
 }
 
diff --git a/src/main_gpumd/velocity.cu b/src/main_gpumd/velocity.cu
index 542142741..058ca38d2 100644
--- a/src/main_gpumd/velocity.cu
+++ b/src/main_gpumd/velocity.cu
@@ -353,6 +353,6 @@ void Velocity::initialize(
 
 void Velocity::finalize()
 {
-  do_velocity_correction = false; 
+  do_velocity_correction = false;
   velocity_correction_group_method = -1;
 }
diff --git a/src/main_nep/fitness.cu b/src/main_nep/fitness.cu
index dfa96d03a..c6cb8257b 100644
--- a/src/main_nep/fitness.cu
+++ b/src/main_nep/fitness.cu
@@ -54,7 +54,8 @@ Fitness::Fitness(Parameters& para)
   int count = 0;
   for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
     const int batch_size_minimal = structures_train.size() / num_batches;
-    const bool is_larger_batch = batch_id + batch_size_minimal * num_batches < structures_train.size();
+    const bool is_larger_batch =
+      batch_id + batch_size_minimal * num_batches < structures_train.size();
     const int batch_size = is_larger_batch ? batch_size_minimal + 1 : batch_size_minimal;
     count += batch_size;
     printf("\nBatch %d:\n", batch_id);
@@ -63,7 +64,8 @@ Fitness::Fitness(Parameters& para)
       print_line_1();
       printf("Constructing train_set in device  %d.\n", device_id);
       CHECK(cudaSetDevice(device_id));
-      train_set[batch_id][device_id].construct(para, structures_train, count - batch_size, count, device_id);
+      train_set[batch_id][device_id].construct(
+        para, structures_train, count - batch_size, count, device_id);
       print_line_2();
     }
   }
@@ -138,15 +140,15 @@ void Fitness::compute(
     std::vector<float> dummy_solution(para.number_of_variables * deviceCount, para.initial_para);
     for (int n = 0; n < num_batches; ++n) {
       potential->find_force(
-        para, 
-        dummy_solution.data(), 
-        train_set[n], 
+        para,
+        dummy_solution.data(),
+        train_set[n],
 #ifdef USE_FIXED_SCALER
-        false, 
+        false,
 #else
         true,
 #endif
-        true, 
+        true,
         deviceCount);
     }
   } else {
@@ -215,16 +217,15 @@ void Fitness::compute(
         }
       }
     }
-
   }
 }
 
 void Fitness::output(
   bool is_stress,
-  int num_components, 
-  FILE* fid, 
-  float* prediction, 
-  float* reference, 
+  int num_components,
+  FILE* fid,
+  float* prediction,
+  float* reference,
   Dataset& dataset)
 {
   for (int nc = 0; nc < dataset.Nc; ++nc) {
@@ -322,19 +323,20 @@ void Fitness::write_nep_txt(FILE* fid_nep, Parameters& para, float* elite)
       para.typewise_cutoff_zbl_factor);
   } else {
     fprintf(
-    fid_nep,
-    "cutoff %g %g %d %d\n",
-    para.rc_radial,
-    para.rc_angular,
-    max_NN_radial,
-    max_NN_angular);
+      fid_nep,
+      "cutoff %g %g %d %d\n",
+      para.rc_radial,
+      para.rc_angular,
+      max_NN_radial,
+      max_NN_angular);
   }
   fprintf(fid_nep, "n_max %d %d\n", para.n_max_radial, para.n_max_angular);
   fprintf(fid_nep, "basis_size %d %d\n", para.basis_size_radial, para.basis_size_angular);
   fprintf(fid_nep, "l_max %d %d %d\n", para.L_max, para.L_max_4body, para.L_max_5body);
 
   if (para.num_hidden_layers == 3) {
-    fprintf(fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], para.num_neurons[2]);
+    fprintf(
+      fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], para.num_neurons[2]);
   } else if (para.num_hidden_layers == 2) {
     fprintf(fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], 0);
   } else if (para.num_hidden_layers == 1) {
@@ -528,7 +530,13 @@ void Fitness::update_dipole(FILE* fid_dipole, Dataset& dataset)
 void Fitness::update_polarizability(FILE* fid_polarizability, Dataset& dataset)
 {
   dataset.virial.copy_to_host(dataset.virial_cpu.data());
-  output(false, 6, fid_polarizability, dataset.virial_cpu.data(), dataset.virial_ref_cpu.data(), dataset);
+  output(
+    false,
+    6,
+    fid_polarizability,
+    dataset.virial_cpu.data(),
+    dataset.virial_ref_cpu.data(),
+    dataset);
 }
 
 void Fitness::predict(Parameters& para, float* elite)
@@ -540,7 +548,8 @@ void Fitness::predict(Parameters& para, float* elite)
     FILE* fid_stress = my_fopen("stress_train.out", "w");
     for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
       potential->find_force(para, elite, train_set[batch_id], false, true, 1);
-      update_energy_force_virial(fid_energy, fid_force, fid_virial, fid_stress, train_set[batch_id][0]);
+      update_energy_force_virial(
+        fid_energy, fid_force, fid_virial, fid_stress, train_set[batch_id][0]);
     }
     fclose(fid_energy);
     fclose(fid_force);
diff --git a/src/main_nep/fitness.cuh b/src/main_nep/fitness.cuh
index 578071bcb..a2623480f 100644
--- a/src/main_nep/fitness.cuh
+++ b/src/main_nep/fitness.cuh
@@ -48,14 +48,13 @@ protected:
   std::vector<std::vector<Dataset>> train_set;
   std::vector<Dataset> test_set;
   void output(
-    bool is_stress, 
-    int num_components, 
-    FILE* fid, 
-    float* prediction, 
-    float* reference, 
+    bool is_stress,
+    int num_components,
+    FILE* fid,
+    float* prediction,
+    float* reference,
     Dataset& dataset);
-  void
-  update_energy_force_virial(
+  void update_energy_force_virial(
     FILE* fid_energy, FILE* fid_force, FILE* fid_virial, FILE* fid_stress, Dataset& dataset);
   void update_dipole(FILE* fid_dipole, Dataset& dataset);
   void update_polarizability(FILE* fid_polarizability, Dataset& dataset);
diff --git a/src/main_nep/nep3.cu b/src/main_nep/nep3.cu
index c62c29f29..13414355e 100644
--- a/src/main_nep/nep3.cu
+++ b/src/main_nep/nep3.cu
@@ -88,8 +88,12 @@ static __global__ void gpu_find_neighbor_list(
             if (use_typewise_cutoff) {
               int z1 = paramb.atomic_numbers[t1];
               int z2 = paramb.atomic_numbers[t2];
-              rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial);
-              rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular);
+              rc_radial = min(
+                (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor,
+                rc_radial);
+              rc_angular = min(
+                (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor,
+                rc_angular);
             }
             if (distance_square < rc_radial * rc_radial) {
               NL_radial[count_radial * N + n1] = n2;
@@ -142,8 +146,11 @@ static __global__ void find_descriptors_radial(
       int t2 = g_type[n2];
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc(rc, rcinv, d12, fc12);
@@ -198,8 +205,11 @@ static __global__ void find_descriptors_angular(
         int t2 = g_type[n2];
         float rc = paramb.rc_angular;
         if (paramb.use_typewise_cutoff) {
-          rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-          + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+          rc = min(
+            (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+             COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+              paramb.typewise_cutoff_angular_factor,
+            rc);
         }
         float rcinv = 1.0f / rc;
         find_fc(rc, rcinv, d12, fc12);
@@ -276,7 +286,7 @@ NEP3::NEP3(
   zbl.rc_inner = para.zbl_rc_inner;
   zbl.rc_outer = para.zbl_rc_outer;
   for (int n = 0; n < para.atomic_numbers.size(); ++n) {
-    zbl.atomic_numbers[n] = para.atomic_numbers[n]; // starting from 1
+    zbl.atomic_numbers[n] = para.atomic_numbers[n];        // starting from 1
     paramb.atomic_numbers[n] = para.atomic_numbers[n] - 1; // starting from 0
   }
   if (zbl.flexibled) {
@@ -296,24 +306,26 @@ NEP3::NEP3(
     annmb[device_id].num_para = para.number_of_variables;
     annmb[device_id].num_hidden_layers = para.num_hidden_layers;
     if (annmb[device_id].num_hidden_layers == 1) {
-        annmb[device_id].num_para_one_ann_without_bias 
-          = (annmb[device_id].dim + 2) * annmb[device_id].num_neurons[0];
+      annmb[device_id].num_para_one_ann_without_bias =
+        (annmb[device_id].dim + 2) * annmb[device_id].num_neurons[0];
     } else if (annmb[device_id].num_hidden_layers == 2) {
-        annmb[device_id].num_para_one_ann_without_bias 
-          = (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] 
-          + (annmb[device_id].num_neurons[0] + 2) * annmb[device_id].num_neurons[1];
+      annmb[device_id].num_para_one_ann_without_bias =
+        (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] +
+        (annmb[device_id].num_neurons[0] + 2) * annmb[device_id].num_neurons[1];
     } else {
-        annmb[device_id].num_para_one_ann_without_bias 
-          = (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] 
-          + (annmb[device_id].num_neurons[0] + 1) * annmb[device_id].num_neurons[1] 
-          + (annmb[device_id].num_neurons[1] + 2) * annmb[device_id].num_neurons[2];
+      annmb[device_id].num_para_one_ann_without_bias =
+        (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] +
+        (annmb[device_id].num_neurons[0] + 1) * annmb[device_id].num_neurons[1] +
+        (annmb[device_id].num_neurons[1] + 2) * annmb[device_id].num_neurons[2];
     }
     annmb[device_id].offset_w[0] = 0;
     annmb[device_id].offset_b[0] = annmb[device_id].dim * annmb[device_id].num_neurons[0];
     annmb[device_id].offset_w[1] = annmb[device_id].offset_b[0] + annmb[device_id].num_neurons[0];
-    annmb[device_id].offset_b[1] = annmb[device_id].offset_w[1] + annmb[device_id].num_neurons[0] * annmb[device_id].num_neurons[1];
+    annmb[device_id].offset_b[1] = annmb[device_id].offset_w[1] + annmb[device_id].num_neurons[0] *
+                                                                    annmb[device_id].num_neurons[1];
     annmb[device_id].offset_w[2] = annmb[device_id].offset_b[1] + annmb[device_id].num_neurons[1];
-    annmb[device_id].offset_b[2] = annmb[device_id].offset_w[2] + annmb[device_id].num_neurons[1] * annmb[device_id].num_neurons[2];
+    annmb[device_id].offset_b[2] = annmb[device_id].offset_w[2] + annmb[device_id].num_neurons[1] *
+                                                                    annmb[device_id].num_neurons[2];
     annmb[device_id].offset_w[3] = annmb[device_id].offset_b[2] + annmb[device_id].num_neurons[2];
 
     nep_data[device_id].NN_radial.resize(N);
@@ -609,8 +621,11 @@ static __global__ void find_force_radial(
       float fc12, fcp12;
       float rc = paramb.rc_radial;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_radial_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
@@ -709,8 +724,11 @@ static __global__ void find_force_angular(
       int t2 = g_type[n2];
       float rc = paramb.rc_angular;
       if (paramb.use_typewise_cutoff) {
-        rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] 
-        + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc);
+        rc = min(
+          (COVALENT_RADIUS[paramb.atomic_numbers[t1]] +
+           COVALENT_RADIUS[paramb.atomic_numbers[t2]]) *
+            paramb.typewise_cutoff_angular_factor,
+          rc);
       }
       float rcinv = 1.0f / rc;
       find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12);
@@ -829,8 +847,9 @@ static __global__ void find_force_ZBL(
         float rc_outer = zbl.rc_outer;
         if (paramb.use_typewise_cutoff_zbl) {
           // zi and zj start from 1, so need to minus 1 here
-          rc_outer = min((COVALENT_RADIUS[zi - 1] 
-          + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer);
+          rc_outer = min(
+            (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor,
+            rc_outer);
           rc_inner = rc_outer * 0.5f;
         }
         find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp);
diff --git a/src/main_nep/nep3.cuh b/src/main_nep/nep3.cuh
index e7df8b3cd..4f1639859 100644
--- a/src/main_nep/nep3.cuh
+++ b/src/main_nep/nep3.cuh
@@ -52,31 +52,32 @@ public:
     float rcinv_angular = 0.0f; // inverse of the angular cutoff
     int basis_size_radial = 0;
     int basis_size_angular = 0;
-    int n_max_radial = 0;       // n_radial = 0, 1, 2, ..., n_max_radial
-    int n_max_angular = 0;      // n_angular = 0, 1, 2, ..., n_max_angular
-    int L_max = 0;              // l = 1, 2, ..., L_max
+    int n_max_radial = 0;  // n_radial = 0, 1, 2, ..., n_max_radial
+    int n_max_angular = 0; // n_angular = 0, 1, 2, ..., n_max_angular
+    int L_max = 0;         // l = 1, 2, ..., L_max
     int dim_angular;
     int num_L;
     int num_types = 0;
     int num_types_sq = 0;
     int num_c_radial = 0;
-    int version = 4;      // 3 for NEP3 and 4 for NEP4
+    int version = 4; // 3 for NEP3 and 4 for NEP4
     int atomic_numbers[NUM_ELEMENTS];
   };
 
   struct ANN {
-    int dim = 0;          // dimension of the descriptor
-    int num_hidden_layers = 1; // number of hidden layers (1 to 3)
+    int dim = 0;                    // dimension of the descriptor
+    int num_hidden_layers = 1;      // number of hidden layers (1 to 3)
     int num_neurons[3] = {0, 0, 0}; // number of neurons in the hidden layer
-    int num_para = 0;     // number of parameters
-    int num_para_one_ann_without_bias = 0; // number of parameters for one ann without the output bias
+    int num_para = 0;               // number of parameters
+    int num_para_one_ann_without_bias =
+      0; // number of parameters for one ann without the output bias
     int offset_w[4];
     int offset_b[3];
     const float* para[NUM_ELEMENTS]; // weight and bias parameters for the hidden layers
-    const float* b_out;      // bias for the output layer
+    const float* b_out;              // bias for the output layer
     // for the scalar part of polarizability
     const float* para_pol[NUM_ELEMENTS]; // weight and bias parameters for the hidden layers
-    const float* b_out_pol;     // bias for the output layer
+    const float* b_out_pol;              // bias for the output layer
     // for elements in descriptor
     const float* c;
   };
diff --git a/src/main_nep/parameters.cu b/src/main_nep/parameters.cu
index 9c5a1e7b3..da906ef15 100644
--- a/src/main_nep/parameters.cu
+++ b/src/main_nep/parameters.cu
@@ -22,13 +22,12 @@
 #include <iostream>
 
 const std::string ELEMENTS[NUM_ELEMENTS] = {
-  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",
-  "S",  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
-  "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh",
-  "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
-  "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re",
-  "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
-  "Pa", "U",  "Np", "Pu"};
+  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",  "S",
+  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
+  "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
+  "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
+  "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt", "Au", "Hg",
+  "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu"};
 
 Parameters::Parameters()
 {
@@ -171,7 +170,7 @@ void Parameters::calculate_parameters()
   }
   dim_radial = n_max_radial + 1;             // 2-body descriptors q^i_n
   dim_angular = (n_max_angular + 1) * L_max; // 3-body descriptors q^i_nl
-  if (L_max_4body == 2) {    // 4-body descriptors q^i_n222
+  if (L_max_4body == 2) {                    // 4-body descriptors q^i_n222
     dim_angular += n_max_angular + 1;
   }
   if (L_max_5body == 1) { // 5-body descriptors q^i_n1111
@@ -191,9 +190,14 @@ void Parameters::calculate_parameters()
   if (num_hidden_layers == 1) {
     number_of_variables_ann = (dim + 2) * num_neurons[0] * (version == 4 ? num_types : 1) + 1;
   } else if (num_hidden_layers == 2) {
-    number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 2) * num_neurons[1]) * (version == 4 ? num_types : 1) + 1;
+    number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 2) * num_neurons[1]) *
+                                (version == 4 ? num_types : 1) +
+                              1;
   } else {
-    number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 1) * num_neurons[1] + (num_neurons[1] + 2) * num_neurons[2]) * (version == 4 ? num_types : 1) + 1;
+    number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 1) * num_neurons[1] +
+                               (num_neurons[1] + 2) * num_neurons[2]) *
+                                (version == 4 ? num_types : 1) +
+                              1;
   }
 
   number_of_variables_descriptor =
@@ -319,10 +323,12 @@ void Parameters::report_inputs()
   }
 
   if (is_use_typewise_cutoff_zbl_set) {
-    printf("    (input)   use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global");
+    printf(
+      "    (input)   use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global");
     printf("              factor = %g.\n", typewise_cutoff_zbl_factor);
   } else {
-    printf("    (default) use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global");
+    printf(
+      "    (default) use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global");
   }
 
   if (is_n_max_set) {
@@ -352,9 +358,17 @@ void Parameters::report_inputs()
   }
 
   if (is_neuron_set) {
-    printf("    (input)   number of neurons = (%d, %d, %d).\n", num_neurons[0], num_neurons[1], num_neurons[2]);
+    printf(
+      "    (input)   number of neurons = (%d, %d, %d).\n",
+      num_neurons[0],
+      num_neurons[1],
+      num_neurons[2]);
   } else {
-    printf("    (default) number of neurons = (%d, %d, %d).\n", num_neurons[0], num_neurons[1], num_neurons[2]);
+    printf(
+      "    (default) number of neurons = (%d, %d, %d).\n",
+      num_neurons[0],
+      num_neurons[1],
+      num_neurons[2]);
   }
 
   if (is_lambda_1_set) {
@@ -426,7 +440,12 @@ void Parameters::report_inputs()
   printf("    number of angular descriptor components = %d.\n", dim_angular);
   printf("    total number of descriptor components = %d.\n", dim);
   if (num_hidden_layers == 3) {
-    printf("    NN architecture = %d-%d-%d-%d-1.\n", dim, num_neurons[0], num_neurons[1], num_neurons[2]);
+    printf(
+      "    NN architecture = %d-%d-%d-%d-1.\n",
+      dim,
+      num_neurons[0],
+      num_neurons[1],
+      num_neurons[2]);
   } else if (num_hidden_layers == 2) {
     printf("    NN architecture = %d-%d-%d-1.\n", dim, num_neurons[0], num_neurons[1]);
   } else {
@@ -762,7 +781,7 @@ void Parameters::parse_neuron(const char** param, int num_param)
 {
   is_neuron_set = true;
 
-  if (num_param < 2 || num_param > 4) { 
+  if (num_param < 2 || num_param > 4) {
     PRINT_INPUT_ERROR("neuron should have 1 to 3 parameters.\n");
   }
 
@@ -1039,13 +1058,13 @@ void Parameters::parse_use_typewise_cutoff(const char** param, int num_param)
   if (num_param == 3) {
     double typewise_cutoff_radial_factor_temp = 0.0;
     if (!is_valid_real(param[1], &typewise_cutoff_radial_factor_temp)) {
-       PRINT_INPUT_ERROR("typewise_cutoff_radial_factor should be a number.\n");
+      PRINT_INPUT_ERROR("typewise_cutoff_radial_factor should be a number.\n");
     }
     typewise_cutoff_radial_factor = typewise_cutoff_radial_factor_temp;
 
     double typewise_cutoff_angular_factor_temp = 0.0;
     if (!is_valid_real(param[2], &typewise_cutoff_angular_factor_temp)) {
-       PRINT_INPUT_ERROR("typewise_cutoff_angular_factor should be a number.\n");
+      PRINT_INPUT_ERROR("typewise_cutoff_angular_factor should be a number.\n");
     }
     typewise_cutoff_angular_factor = typewise_cutoff_angular_factor_temp;
   }
@@ -1071,7 +1090,7 @@ void Parameters::parse_use_typewise_cutoff_zbl(const char** param, int num_param
   if (num_param == 2) {
     double typewise_cutoff_zbl_factor_temp = 0.0;
     if (!is_valid_real(param[1], &typewise_cutoff_zbl_factor_temp)) {
-       PRINT_INPUT_ERROR("typewise_cutoff_zbl_factor should be a number.\n");
+      PRINT_INPUT_ERROR("typewise_cutoff_zbl_factor should be a number.\n");
     }
     typewise_cutoff_zbl_factor = typewise_cutoff_zbl_factor_temp;
   }
diff --git a/src/main_nep/snes.cu b/src/main_nep/snes.cu
index 5c70fd4e4..9127b5125 100644
--- a/src/main_nep/snes.cu
+++ b/src/main_nep/snes.cu
@@ -44,7 +44,7 @@ SNES::SNES(Parameters& para, Fitness* fitness_function)
   maximum_generation = para.maximum_generation;
   number_of_variables = para.number_of_variables;
   population_size = para.population_size;
-  const int N =  population_size * number_of_variables;
+  const int N = population_size * number_of_variables;
   int num = number_of_variables;
   if (para.version == 4) {
     num /= para.num_types;
@@ -107,8 +107,8 @@ void SNES::initialize_mu_and_sigma(Parameters& para)
     fclose(fid_restart);
   }
 #ifdef USE_FIXED_SCALER
-    mu[para.number_of_variables_ann - 1] = 0.0f;
-    sigma[para.number_of_variables_ann - 1] = 0.0f;
+  mu[para.number_of_variables_ann - 1] = 0.0f;
+  sigma[para.number_of_variables_ann - 1] = 0.0f;
 #endif
   cudaSetDevice(0); // normally use GPU-0
   gpu_mu.copy_from_host(mu.data());
@@ -142,15 +142,23 @@ void SNES::find_type_of_variable(Parameters& para)
           }
           offset += (para.dim + 2) * para.num_neurons[0];
         } else if (para.num_hidden_layers == 2) {
-          for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1]; ++n) {
+          for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] +
+                                (para.num_neurons[0] + 2) * para.num_neurons[1];
+               ++n) {
             type_of_variable[n + offset] = t;
           }
-          offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1];
+          offset +=
+            (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1];
         } else {
-          for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 1) * para.num_neurons[1] + (para.num_neurons[1] + 2) * para.num_neurons[2]; ++n) {
+          for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] +
+                                (para.num_neurons[0] + 1) * para.num_neurons[1] +
+                                (para.num_neurons[1] + 2) * para.num_neurons[2];
+               ++n) {
             type_of_variable[n + offset] = t;
           }
-          offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 1) * para.num_neurons[1] + (para.num_neurons[1] + 2) * para.num_neurons[2];
+          offset += (para.dim + 1) * para.num_neurons[0] +
+                    (para.num_neurons[0] + 1) * para.num_neurons[1] +
+                    (para.num_neurons[1] + 2) * para.num_neurons[2];
         }
       }
       ++offset; // the bias
@@ -159,10 +167,13 @@ void SNES::find_type_of_variable(Parameters& para)
     if (para.num_hidden_layers == 1) {
       offset += (para.dim + 2) * para.num_neurons[0] + 1;
     } else if (para.num_hidden_layers == 2) {
-      offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1] + 1;
+      offset +=
+        (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1] + 1;
     } else {
-      offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 1) * para.num_neurons[1] + (para.num_neurons[1] + 2) * para.num_neurons[2] + 1;
-    }    
+      offset += (para.dim + 1) * para.num_neurons[0] +
+                (para.num_neurons[0] + 1) * para.num_neurons[1] +
+                (para.num_neurons[1] + 2) * para.num_neurons[2] + 1;
+    }
   }
 
   // descriptor part
@@ -269,8 +280,9 @@ void SNES::compute(Parameters& para, Fitness* fitness_function)
     std::vector<std::string> tokens;
     tokens = get_tokens(input);
     int num_lines_to_be_skipped = 5;
-    if (tokens[0] == "nep3_zbl" || tokens[0] == "nep4_zbl" 
-      || tokens[0] == "nep3_zbl_temperature" || tokens[0] == "nep4_zbl_temperature") {
+    if (
+      tokens[0] == "nep3_zbl" || tokens[0] == "nep4_zbl" || tokens[0] == "nep3_zbl_temperature" ||
+      tokens[0] == "nep4_zbl_temperature") {
       num_lines_to_be_skipped = 6;
     }
 
@@ -315,12 +327,12 @@ void SNES::create_population(Parameters& para)
   cudaSetDevice(0); // normally use GPU-0
   const int N = population_size * number_of_variables;
   gpu_create_population<<<(N - 1) / 128 + 1, 128>>>(
-    N, 
-    number_of_variables, 
-    gpu_mu.data(), 
-    gpu_sigma.data(), 
-    curand_states.data(), 
-    gpu_s.data(), 
+    N,
+    number_of_variables,
+    gpu_mu.data(),
+    gpu_sigma.data(),
+    curand_states.data(),
+    gpu_s.data(),
     gpu_population.data());
   CUDA_CHECK_KERNEL
   gpu_population.copy_to_host(population.data());
@@ -381,14 +393,14 @@ void SNES::regularize_NEP4(Parameters& para)
     if (t == para.num_types) {
       num_variables = para.number_of_variables;
     }
-    
+
     gpu_find_L1_L2_NEP4<<<population_size, 1024>>>(
-      number_of_variables, 
+      number_of_variables,
       para.num_types,
-      t, 
-      gpu_type_of_variable.data(), 
-      gpu_population.data(), 
-      gpu_cost_L1reg.data(), 
+      t,
+      gpu_type_of_variable.data(),
+      gpu_population.data(),
+      gpu_cost_L1reg.data(),
       gpu_cost_L2reg.data());
     CUDA_CHECK_KERNEL
 
diff --git a/src/main_nep/snes.cuh b/src/main_nep/snes.cuh
index d98c46fde..4f9352a98 100644
--- a/src/main_nep/snes.cuh
+++ b/src/main_nep/snes.cuh
@@ -32,7 +32,7 @@ protected:
   int number_of_variables = 10;
   int population_size = 20;
   float eta_sigma = 0.1f;
-  
+
   std::vector<int> index;
   std::vector<float> fitness;
   std::vector<float> population;
diff --git a/src/main_nep/structure.cu b/src/main_nep/structure.cu
index c53298849..47379cad6 100644
--- a/src/main_nep/structure.cu
+++ b/src/main_nep/structure.cu
@@ -22,8 +22,8 @@
 #include <fstream>
 #include <iostream>
 #include <iterator>
-#include <random>
 #include <numeric>
+#include <random>
 #include <sstream>
 #include <string>
 #include <vector>
@@ -107,13 +107,19 @@ static void read_force(
       PRINT_INPUT_ERROR("Number of items for an atom line mismatches properties.");
     }
     std::string atom_symbol(tokens[0 + species_offset]);
-    structure.x[na] = get_float_from_token(tokens[0 + pos_offset], xyz_filename.c_str(), line_number);
-    structure.y[na] = get_float_from_token(tokens[1 + pos_offset], xyz_filename.c_str(), line_number);
-    structure.z[na] = get_float_from_token(tokens[2 + pos_offset], xyz_filename.c_str(), line_number);
+    structure.x[na] =
+      get_float_from_token(tokens[0 + pos_offset], xyz_filename.c_str(), line_number);
+    structure.y[na] =
+      get_float_from_token(tokens[1 + pos_offset], xyz_filename.c_str(), line_number);
+    structure.z[na] =
+      get_float_from_token(tokens[2 + pos_offset], xyz_filename.c_str(), line_number);
     if (num_columns > 4) {
-      structure.fx[na] = get_float_from_token(tokens[0 + force_offset], xyz_filename.c_str(), line_number);
-      structure.fy[na] = get_float_from_token(tokens[1 + force_offset], xyz_filename.c_str(), line_number);
-      structure.fz[na] = get_float_from_token(tokens[2 + force_offset], xyz_filename.c_str(), line_number);
+      structure.fx[na] =
+        get_float_from_token(tokens[0 + force_offset], xyz_filename.c_str(), line_number);
+      structure.fy[na] =
+        get_float_from_token(tokens[1 + force_offset], xyz_filename.c_str(), line_number);
+      structure.fz[na] =
+        get_float_from_token(tokens[2 + force_offset], xyz_filename.c_str(), line_number);
     }
 
     bool is_allowed_element = false;
@@ -130,8 +136,8 @@ static void read_force(
 }
 
 static void read_one_structure(
-  const Parameters& para, 
-  std::ifstream& input, 
+  const Parameters& para,
+  std::ifstream& input,
   Structure& structure,
   std::string& xyz_filename,
   int& line_number)
@@ -168,7 +174,9 @@ static void read_one_structure(
     if (token.substr(0, temperature_string.length()) == temperature_string) {
       structure.has_temperature = true;
       structure.temperature = get_float_from_token(
-        token.substr(temperature_string.length(), token.length()), xyz_filename.c_str(), line_number);
+        token.substr(temperature_string.length(), token.length()),
+        xyz_filename.c_str(),
+        line_number);
     }
   }
   if (para.train_mode == 3 && !structure.has_temperature) {
@@ -201,7 +209,8 @@ static void read_one_structure(
           tokens[n + m].substr(
             (m == 0) ? (lattice_string.length() + 1) : 0,
             (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()),
-          xyz_filename.c_str(), line_number);
+          xyz_filename.c_str(),
+          line_number);
       }
       change_box(para, structure);
     }
@@ -221,7 +230,8 @@ static void read_one_structure(
           tokens[n + m].substr(
             (m == 0) ? (virial_string.length() + 1) : 0,
             (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()),
-          xyz_filename.c_str(), line_number);
+          xyz_filename.c_str(),
+          line_number);
         structure.virial[reduced_index[m]] /= structure.num_atom;
       }
     }
@@ -240,7 +250,8 @@ static void read_one_structure(
           tokens[n + m].substr(
             (m == 0) ? (stress_string.length() + 1) : 0,
             (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()),
-          xyz_filename.c_str(), line_number);
+          xyz_filename.c_str(),
+          line_number);
         virials_from_stress[reduced_index[m]] *= -volume / structure.num_atom;
       }
     }
@@ -287,7 +298,8 @@ static void read_one_structure(
             tokens[n + m].substr(
               (m == 0) ? (dipole_string.length() + 1) : 0,
               (m == 2) ? (tokens[n + m].length() - 1) : tokens[n + m].length()),
-            xyz_filename.c_str(), line_number);
+            xyz_filename.c_str(),
+            line_number);
           structure.virial[m] /= structure.num_atom;
         }
       }
@@ -316,7 +328,8 @@ static void read_one_structure(
             tokens[n + m].substr(
               (m == 0) ? (pol_string.length() + 1) : 0,
               (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()),
-            xyz_filename.c_str(), line_number);
+            xyz_filename.c_str(),
+            line_number);
           structure.virial[reduced_index[m]] /= structure.num_atom;
         }
       }
@@ -371,27 +384,38 @@ static void read_one_structure(
       }
       for (int k = 0; k < sub_tokens.size() / 3; ++k) {
         if (k < species_position) {
-          species_offset += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number);
+          species_offset +=
+            get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number);
         }
         if (k < pos_position) {
-          pos_offset += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number);
+          pos_offset +=
+            get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number);
         }
         if (k < force_position) {
-          force_offset += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number);
+          force_offset +=
+            get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number);
         }
         num_columns += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number);
       }
     }
   }
 
-  read_force(num_columns, species_offset, pos_offset, force_offset, input, para, structure, xyz_filename, line_number);
+  read_force(
+    num_columns,
+    species_offset,
+    pos_offset,
+    force_offset,
+    input,
+    para,
+    structure,
+    xyz_filename,
+    line_number);
 }
 
-static void
-read_exyz(
-  const Parameters& para, 
-  std::ifstream& input, 
-  std::vector<Structure>& structures, 
+static void read_exyz(
+  const Parameters& para,
+  std::ifstream& input,
+  std::vector<Structure>& structures,
   std::string& xyz_filename)
 {
   int line_number = 0;
@@ -433,7 +457,7 @@ read_exyz(
 
 static void find_permuted_indices(
   const int num_batches,
-  const std::vector<Structure>& structures, 
+  const std::vector<Structure>& structures,
   std::vector<int>& permuted_indices)
 {
   std::vector<float> energy(structures.size());
@@ -442,12 +466,10 @@ static void find_permuted_indices(
   }
 
   std::vector<int> energy_index(structures.size());
-  std::iota(energy_index.begin(), energy_index.end(), 0); 
-  std::stable_sort(
-    energy_index.begin(), 
-    energy_index.end(),
-    [&energy](size_t i1, size_t i2) {return energy[i1] < energy[i2];}
-  );
+  std::iota(energy_index.begin(), energy_index.end(), 0);
+  std::stable_sort(energy_index.begin(), energy_index.end(), [&energy](size_t i1, size_t i2) {
+    return energy[i1] < energy[i2];
+  });
 
   int count = 0;
   for (int b = 0; b < num_batches; ++b) {
@@ -459,7 +481,6 @@ static void find_permuted_indices(
     }
     count += batch_size;
   }
-  
 }
 
 static void reorder(const int num_batches, std::vector<Structure>& structures)
diff --git a/src/mc/nep_energy.cu b/src/mc/nep_energy.cu
index 900977dd8..73a8efe9c 100644
--- a/src/mc/nep_energy.cu
+++ b/src/mc/nep_energy.cu
@@ -30,13 +30,12 @@ heat transport, Phys. Rev. B. 104, 104309 (2021).
 #include <vector>
 
 const std::string ELEMENTS[NUM_ELEMENTS] = {
-  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",
-  "S",  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
-  "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh",
-  "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
-  "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re",
-  "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
-  "Pa", "U",  "Np", "Pu"};
+  "H",  "He", "Li", "Be", "B",  "C",  "N",  "O",  "F",  "Ne", "Na", "Mg", "Al", "Si", "P",  "S",
+  "Cl", "Ar", "K",  "Ca", "Sc", "Ti", "V",  "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge",
+  "As", "Se", "Br", "Kr", "Rb", "Sr", "Y",  "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
+  "In", "Sn", "Sb", "Te", "I",  "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
+  "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W",  "Re", "Os", "Ir", "Pt", "Au", "Hg",
+  "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U",  "Np", "Pu"};
 
 void NEP_Energy::initialize(const char* file_potential)
 {
@@ -66,7 +65,9 @@ void NEP_Energy::initialize(const char* file_potential)
     paramb.version = 4;
     zbl.enabled = true;
   } else {
-    std::cout << tokens[0] << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." << std::endl;
+    std::cout << tokens[0]
+              << " is an unsupported NEP model. We only support NEP3 and NEP4 models now."
+              << std::endl;
     exit(1);
   }
   paramb.num_types = get_int_from_token(tokens[1], __FILE__, __LINE__);
@@ -118,7 +119,8 @@ void NEP_Energy::initialize(const char* file_potential)
   // cutoff 4.2 3.7 80 47
   tokens = get_tokens(input);
   if (tokens.size() != 5 && tokens.size() != 8) {
-    std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular [radial_factor] [angular_factor] [zbl_factor].\n";
+    std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular "
+                 "[radial_factor] [angular_factor] [zbl_factor].\n";
     exit(1);
   }
   paramb.rc_radial = get_float_from_token(tokens[1], __FILE__, __LINE__);
@@ -430,7 +432,9 @@ static __global__ void find_energy_zbl(
         float rc_outer = zbl.rc_outer;
         if (paramb.use_typewise_cutoff_zbl) {
           // zi and zj start from 1, so need to minus 1 here
-          rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer);
+          rc_outer = min(
+            (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor,
+            rc_outer);
           rc_inner = rc_outer * 0.5f;
         }
         find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp);
diff --git a/src/mc/nep_energy.cuh b/src/mc/nep_energy.cuh
index 1b3dcba2b..dee90b09d 100644
--- a/src/mc/nep_energy.cuh
+++ b/src/mc/nep_energy.cuh
@@ -49,13 +49,13 @@ public:
   };
 
   struct ANN {
-    int dim = 0;          // dimension of the descriptor
-    int num_neurons1 = 0; // number of neurons in the 1st hidden layer
-    int num_para = 0;     // number of parameters
+    int dim = 0;                   // dimension of the descriptor
+    int num_neurons1 = 0;          // number of neurons in the 1st hidden layer
+    int num_para = 0;              // number of parameters
     const float* w0[NUM_ELEMENTS]; // weight from the input layer to the hidden layer
     const float* b0[NUM_ELEMENTS]; // bias for the hidden layer
     const float* w1[NUM_ELEMENTS]; // weight from the hidden layer to the output layer
-    const float* b1;      // bias for the output layer
+    const float* b1;               // bias for the output layer
     const float* c;
   };
 
diff --git a/src/measure/dump_exyz.cu b/src/measure/dump_exyz.cu
index e13ed0e58..1620866de 100644
--- a/src/measure/dump_exyz.cu
+++ b/src/measure/dump_exyz.cu
@@ -105,14 +105,14 @@ void Dump_EXYZ::parse(const char** param, int num_param)
   }
 
   if (num_param >= 6) {
-     if (!is_valid_int(param[5], &separated_)) {
+    if (!is_valid_int(param[5], &separated_)) {
       PRINT_INPUT_ERROR("separated should be an integer.");
-     }
-     if (separated_ == 0) {
+    }
+    if (separated_ == 0) {
       printf("    dump_exyz into dump.xyz.\n");
-     } else {
+    } else {
       printf("    dump_exyz into separated dump.*.xyz.\n");
-     }
+    }
   }
 }
 
@@ -122,7 +122,7 @@ void Dump_EXYZ::preprocess(const int number_of_atoms)
     if (separated_ == 0) {
       fid_ = my_fopen("dump.xyz", "a");
     }
-    
+
     gpu_total_virial_.resize(6);
     cpu_total_virial_.resize(6);
     if (has_force_) {
@@ -250,9 +250,9 @@ void Dump_EXYZ::process(
   if (has_potential_) {
     atom.potential_per_atom.copy_to_host(cpu_potential_per_atom_.data());
   }
-  
+
   if (separated_) {
-    std::string filename = "dump."+std::to_string(step+1)+".xyz";
+    std::string filename = "dump." + std::to_string(step + 1) + ".xyz";
     fid_ = my_fopen(filename.data(), "w");
   }
 
@@ -290,7 +290,6 @@ void Dump_EXYZ::process(
   } else {
     fclose(fid_);
   }
-  
 }
 
 void Dump_EXYZ::postprocess()
diff --git a/src/measure/dump_netcdf.cuh b/src/measure/dump_netcdf.cuh
index 355197b9a..f631eacca 100644
--- a/src/measure/dump_netcdf.cuh
+++ b/src/measure/dump_netcdf.cuh
@@ -39,12 +39,12 @@ public:
 
 private:
   bool dump_ = false;
-  int interval = 1;          // output interval
-  int has_velocity_ = 0;     // 0 wthout velocities, 1 with velocities
+  int interval = 1;      // output interval
+  int has_velocity_ = 0; // 0 wthout velocities, 1 with velocities
   char file_position[200];
-  int precision = 2;         // 1 = single precision, 2 = double
+  int precision = 2; // 1 = single precision, 2 = double
 
-  int ncid;                  // NetCDF ID
+  int ncid; // NetCDF ID
   static bool append;
 
   // dimensions
diff --git a/src/measure/rdf.cu b/src/measure/rdf.cu
index f81ffcbae..d0392e707 100644
--- a/src/measure/rdf.cu
+++ b/src/measure/rdf.cu
@@ -635,10 +635,14 @@ void RDF::parse(
   if (r_cut_ <= 0) {
     PRINT_INPUT_ERROR("radial cutoff should be positive.\n");
   }
-  double thickness_half[3] = {box.get_volume()/box.get_area(0)/ 2.5,box.get_volume()/box.get_area(1)/ 2.5,box.get_volume()/box.get_area(2)/ 2.5};
+  double thickness_half[3] = {
+    box.get_volume() / box.get_area(0) / 2.5,
+    box.get_volume() / box.get_area(1) / 2.5,
+    box.get_volume() / box.get_area(2) / 2.5};
   if (r_cut_ > thickness_half[0] || r_cut_ > thickness_half[1] || r_cut_ > thickness_half[2]) {
-    std::string message = "The box has a thickness < 2.5 RDF radial cutoffs in a periodic direction.\n"
-                          "                Please increase the periodic direction(s).\n";
+    std::string message =
+      "The box has a thickness < 2.5 RDF radial cutoffs in a periodic direction.\n"
+      "                Please increase the periodic direction(s).\n";
     PRINT_INPUT_ERROR(message.c_str());
   }
   printf("    radial cutoff %g.\n", r_cut_);
diff --git a/src/measure/shc.cu b/src/measure/shc.cu
index 8c64785ff..52d0d6e87 100644
--- a/src/measure/shc.cu
+++ b/src/measure/shc.cu
@@ -187,9 +187,11 @@ void SHC::process(
   } else {
     if (group_id == -1) {
       for (int n = 1; n < group_num; ++n) {
-        int offset_s = Nc * group[group_method].cpu_size_sum[n] + 
+        int offset_s = Nc * group[group_method].cpu_size_sum[n] +
                        correlation_step * group[group_method].cpu_size[n];
-        gpu_copy_data<<<(group[group_method].cpu_size[n] - 1) / BLOCK_SIZE_SHC + 1, BLOCK_SIZE_SHC>>>(
+        gpu_copy_data<<<
+          (group[group_method].cpu_size[n] - 1) / BLOCK_SIZE_SHC + 1,
+          BLOCK_SIZE_SHC>>>(
           group[group_method].cpu_size[n],
           group[group_method].cpu_size_sum[n],
           group[group_method].contents.data(),
@@ -487,9 +489,11 @@ void SHC::parse(const char** param, int num_param, const std::vector<Group>& gro
       PRINT_INPUT_ERROR("Unrecognized argument in compute_shc.\n");
     }
   }
-  
+
   if (group_id == -1) {
-    printf("    compute SHC for all group IDs except for group ID 0 in grouping method %d.\n", group_method);
+    printf(
+      "    compute SHC for all group IDs except for group ID 0 in grouping method %d.\n",
+      group_method);
   }
   if (group_id < -1) {
     PRINT_INPUT_ERROR("group ID should >= -1 for computing SHC.");
diff --git a/src/model/read_xyz.cu b/src/model/read_xyz.cu
index f00dcc866..98f59d390 100644
--- a/src/model/read_xyz.cu
+++ b/src/model/read_xyz.cu
@@ -298,7 +298,8 @@ static void read_xyz_line_2(
   // properties
   const int num_properties = 6;
   std::string property_name[num_properties] = {"species", "pos", "mass", "charge", "vel", "group"};
-  int property_position[num_properties] = {-1, -1, -1, -1, -1, -1}; // species,pos,mass,charge,vel,group
+  int property_position[num_properties] = {
+    -1, -1, -1, -1, -1, -1}; // species,pos,mass,charge,vel,group
   for (int n = 0; n < tokens.size(); ++n) {
     const std::string properties_string = "properties=";
     if (tokens[n].substr(0, properties_string.length()) == properties_string) {
@@ -554,7 +555,8 @@ void initialize_position(
   int num_columns = 0;
   bool has_mass = true;
   bool has_charge = true;
-  read_xyz_line_2(input, box, has_velocity_in_xyz, has_mass, has_charge, num_columns, property_offset, group);
+  read_xyz_line_2(
+    input, box, has_velocity_in_xyz, has_mass, has_charge, num_columns, property_offset, group);
 
   read_xyz_in_line_3(
     input,
diff --git a/src/utilities/nep_utilities.cuh b/src/utilities/nep_utilities.cuh
index 827f4c7ec..5bf4d25c6 100644
--- a/src/utilities/nep_utilities.cuh
+++ b/src/utilities/nep_utilities.cuh
@@ -31,17 +31,18 @@ __constant__ float C4B[5] = {
   -0.809943929279723f};
 __constant__ float C5B[3] = {0.026596810706114f, 0.053193621412227f, 0.026596810706114f};
 
-__constant__ float COVALENT_RADIUS[94] = 
-{0.426667f,0.613333f,1.6f,1.25333f,1.02667f,1.0f,0.946667f,0.84f,0.853333f,0.893333f,
-  1.86667f,1.66667f,1.50667f,1.38667f,1.46667f,1.36f,1.32f,1.28f,2.34667f,2.05333f,
-  1.77333f,1.62667f,1.61333f,1.46667f,1.42667f,1.38667f,1.33333f,1.32f,1.34667f,1.45333f,
-  1.49333f,1.45333f,1.53333f,1.46667f,1.52f,1.56f,2.52f,2.22667f,1.96f,1.85333f,
-  1.76f,1.65333f,1.53333f,1.50667f,1.50667f,1.44f,1.53333f,1.64f,1.70667f,1.68f,
-  1.68f,1.64f,1.76f,1.74667f,2.78667f,2.34667f,2.16f,1.96f,2.10667f,2.09333f,
-  2.08f,2.06667f,2.01333f,2.02667f,2.01333f,2.0f,1.98667f,1.98667f,1.97333f,2.04f,
-  1.94667f,1.82667f,1.74667f,1.64f,1.57333f,1.54667f,1.48f,1.49333f,1.50667f,1.76f,
-  1.73333f,1.73333f,1.81333f,1.74667f,1.84f,1.89333f,2.68f,2.41333f,2.22667f,2.10667f,
-  2.02667f,2.04f,2.05333f,2.06667f};
+__constant__ float COVALENT_RADIUS[94] = {
+  0.426667f, 0.613333f, 1.6f,     1.25333f, 1.02667f, 1.0f,     0.946667f, 0.84f,    0.853333f,
+  0.893333f, 1.86667f,  1.66667f, 1.50667f, 1.38667f, 1.46667f, 1.36f,     1.32f,    1.28f,
+  2.34667f,  2.05333f,  1.77333f, 1.62667f, 1.61333f, 1.46667f, 1.42667f,  1.38667f, 1.33333f,
+  1.32f,     1.34667f,  1.45333f, 1.49333f, 1.45333f, 1.53333f, 1.46667f,  1.52f,    1.56f,
+  2.52f,     2.22667f,  1.96f,    1.85333f, 1.76f,    1.65333f, 1.53333f,  1.50667f, 1.50667f,
+  1.44f,     1.53333f,  1.64f,    1.70667f, 1.68f,    1.68f,    1.64f,     1.76f,    1.74667f,
+  2.78667f,  2.34667f,  2.16f,    1.96f,    2.10667f, 2.09333f, 2.08f,     2.06667f, 2.01333f,
+  2.02667f,  2.01333f,  2.0f,     1.98667f, 1.98667f, 1.97333f, 2.04f,     1.94667f, 1.82667f,
+  1.74667f,  1.64f,     1.57333f, 1.54667f, 1.48f,    1.49333f, 1.50667f,  1.76f,    1.73333f,
+  1.73333f,  1.81333f,  1.74667f, 1.84f,    1.89333f, 2.68f,    2.41333f,  2.22667f, 2.10667f,
+  2.02667f,  2.04f,     2.05333f, 2.06667f};
 
 const int SIZE_BOX_AND_INVERSE_BOX = 18; // (3 * 3) * 2
 const int MAX_NUM_N = 20;                // n_max+1 = 19+1
@@ -92,7 +93,7 @@ static __device__ void apply_ann_multi_layers(
   float* energy_derivative)
 {
   constexpr int MAX_NEURONS_PER_LAYER = 200;
-  float x[3 * MAX_NEURONS_PER_LAYER];   // Maximum number of neurons per layer
+  float x[3 * MAX_NEURONS_PER_LAYER];     // Maximum number of neurons per layer
   float delta[3 * MAX_NEURONS_PER_LAYER]; // error of each neuron
 
   // input layer
@@ -119,8 +120,8 @@ static __device__ void apply_ann_multi_layers(
   energy = 0.0f;
   for (int n = 0; n < N_neu[layers - 1]; ++n) {
     float out = x[(layers - 1) * MAX_NEURONS_PER_LAYER + n];
-    energy += w_out[n] * out;         // w_out_j * x_j^3
-    delta[(layers - 1) * MAX_NEURONS_PER_LAYER + n] = w_out[n] * (1.0f - out * out);    // delta_j^3
+    energy += w_out[n] * out; // w_out_j * x_j^3
+    delta[(layers - 1) * MAX_NEURONS_PER_LAYER + n] = w_out[n] * (1.0f - out * out); // delta_j^3
   }
   energy -= b_out[0];
 
@@ -144,7 +145,6 @@ static __device__ void apply_ann_multi_layers(
       energy_derivative[d] += w0[n * N_des + d] * delta[n];
     }
   }
-    
 }
 
 static __device__ __forceinline__ void find_fc(float rc, float rcinv, float d12, float& fc)