diff --git a/src/main_nep/fitness.cu b/src/main_nep/fitness.cu
index 7c0979d0b..6e654717a 100644
--- a/src/main_nep/fitness.cu
+++ b/src/main_nep/fitness.cu
@@ -334,14 +334,7 @@ void Fitness::write_nep_txt(FILE* fid_nep, Parameters& para, float* elite)
   fprintf(fid_nep, "basis_size %d %d\n", para.basis_size_radial, para.basis_size_angular);
   fprintf(fid_nep, "l_max %d %d %d\n", para.L_max, para.L_max_4body, para.L_max_5body);
 
-  if (para.num_hidden_layers == 3) {
-    fprintf(
-      fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], para.num_neurons[2]);
-  } else if (para.num_hidden_layers == 2) {
-    fprintf(fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], 0);
-  } else if (para.num_hidden_layers == 1) {
-    fprintf(fid_nep, "ANN %d %d\n", para.num_neurons[0], 0);
-  }
+  fprintf(fid_nep, "ANN %d %d\n", para.num_neurons1, 0);
   for (int m = 0; m < para.number_of_variables; ++m) {
     fprintf(fid_nep, "%15.7e\n", elite[m]);
   }
diff --git a/src/main_nep/nep3.cu b/src/main_nep/nep3.cu
index 13414355e..ffcf76dcf 100644
--- a/src/main_nep/nep3.cu
+++ b/src/main_nep/nep3.cu
@@ -300,33 +300,8 @@ NEP3::NEP3(
   for (int device_id = 0; device_id < deviceCount; device_id++) {
     cudaSetDevice(device_id);
     annmb[device_id].dim = para.dim;
-    annmb[device_id].num_neurons[0] = para.num_neurons[0];
-    annmb[device_id].num_neurons[1] = para.num_neurons[1];
-    annmb[device_id].num_neurons[2] = para.num_neurons[2];
+    annmb[device_id].num_neurons1 = para.num_neurons1;
     annmb[device_id].num_para = para.number_of_variables;
-    annmb[device_id].num_hidden_layers = para.num_hidden_layers;
-    if (annmb[device_id].num_hidden_layers == 1) {
-      annmb[device_id].num_para_one_ann_without_bias =
-        (annmb[device_id].dim + 2) * annmb[device_id].num_neurons[0];
-    } else if (annmb[device_id].num_hidden_layers == 2) {
-      annmb[device_id].num_para_one_ann_without_bias =
-        (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] +
-        (annmb[device_id].num_neurons[0] + 2) * annmb[device_id].num_neurons[1];
-    } else {
-      annmb[device_id].num_para_one_ann_without_bias =
-        (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] +
-        (annmb[device_id].num_neurons[0] + 1) * annmb[device_id].num_neurons[1] +
-        (annmb[device_id].num_neurons[1] + 2) * annmb[device_id].num_neurons[2];
-    }
-    annmb[device_id].offset_w[0] = 0;
-    annmb[device_id].offset_b[0] = annmb[device_id].dim * annmb[device_id].num_neurons[0];
-    annmb[device_id].offset_w[1] = annmb[device_id].offset_b[0] + annmb[device_id].num_neurons[0];
-    annmb[device_id].offset_b[1] = annmb[device_id].offset_w[1] + annmb[device_id].num_neurons[0] *
-                                                                    annmb[device_id].num_neurons[1];
-    annmb[device_id].offset_w[2] = annmb[device_id].offset_b[1] + annmb[device_id].num_neurons[1];
-    annmb[device_id].offset_b[2] = annmb[device_id].offset_w[2] + annmb[device_id].num_neurons[1] *
-                                                                    annmb[device_id].num_neurons[2];
-    annmb[device_id].offset_w[3] = annmb[device_id].offset_b[2] + annmb[device_id].num_neurons[2];
 
     nep_data[device_id].NN_radial.resize(N);
     nep_data[device_id].NN_angular.resize(N);
@@ -350,23 +325,31 @@ void NEP3::update_potential(Parameters& para, float* parameters, ANN& ann)
   float* pointer = parameters;
   for (int t = 0; t < paramb.num_types; ++t) {
     if (t > 0 && paramb.version != 4) { // Use the same set of NN parameters for NEP3
-      pointer -= ann.num_para_one_ann_without_bias;
+      pointer -= (ann.dim + 2) * ann.num_neurons1;
     }
-    ann.para[t] = pointer;
-    pointer += ann.num_para_one_ann_without_bias;
+    ann.w0[t] = pointer;
+    pointer += ann.num_neurons1 * ann.dim;
+    ann.b0[t] = pointer;
+    pointer += ann.num_neurons1;
+    ann.w1[t] = pointer;
+    pointer += ann.num_neurons1;
   }
-  ann.b_out = pointer;
+  ann.b1 = pointer;
   pointer += 1;
 
   if (para.train_mode == 2) {
     for (int t = 0; t < paramb.num_types; ++t) {
       if (t > 0 && paramb.version != 4) { // Use the same set of NN parameters for NEP3
-        pointer -= ann.num_para_one_ann_without_bias;
+        pointer -= (ann.dim + 2) * ann.num_neurons1;
       }
-      ann.para_pol[t] = pointer;
-      pointer += ann.num_para_one_ann_without_bias;
+      ann.w0_pol[t] = pointer;
+      pointer += ann.num_neurons1 * ann.dim;
+      ann.b0_pol[t] = pointer;
+      pointer += ann.num_neurons1;
+      ann.w1_pol[t] = pointer;
+      pointer += ann.num_neurons1;
     }
-    ann.b_out_pol = pointer;
+    ann.b1_pol = pointer;
     pointer += 1;
   }
 
@@ -433,18 +416,13 @@ static __global__ void apply_ann(
     }
     // get energy and energy gradient
     float F = 0.0f, Fp[MAX_DIM] = {0.0f};
-    apply_ann_multi_layers(
+    apply_ann_one_layer(
       annmb.dim,
-      annmb.num_hidden_layers,
-      annmb.num_neurons,
-      annmb.para[type] + annmb.offset_w[0],
-      annmb.para[type] + annmb.offset_w[1],
-      annmb.para[type] + annmb.offset_w[2],
-      annmb.para[type] + annmb.offset_b[0],
-      annmb.para[type] + annmb.offset_b[1],
-      annmb.para[type] + annmb.offset_b[2],
-      annmb.para[type] + annmb.offset_w[3],
-      annmb.b_out,
+      annmb.num_neurons1,
+      annmb.w0[type],
+      annmb.b0[type],
+      annmb.w1[type],
+      annmb.b1,
       q,
       F,
       Fp);
@@ -478,18 +456,13 @@ static __global__ void apply_ann_pol(
     float F = 0.0f, Fp[MAX_DIM] = {0.0f};
 
     // scalar part
-    apply_ann_multi_layers(
+    apply_ann_one_layer(
       annmb.dim,
-      annmb.num_hidden_layers,
-      annmb.num_neurons,
-      annmb.para_pol[type] + annmb.offset_w[0],
-      annmb.para_pol[type] + annmb.offset_w[1],
-      annmb.para_pol[type] + annmb.offset_w[2],
-      annmb.para_pol[type] + annmb.offset_b[0],
-      annmb.para_pol[type] + annmb.offset_b[1],
-      annmb.para_pol[type] + annmb.offset_b[2],
-      annmb.para_pol[type] + annmb.offset_w[3],
-      annmb.b_out_pol,
+      annmb.num_neurons1,
+      annmb.w0_pol[type],
+      annmb.b0_pol[type],
+      annmb.w1_pol[type],
+      annmb.b1_pol,
       q,
       F,
       Fp);
@@ -501,18 +474,13 @@ static __global__ void apply_ann_pol(
     for (int d = 0; d < annmb.dim; ++d) {
       Fp[d] = 0.0f;
     }
-    apply_ann_multi_layers(
+    apply_ann_one_layer(
       annmb.dim,
-      annmb.num_hidden_layers,
-      annmb.num_neurons,
-      annmb.para[type] + annmb.offset_w[0],
-      annmb.para[type] + annmb.offset_w[1],
-      annmb.para[type] + annmb.offset_w[2],
-      annmb.para[type] + annmb.offset_b[0],
-      annmb.para[type] + annmb.offset_b[1],
-      annmb.para[type] + annmb.offset_b[2],
-      annmb.para[type] + annmb.offset_w[3],
-      annmb.b_out,
+      annmb.num_neurons1,
+      annmb.w0[type],
+      annmb.b0[type],
+      annmb.w1[type],
+      annmb.b1,
       q,
       F,
       Fp);
@@ -547,18 +515,13 @@ static __global__ void apply_ann_temperature(
     q[annmb.dim - 1] = temperature * g_q_scaler[annmb.dim - 1];
     // get energy and energy gradient
     float F = 0.0f, Fp[MAX_DIM] = {0.0f};
-    apply_ann_multi_layers(
+    apply_ann_one_layer(
       annmb.dim,
-      annmb.num_hidden_layers,
-      annmb.num_neurons,
-      annmb.para[type] + annmb.offset_w[0],
-      annmb.para[type] + annmb.offset_w[1],
-      annmb.para[type] + annmb.offset_w[2],
-      annmb.para[type] + annmb.offset_b[0],
-      annmb.para[type] + annmb.offset_b[1],
-      annmb.para[type] + annmb.offset_b[2],
-      annmb.para[type] + annmb.offset_w[3],
-      annmb.b_out,
+      annmb.num_neurons1,
+      annmb.w0[type],
+      annmb.b0[type],
+      annmb.w1[type],
+      annmb.b1,
       q,
       F,
       Fp);
diff --git a/src/main_nep/nep3.cuh b/src/main_nep/nep3.cuh
index 4f1639859..433d118f8 100644
--- a/src/main_nep/nep3.cuh
+++ b/src/main_nep/nep3.cuh
@@ -66,18 +66,17 @@ public:
 
   struct ANN {
     int dim = 0;                    // dimension of the descriptor
-    int num_hidden_layers = 1;      // number of hidden layers (1 to 3)
-    int num_neurons[3] = {0, 0, 0}; // number of neurons in the hidden layer
+    int num_neurons1 = 0;           // number of neurons in the hidden layer
     int num_para = 0;               // number of parameters
-    int num_para_one_ann_without_bias =
-      0; // number of parameters for one ann without the output bias
-    int offset_w[4];
-    int offset_b[3];
-    const float* para[NUM_ELEMENTS]; // weight and bias parameters for the hidden layers
-    const float* b_out;              // bias for the output layer
+    const float* w0[NUM_ELEMENTS]; // weight from the input layer to the hidden layer
+    const float* b0[NUM_ELEMENTS]; // bias for the hidden layer
+    const float* w1[NUM_ELEMENTS]; // weight from the hidden layer to the output layer
+    const float* b1;               // bias for the output layer
     // for the scalar part of polarizability
-    const float* para_pol[NUM_ELEMENTS]; // weight and bias parameters for the hidden layers
-    const float* b_out_pol;              // bias for the output layer
+    const float* w0_pol[NUM_ELEMENTS]; // weight from the input layer to the hidden layer
+    const float* b0_pol[NUM_ELEMENTS]; // bias for the hidden layer
+    const float* w1_pol[NUM_ELEMENTS]; // weight from the hidden layer to the output layer
+    const float* b1_pol;               // bias for the output layer
     // for elements in descriptor
     const float* c;
   };
diff --git a/src/main_nep/parameters.cu b/src/main_nep/parameters.cu
index da906ef15..db65a39fa 100644
--- a/src/main_nep/parameters.cu
+++ b/src/main_nep/parameters.cu
@@ -86,10 +86,7 @@ void Parameters::set_default_parameters()
   L_max = 4;                   // the only supported value
   L_max_4body = 2;             // default is to include 4body
   L_max_5body = 0;             // default is not to include 5body
-  num_hidden_layers = 1;       // default is to have one hidden layer
-  num_neurons[0] = 30;         // a relatively small value to achieve high speed
-  num_neurons[1] = 0;          // default is not to have the 2nd hidden layer
-  num_neurons[2] = 0;          // default is not to have the 3rd hidden layer
+  num_neurons1 = 30;           // a relatively small value to achieve high speed
   lambda_1 = lambda_2 = -1.0f; // automatic regularization
   lambda_e = lambda_f = 1.0f;  // energy and force are more important
   lambda_v = 0.1f;             // virial is less important
@@ -187,18 +184,7 @@ void Parameters::calculate_parameters()
   }
 #endif
 
-  if (num_hidden_layers == 1) {
-    number_of_variables_ann = (dim + 2) * num_neurons[0] * (version == 4 ? num_types : 1) + 1;
-  } else if (num_hidden_layers == 2) {
-    number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 2) * num_neurons[1]) *
-                                (version == 4 ? num_types : 1) +
-                              1;
-  } else {
-    number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 1) * num_neurons[1] +
-                               (num_neurons[1] + 2) * num_neurons[2]) *
-                                (version == 4 ? num_types : 1) +
-                              1;
-  }
+  number_of_variables_ann = (dim + 2) * num_neurons1 * (version == 4 ? num_types : 1) + 1;
 
   number_of_variables_descriptor =
     num_types * num_types *
@@ -358,17 +344,9 @@ void Parameters::report_inputs()
   }
 
   if (is_neuron_set) {
-    printf(
-      "    (input)   number of neurons = (%d, %d, %d).\n",
-      num_neurons[0],
-      num_neurons[1],
-      num_neurons[2]);
+    printf("    (input)   number of neurons = %d.\n", num_neurons1);
   } else {
-    printf(
-      "    (default) number of neurons = (%d, %d, %d).\n",
-      num_neurons[0],
-      num_neurons[1],
-      num_neurons[2]);
+    printf("    (default) number of neurons = %d.\n", num_neurons1);
   }
 
   if (is_lambda_1_set) {
@@ -439,18 +417,7 @@ void Parameters::report_inputs()
   printf("    number of radial descriptor components = %d.\n", dim_radial);
   printf("    number of angular descriptor components = %d.\n", dim_angular);
   printf("    total number of descriptor components = %d.\n", dim);
-  if (num_hidden_layers == 3) {
-    printf(
-      "    NN architecture = %d-%d-%d-%d-1.\n",
-      dim,
-      num_neurons[0],
-      num_neurons[1],
-      num_neurons[2]);
-  } else if (num_hidden_layers == 2) {
-    printf("    NN architecture = %d-%d-%d-1.\n", dim, num_neurons[0], num_neurons[1]);
-  } else {
-    printf("    NN architecture = %d-%d-1.\n", dim, num_neurons[0]);
-  }
+  printf("    NN architecture = %d-%d-1.\n", dim, num_neurons1);
   printf(
     "    number of NN parameters to be optimized = %d.\n",
     number_of_variables_ann * (train_mode == 2 ? 2 : 1));
@@ -781,46 +748,18 @@ void Parameters::parse_neuron(const char** param, int num_param)
 {
   is_neuron_set = true;
 
-  if (num_param < 2 || num_param > 4) {
-    PRINT_INPUT_ERROR("neuron should have 1 to 3 parameters.\n");
+  if (num_param != 2) {
+    PRINT_INPUT_ERROR("neuron should have 1 parameter.\n");
   }
 
-  num_hidden_layers = num_param - 1;
-
-  if (!is_valid_int(param[1], &num_neurons[0])) {
+  if (!is_valid_int(param[1], &num_neurons1)) {
     PRINT_INPUT_ERROR("number of neurons should be an integer.\n");
   }
-  if (num_neurons[0] < 1) {
+  if (num_neurons1 < 1) {
     PRINT_INPUT_ERROR("number of neurons should >= 1.");
-  } else if (num_neurons[0] > 200) {
+  } else if (num_neurons1 > 200) {
     PRINT_INPUT_ERROR("number of neurons should <= 200.");
   }
-
-  if (num_param > 2) {
-    if (!is_valid_int(param[2], &num_neurons[1])) {
-      PRINT_INPUT_ERROR("number of neurons should be an integer.\n");
-    }
-    if (num_neurons[1] < 1) {
-      PRINT_INPUT_ERROR("number of neurons should >= 1.");
-    } else if (num_neurons[1] > 200) {
-      PRINT_INPUT_ERROR("number of neurons should <= 200.");
-    }
-  }
-
-  if (num_param > 3) {
-    if (!is_valid_int(param[3], &num_neurons[2])) {
-      PRINT_INPUT_ERROR("number of neurons should be an integer.\n");
-    }
-    if (num_neurons[2] < 1) {
-      PRINT_INPUT_ERROR("number of neurons should >= 1.");
-    } else if (num_neurons[2] > 200) {
-      PRINT_INPUT_ERROR("number of neurons should <= 200.");
-    }
-  }
-
-  if (num_neurons[0] + num_neurons[1] + num_neurons[2] > 200) {
-    PRINT_INPUT_ERROR("total number of neurons should <= 200.\n");
-  }
 }
 
 void Parameters::parse_lambda_1(const char** param, int num_param)
diff --git a/src/main_nep/parameters.cuh b/src/main_nep/parameters.cuh
index 4067603de..3f4e35876 100644
--- a/src/main_nep/parameters.cuh
+++ b/src/main_nep/parameters.cuh
@@ -30,8 +30,7 @@ public:
   int num_types;          // number of atom types
   int population_size;    // population size for SNES
   int maximum_generation; // maximum number of generations for SNES;
-  int num_hidden_layers;  // number of hidden layers
-  int num_neurons[3];     // number of nuerons in the three hidden layers
+  int num_neurons1;       // number of neurons in the 1st hidden layer (only one hidden layer)
   int basis_size_radial;  // for nep3
   int basis_size_angular; // for nep3
   int n_max_radial;       // maximum order of the radial Chebyshev polynomials
diff --git a/src/main_nep/snes.cu b/src/main_nep/snes.cu
index 2a7bbd658..827efcb88 100644
--- a/src/main_nep/snes.cu
+++ b/src/main_nep/snes.cu
@@ -132,44 +132,15 @@ void SNES::find_type_of_variable(Parameters& para)
     int num_ann = (para.train_mode == 2) ? 2 : 1;
     for (int ann = 0; ann < num_ann; ++ann) {
       for (int t = 0; t < para.num_types; ++t) {
-        if (para.num_hidden_layers == 1) {
-          for (int n = 0; n < (para.dim + 2) * para.num_neurons[0]; ++n) {
-            type_of_variable[n + offset] = t;
-          }
-          offset += (para.dim + 2) * para.num_neurons[0];
-        } else if (para.num_hidden_layers == 2) {
-          for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] +
-                                (para.num_neurons[0] + 2) * para.num_neurons[1];
-               ++n) {
-            type_of_variable[n + offset] = t;
-          }
-          offset +=
-            (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1];
-        } else {
-          for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] +
-                                (para.num_neurons[0] + 1) * para.num_neurons[1] +
-                                (para.num_neurons[1] + 2) * para.num_neurons[2];
-               ++n) {
-            type_of_variable[n + offset] = t;
-          }
-          offset += (para.dim + 1) * para.num_neurons[0] +
-                    (para.num_neurons[0] + 1) * para.num_neurons[1] +
-                    (para.num_neurons[1] + 2) * para.num_neurons[2];
+        for (int n = 0; n < (para.dim + 2) * para.num_neurons1; ++n) {
+          type_of_variable[n + offset] = t;
         }
+        offset += (para.dim + 2) * para.num_neurons1;
       }
       ++offset; // the bias
     }
   } else {
-    if (para.num_hidden_layers == 1) {
-      offset += (para.dim + 2) * para.num_neurons[0] + 1;
-    } else if (para.num_hidden_layers == 2) {
-      offset +=
-        (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1] + 1;
-    } else {
-      offset += (para.dim + 1) * para.num_neurons[0] +
-                (para.num_neurons[0] + 1) * para.num_neurons[1] +
-                (para.num_neurons[1] + 2) * para.num_neurons[2] + 1;
-    }
+    offset += (para.dim + 2) * para.num_neurons1 + 1;
   }
 
   // descriptor part
diff --git a/src/utilities/nep_utilities.cuh b/src/utilities/nep_utilities.cuh
index 5bf4d25c6..ec79b10a7 100644
--- a/src/utilities/nep_utilities.cuh
+++ b/src/utilities/nep_utilities.cuh
@@ -76,77 +76,6 @@ static __device__ void apply_ann_one_layer(
   energy -= b1[0];
 }
 
-static __device__ void apply_ann_multi_layers(
-  const int N_des,
-  const int layers,
-  const int* N_neu,
-  const float* w0,
-  const float* w1,
-  const float* w2,
-  const float* b0,
-  const float* b1,
-  const float* b2,
-  const float* w_out,
-  const float* b_out,
-  float* q,
-  float& energy,
-  float* energy_derivative)
-{
-  constexpr int MAX_NEURONS_PER_LAYER = 200;
-  float x[3 * MAX_NEURONS_PER_LAYER];     // Maximum number of neurons per layer
-  float delta[3 * MAX_NEURONS_PER_LAYER]; // error of each neuron
-
-  // input layer
-  for (int n = 0; n < N_neu[0]; ++n) {
-    float sum = 0.0f;
-    for (int d = 0; d < N_des; ++d) {
-      sum += w0[n * N_des + d] * q[d];
-    }
-    x[n] = tanh(sum - b0[n]);
-  }
-  // hidden layers
-  for (int l = 1; l < layers; ++l) {
-    const float* w = (l == 1) ? w1 : w2;
-    const float* b = (l == 1) ? b1 : b2;
-    for (int n = 0; n < N_neu[l]; ++n) {
-      float sum = 0.0f;
-      for (int m = 0; m < N_neu[l - 1]; ++m) {
-        sum += w[n * N_neu[l - 1] + m] * x[(l - 1) * MAX_NEURONS_PER_LAYER + m];
-      }
-      x[l * MAX_NEURONS_PER_LAYER + n] = tanh(sum - b[n]);
-    }
-  }
-  // output layer
-  energy = 0.0f;
-  for (int n = 0; n < N_neu[layers - 1]; ++n) {
-    float out = x[(layers - 1) * MAX_NEURONS_PER_LAYER + n];
-    energy += w_out[n] * out; // w_out_j * x_j^3
-    delta[(layers - 1) * MAX_NEURONS_PER_LAYER + n] = w_out[n] * (1.0f - out * out); // delta_j^3
-  }
-  energy -= b_out[0];
-
-  // Backpropagation error
-  for (int l = layers - 1; l >= 1; --l) {     // l = 2, 1
-    const float* w_next = (l == 1) ? w1 : w2; // w2, w1
-    for (int m = 0; m < N_neu[l - 1]; ++m) {
-      float sum = 0.0f;
-      for (int n = 0; n < N_neu[l]; ++n) {
-        sum += w_next[n * N_neu[l - 1] + m] * delta[l * MAX_NEURONS_PER_LAYER + n];
-      }
-      float out = x[(l - 1) * MAX_NEURONS_PER_LAYER + m];
-      delta[(l - 1) * MAX_NEURONS_PER_LAYER + m] = sum * (1.0f - out * out);
-    }
-  }
-
-  // Derivative of the energy
-  for (int d = 0; d < N_des; ++d) {
-    energy_derivative[d] = 0.0f;
-    for (int n = 0; n < N_neu[0]; ++n) {
-      energy_derivative[d] += w0[n * N_des + d] * delta[n];
-    }
-  }
-}
-
 static __device__ __forceinline__ void find_fc(float rc, float rcinv, float d12, float& fc)
 {
   if (d12 < rc) {