diff --git a/src/force/dftd3.cu b/src/force/dftd3.cu index b9fe6e027..a25033a7c 100644 --- a/src/force/dftd3.cu +++ b/src/force/dftd3.cu @@ -42,13 +42,12 @@ namespace { const int MN = 10000; // maximum number of neighbors for one atom const std::string ELEMENTS[NUM_ELEMENTS] = { - "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", - "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", - "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", - "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", - "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", - "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", - "Pa", "U", "Np", "Pu"}; + "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", + "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", + "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", + "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", + "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", + "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu"}; void __global__ find_dftd3_coordination_number_small_box( DFTD3::DFTD3_Para dftd3_para, diff --git a/src/force/nep3.cu b/src/force/nep3.cu index a4b340257..347fd5c2c 100644 --- a/src/force/nep3.cu +++ b/src/force/nep3.cu @@ -32,13 +32,12 @@ heat transport, Phys. Rev. B. 104, 104309 (2021). #include const std::string ELEMENTS[NUM_ELEMENTS] = { - "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", - "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", - "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", - "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", - "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", - "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", - "Pa", "U", "Np", "Pu"}; + "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", + "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", + "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", + "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", + "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", + "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu"}; void NEP3::initialize_dftd3() { @@ -123,7 +122,9 @@ NEP3::NEP3(const char* file_potential, const int num_atoms) paramb.version = 4; paramb.model_type = 2; } else { - std::cout << tokens[0] << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." << std::endl; + std::cout << tokens[0] + << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." + << std::endl; exit(1); } paramb.num_types = get_int_from_token(tokens[1], __FILE__, __LINE__); @@ -175,7 +176,8 @@ NEP3::NEP3(const char* file_potential, const int num_atoms) // cutoff 4.2 3.7 80 47 1 tokens = get_tokens(input); if (tokens.size() != 5 && tokens.size() != 8) { - std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular [radial_factor] [angular_factor] [zbl_factor].\n"; + std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular " + "[radial_factor] [angular_factor] [zbl_factor].\n"; exit(1); } paramb.rc_radial = get_float_from_token(tokens[1], __FILE__, __LINE__); @@ -224,7 +226,7 @@ NEP3::NEP3(const char* file_potential, const int num_atoms) tokens = get_tokens(input); if (tokens.size() != 3) { std::cout << "This line should be basis_size basis_size_radial basis_size_angular." - << std::endl; + << std::endl; exit(1); } paramb.basis_size_radial = get_int_from_token(tokens[1], __FILE__, __LINE__); @@ -518,8 +520,12 @@ static __global__ void find_neighbor_list_large_box( if (paramb.use_typewise_cutoff) { int z1 = paramb.atomic_numbers[t1]; int z2 = paramb.atomic_numbers[t2]; - rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial); - rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular); + rc_radial = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, + rc_radial); + rc_angular = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, + rc_angular); } if (d12_square >= rc_radial * rc_radial) { @@ -601,13 +607,16 @@ static __global__ void find_descriptor( int t2 = g_type[n2]; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); float fn12[MAX_NUM_N]; - + find_fn(paramb.basis_size_radial, rcinv, d12, fc12, fn12); for (int n = 0; n <= paramb.n_max_radial; ++n) { float gn12 = 0.0f; @@ -649,8 +658,11 @@ static __global__ void find_descriptor( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -803,8 +815,11 @@ static __global__ void find_force_radial( float fc12, fcp12; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); @@ -946,16 +961,18 @@ static __global__ void find_partial_force_angular( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); float fn12[MAX_NUM_N]; float fnp12[MAX_NUM_N]; - find_fn_and_fnp( - paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12); + find_fn_and_fnp(paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12); for (int n = 0; n <= paramb.n_max_angular; ++n) { float gn12 = 0.0f; float gnp12 = 0.0f; @@ -1057,7 +1074,9 @@ static __global__ void find_force_ZBL( float rc_outer = zbl.rc_outer; if (paramb.use_typewise_cutoff_zbl) { // zi and zj start from 1, so need to minus 1 here - rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer); + rc_outer = min( + (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, + rc_outer); rc_inner = rc_outer * 0.5f; } find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp); @@ -1583,8 +1602,11 @@ static __global__ void find_descriptor( int t2 = g_type[n2]; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -1630,8 +1652,11 @@ static __global__ void find_descriptor( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); diff --git a/src/force/nep3.cuh b/src/force/nep3.cuh index 602b28f9a..43c85b6ca 100644 --- a/src/force/nep3.cuh +++ b/src/force/nep3.cuh @@ -76,13 +76,13 @@ public: }; struct ANN { - int dim = 0; // dimension of the descriptor - int num_neurons1 = 0; // number of neurons in the 1st hidden layer - int num_para = 0; // number of parameters + int dim = 0; // dimension of the descriptor + int num_neurons1 = 0; // number of neurons in the 1st hidden layer + int num_para = 0; // number of parameters const float* w0[NUM_ELEMENTS]; // weight from the input layer to the hidden layer const float* b0[NUM_ELEMENTS]; // bias for the hidden layer const float* w1[NUM_ELEMENTS]; // weight from the hidden layer to the output layer - const float* b1; // bias for the output layer + const float* b1; // bias for the output layer const float* c; // for the scalar part of polarizability const float* w0_pol[10]; diff --git a/src/force/nep3_multigpu.cu b/src/force/nep3_multigpu.cu index d70eb867f..efcd95036 100644 --- a/src/force/nep3_multigpu.cu +++ b/src/force/nep3_multigpu.cu @@ -34,13 +34,12 @@ when there is NVlink, but is also not very bad when there is only PCI-E. #include const std::string ELEMENTS[NUM_ELEMENTS] = { - "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", - "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", - "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", - "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", - "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", - "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", - "Pa", "U", "Np", "Pu"}; + "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", + "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", + "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", + "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", + "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", + "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu"}; void NEP3_MULTIGPU::initialize_dftd3() { @@ -125,7 +124,9 @@ NEP3_MULTIGPU::NEP3_MULTIGPU( paramb.version = 4; paramb.model_type = 2; } else { - std::cout << tokens[0] << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." << std::endl; + std::cout << tokens[0] + << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." + << std::endl; exit(1); } paramb.num_types = get_int_from_token(tokens[1], __FILE__, __LINE__); @@ -177,7 +178,8 @@ NEP3_MULTIGPU::NEP3_MULTIGPU( // cutoff 4.2 3.7 80 47 1 tokens = get_tokens(input); if (tokens.size() != 5 && tokens.size() != 8) { - std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular [radial_factor] [angular_factor] [zbl_factor].\n"; + std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular " + "[radial_factor] [angular_factor] [zbl_factor].\n"; exit(1); } paramb.rc_radial = get_float_from_token(tokens[1], __FILE__, __LINE__); @@ -792,8 +794,12 @@ static __global__ void find_neighbor_list_large_box( if (paramb.use_typewise_cutoff) { int z1 = paramb.atomic_numbers[t1]; int z2 = paramb.atomic_numbers[t2]; - rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial); - rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular); + rc_radial = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, + rc_radial); + rc_angular = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, + rc_angular); } if (d12_square >= rc_radial * rc_radial) { @@ -874,8 +880,11 @@ static __global__ void find_descriptor( int t2 = g_type[n2]; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -919,10 +928,13 @@ static __global__ void find_descriptor( #else float fc12; int t2 = g_type[n2]; - float rc = paramb.rc_angular; + float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -1071,8 +1083,11 @@ static __global__ void find_force_radial( float fc12, fcp12; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); @@ -1214,8 +1229,11 @@ static __global__ void find_partial_force_angular( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); @@ -1324,7 +1342,9 @@ static __global__ void find_force_ZBL( float rc_outer = zbl.rc_outer; if (paramb.use_typewise_cutoff_zbl) { // zi and zj start from 1, so need to minus 1 here - rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer); + rc_outer = min( + (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, + rc_outer); rc_inner = rc_outer * 0.5f; } find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp); @@ -1984,8 +2004,11 @@ static __global__ void find_descriptor( int t2 = g_type[n2]; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -2031,8 +2054,11 @@ static __global__ void find_descriptor( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); diff --git a/src/force/nep3_multigpu.cuh b/src/force/nep3_multigpu.cuh index cd34d0196..c6ae4bbf3 100644 --- a/src/force/nep3_multigpu.cuh +++ b/src/force/nep3_multigpu.cuh @@ -107,13 +107,13 @@ public: }; struct ANN { - int dim = 0; // dimension of the descriptor - int num_neurons1 = 0; // number of neurons in the 1st hidden layer - int num_para = 0; // number of parameters + int dim = 0; // dimension of the descriptor + int num_neurons1 = 0; // number of neurons in the 1st hidden layer + int num_para = 0; // number of parameters const float* w0[NUM_ELEMENTS]; // weight from the input layer to the hidden layer const float* b0[NUM_ELEMENTS]; // bias for the hidden layer const float* w1[NUM_ELEMENTS]; // weight from the hidden layer to the output layer - const float* b1; // bias for the output layer + const float* b1; // bias for the output layer const float* c; // for the scalar part of polarizability const float* w0_pol[10]; diff --git a/src/force/nep3_small_box.cuh b/src/force/nep3_small_box.cuh index 6680ffac7..9aac87735 100644 --- a/src/force/nep3_small_box.cuh +++ b/src/force/nep3_small_box.cuh @@ -131,8 +131,12 @@ static __global__ void find_neighbor_list_small_box( if (paramb.use_typewise_cutoff) { int z1 = paramb.atomic_numbers[t1]; int z2 = paramb.atomic_numbers[t2]; - rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial); - rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular); + rc_radial = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, + rc_radial); + rc_angular = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, + rc_angular); } if (distance_square < rc_radial * rc_radial) { @@ -214,8 +218,11 @@ static __global__ void find_descriptor_small_box( int t2 = g_type[n2]; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -258,8 +265,11 @@ static __global__ void find_descriptor_small_box( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -382,8 +392,11 @@ static __global__ void find_descriptor_small_box( int t2 = g_type[n2]; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -426,8 +439,11 @@ static __global__ void find_descriptor_small_box( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -528,15 +544,17 @@ static __global__ void find_force_radial_small_box( float fc12, fcp12; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); float fn12[MAX_NUM_N]; float fnp12[MAX_NUM_N]; - find_fn_and_fnp( - paramb.basis_size_radial, rcinv, d12, fc12, fcp12, fn12, fnp12); + find_fn_and_fnp(paramb.basis_size_radial, rcinv, d12, fc12, fcp12, fn12, fnp12); for (int n = 0; n <= paramb.n_max_radial; ++n) { float gnp12 = 0.0f; for (int k = 0; k <= paramb.basis_size_radial; ++k) { @@ -674,15 +692,17 @@ static __global__ void find_force_angular_small_box( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); float fn12[MAX_NUM_N]; float fnp12[MAX_NUM_N]; - find_fn_and_fnp( - paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12); + find_fn_and_fnp(paramb.basis_size_angular, rcinv, d12, fc12, fcp12, fn12, fnp12); for (int n = 0; n <= paramb.n_max_angular; ++n) { float gn12 = 0.0f; float gnp12 = 0.0f; @@ -807,7 +827,9 @@ static __global__ void find_force_ZBL_small_box( float rc_outer = zbl.rc_outer; if (paramb.use_typewise_cutoff_zbl) { // zi and zj start from 1, so need to minus 1 here - rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer); + rc_outer = min( + (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, + rc_outer); rc_inner = rc_outer * 0.5f; } find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp); diff --git a/src/integrate/integrate.cuh b/src/integrate/integrate.cuh index 7b7fa66a7..b488cfdfd 100644 --- a/src/integrate/integrate.cuh +++ b/src/integrate/integrate.cuh @@ -34,7 +34,7 @@ public: Box& box, std::vector& group, GPU_Vector& thermo, - int &total_steps); + int& total_steps); void finalize(); diff --git a/src/main_gpumd/add_efield.cu b/src/main_gpumd/add_efield.cu index 6eb3a9f44..01bb595f5 100644 --- a/src/main_gpumd/add_efield.cu +++ b/src/main_gpumd/add_efield.cu @@ -24,12 +24,11 @@ Add electric field to a group of atoms. #include #include -static void __global__ -add_efield( +static void __global__ add_efield( const int group_size, const int group_size_sum, const int* g_group_contents, - const double Ex, + const double Ex, const double Ey, const double Ez, const double* g_charge, @@ -67,8 +66,7 @@ void Add_Efield::compute(const int step, const std::vector& groups, Atom& atom.charge.data(), atom.force_per_atom.data(), atom.force_per_atom.data() + num_atoms_total, - atom.force_per_atom.data() + num_atoms_total * 2 - ); + atom.force_per_atom.data() + num_atoms_total * 2); CUDA_CHECK_KERNEL } } @@ -105,10 +103,9 @@ void Add_Efield::parse(const char** param, int num_param, const std::vector& group); void compute(const int step, const std::vector& groups, Atom& atom); void finalize(); private: - int num_calls_ = 0; int table_length_[10]; std::vector efield_table_[10]; diff --git a/src/main_gpumd/add_force.cu b/src/main_gpumd/add_force.cu index a56c6b2cd..92bb952da 100644 --- a/src/main_gpumd/add_force.cu +++ b/src/main_gpumd/add_force.cu @@ -24,12 +24,11 @@ Add force to a group of atoms. #include #include -static void __global__ -add_force( +static void __global__ add_force( const int group_size, const int group_size_sum, const int* g_group_contents, - const double added_fx, + const double added_fx, const double added_fy, const double added_fz, double* g_fx, @@ -64,8 +63,7 @@ void Add_Force::compute(const int step, const std::vector& groups, Atom& added_fz, atom.force_per_atom.data(), atom.force_per_atom.data() + num_atoms_total, - atom.force_per_atom.data() + num_atoms_total * 2 - ); + atom.force_per_atom.data() + num_atoms_total * 2); CUDA_CHECK_KERNEL } } @@ -102,10 +100,9 @@ void Add_Force::parse(const char** param, int num_param, const std::vector& group); void compute(const int step, const std::vector& groups, Atom& atom); void finalize(); private: - int num_calls_ = 0; int table_length_[10]; std::vector force_table_[10]; diff --git a/src/main_gpumd/add_random_force.cu b/src/main_gpumd/add_random_force.cu index 42b0562b8..662d1d242 100644 --- a/src/main_gpumd/add_random_force.cu +++ b/src/main_gpumd/add_random_force.cu @@ -128,23 +128,22 @@ void Add_Random_Force::compute(const int step, Atom& atom) curand_states_.data(), atom.force_per_atom.data(), atom.force_per_atom.data() + atom.number_of_atoms, - atom.force_per_atom.data() + atom.number_of_atoms * 2 - ); + atom.force_per_atom.data() + atom.number_of_atoms * 2); CUDA_CHECK_KERNEL gpu_sum_force<<<3, 1024>>>( - atom.number_of_atoms, - atom.force_per_atom.data(), - atom.force_per_atom.data() + atom.number_of_atoms, - atom.force_per_atom.data() + 2 * atom.number_of_atoms); + atom.number_of_atoms, + atom.force_per_atom.data(), + atom.force_per_atom.data() + atom.number_of_atoms, + atom.force_per_atom.data() + 2 * atom.number_of_atoms); CUDA_CHECK_KERNEL gpu_correct_force<<<(atom.number_of_atoms - 1) / 64 + 1, 64>>>( - atom.number_of_atoms, - 1.0 / atom.number_of_atoms, - atom.force_per_atom.data(), - atom.force_per_atom.data() + atom.number_of_atoms, - atom.force_per_atom.data() + 2 * atom.number_of_atoms); + atom.number_of_atoms, + 1.0 / atom.number_of_atoms, + atom.force_per_atom.data(), + atom.force_per_atom.data() + atom.number_of_atoms, + atom.force_per_atom.data() + 2 * atom.number_of_atoms); CUDA_CHECK_KERNEL } } @@ -178,7 +177,4 @@ void Add_Random_Force::parse(const char** param, int num_param, int number_of_at CUDA_CHECK_KERNEL } -void Add_Random_Force::finalize() -{ - num_calls_ = 0; -} +void Add_Random_Force::finalize() { num_calls_ = 0; } diff --git a/src/main_gpumd/add_random_force.cuh b/src/main_gpumd/add_random_force.cuh index 71300667c..30fc6aaef 100644 --- a/src/main_gpumd/add_random_force.cuh +++ b/src/main_gpumd/add_random_force.cuh @@ -22,13 +22,11 @@ class Atom; class Add_Random_Force { public: - void parse(const char** param, int num_param, int number_of_atoms); void compute(const int step, Atom& atom); void finalize(); private: - GPU_Vector curand_states_; int num_calls_ = 0; double force_variance_ = 0.0; diff --git a/src/main_gpumd/electron_stop.cu b/src/main_gpumd/electron_stop.cu index 5734790f4..487f0d6a4 100644 --- a/src/main_gpumd/electron_stop.cu +++ b/src/main_gpumd/electron_stop.cu @@ -69,7 +69,7 @@ static void __global__ find_stopping_force( g_force[1 * num_atoms + i] = vy * factor; g_force[2 * num_atoms + i] = vz * factor; - g_power_loss[i] = stopping_power * sqrt(v2) * time_step; + g_power_loss[i] = stopping_power * sqrt(v2) * time_step; } } @@ -136,26 +136,25 @@ static __global__ void find_power_loss(int num_atoms, double* g_power_loss) double f = 0.0; for (int batch = 0; batch < number_of_batches; ++batch) { - int idx = tid + batch * block_size; - if (idx < num_atoms) { - f += g_power_loss[idx]; - } + int idx = tid + batch * block_size; + if (idx < num_atoms) { + f += g_power_loss[idx]; + } } s_f[tid] = f; __syncthreads(); for (int offset = blockDim.x >> 1; offset > 0; offset >>= 1) { - if (tid < offset) { - s_f[tid] += s_f[tid + offset]; - } - __syncthreads(); + if (tid < offset) { + s_f[tid] += s_f[tid + offset]; + } + __syncthreads(); } if (tid == 0) { - device_power_loss = s_f[0]; + device_power_loss = s_f[0]; } - } void Electron_Stop::compute(double time_step, Atom& atom) @@ -190,8 +189,9 @@ void Electron_Stop::compute(double time_step, Atom& atom) find_power_loss<<<1, 1024>>>(atom.number_of_atoms, stopping_loss.data()); CUDA_CHECK_KERNEL - double power_loss_host; - CHECK(cudaMemcpyFromSymbol(&power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost)); + double power_loss_host; + CHECK(cudaMemcpyFromSymbol( + &power_loss_host, device_power_loss, sizeof(double), 0, cudaMemcpyDeviceToHost)); stopping_power_loss += power_loss_host; } @@ -256,11 +256,11 @@ void Electron_Stop::parse( do_electron_stop = true; } -void Electron_Stop::finalize() -{ - if (do_electron_stop) { +void Electron_Stop::finalize() +{ + if (do_electron_stop) { printf("Total electron stopping power loss = %g eV.\n", stopping_power_loss); } - do_electron_stop = false; + do_electron_stop = false; stopping_power_loss = 0.0; } diff --git a/src/main_gpumd/run.cu b/src/main_gpumd/run.cu index 21a8f4274..17db0bcbb 100644 --- a/src/main_gpumd/run.cu +++ b/src/main_gpumd/run.cu @@ -18,8 +18,8 @@ Run simulation according to the inputs in the run.in file. ------------------------------------------------------------------------------*/ #include "add_efield.cuh" -#include "add_random_force.cuh" #include "add_force.cuh" +#include "add_random_force.cuh" #include "cohesive.cuh" #include "electron_stop.cuh" #include "force/force.cuh" @@ -554,9 +554,10 @@ void Run::parse_correct_velocity(const char** param, int num_param, const std::v if (velocity.velocity_correction_group_method < 0) { printf(" for the whole system.\n"); } else { - printf(" for individual groups in group method %d.\n", velocity.velocity_correction_group_method); + printf( + " for individual groups in group method %d.\n", velocity.velocity_correction_group_method); } - + velocity.do_velocity_correction = true; } diff --git a/src/main_gpumd/velocity.cu b/src/main_gpumd/velocity.cu index 542142741..058ca38d2 100644 --- a/src/main_gpumd/velocity.cu +++ b/src/main_gpumd/velocity.cu @@ -353,6 +353,6 @@ void Velocity::initialize( void Velocity::finalize() { - do_velocity_correction = false; + do_velocity_correction = false; velocity_correction_group_method = -1; } diff --git a/src/main_nep/fitness.cu b/src/main_nep/fitness.cu index dfa96d03a..c6cb8257b 100644 --- a/src/main_nep/fitness.cu +++ b/src/main_nep/fitness.cu @@ -54,7 +54,8 @@ Fitness::Fitness(Parameters& para) int count = 0; for (int batch_id = 0; batch_id < num_batches; ++batch_id) { const int batch_size_minimal = structures_train.size() / num_batches; - const bool is_larger_batch = batch_id + batch_size_minimal * num_batches < structures_train.size(); + const bool is_larger_batch = + batch_id + batch_size_minimal * num_batches < structures_train.size(); const int batch_size = is_larger_batch ? batch_size_minimal + 1 : batch_size_minimal; count += batch_size; printf("\nBatch %d:\n", batch_id); @@ -63,7 +64,8 @@ Fitness::Fitness(Parameters& para) print_line_1(); printf("Constructing train_set in device %d.\n", device_id); CHECK(cudaSetDevice(device_id)); - train_set[batch_id][device_id].construct(para, structures_train, count - batch_size, count, device_id); + train_set[batch_id][device_id].construct( + para, structures_train, count - batch_size, count, device_id); print_line_2(); } } @@ -138,15 +140,15 @@ void Fitness::compute( std::vector dummy_solution(para.number_of_variables * deviceCount, para.initial_para); for (int n = 0; n < num_batches; ++n) { potential->find_force( - para, - dummy_solution.data(), - train_set[n], + para, + dummy_solution.data(), + train_set[n], #ifdef USE_FIXED_SCALER - false, + false, #else true, #endif - true, + true, deviceCount); } } else { @@ -215,16 +217,15 @@ void Fitness::compute( } } } - } } void Fitness::output( bool is_stress, - int num_components, - FILE* fid, - float* prediction, - float* reference, + int num_components, + FILE* fid, + float* prediction, + float* reference, Dataset& dataset) { for (int nc = 0; nc < dataset.Nc; ++nc) { @@ -322,19 +323,20 @@ void Fitness::write_nep_txt(FILE* fid_nep, Parameters& para, float* elite) para.typewise_cutoff_zbl_factor); } else { fprintf( - fid_nep, - "cutoff %g %g %d %d\n", - para.rc_radial, - para.rc_angular, - max_NN_radial, - max_NN_angular); + fid_nep, + "cutoff %g %g %d %d\n", + para.rc_radial, + para.rc_angular, + max_NN_radial, + max_NN_angular); } fprintf(fid_nep, "n_max %d %d\n", para.n_max_radial, para.n_max_angular); fprintf(fid_nep, "basis_size %d %d\n", para.basis_size_radial, para.basis_size_angular); fprintf(fid_nep, "l_max %d %d %d\n", para.L_max, para.L_max_4body, para.L_max_5body); if (para.num_hidden_layers == 3) { - fprintf(fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], para.num_neurons[2]); + fprintf( + fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], para.num_neurons[2]); } else if (para.num_hidden_layers == 2) { fprintf(fid_nep, "ANN %d %d %d\n", para.num_neurons[0], para.num_neurons[1], 0); } else if (para.num_hidden_layers == 1) { @@ -528,7 +530,13 @@ void Fitness::update_dipole(FILE* fid_dipole, Dataset& dataset) void Fitness::update_polarizability(FILE* fid_polarizability, Dataset& dataset) { dataset.virial.copy_to_host(dataset.virial_cpu.data()); - output(false, 6, fid_polarizability, dataset.virial_cpu.data(), dataset.virial_ref_cpu.data(), dataset); + output( + false, + 6, + fid_polarizability, + dataset.virial_cpu.data(), + dataset.virial_ref_cpu.data(), + dataset); } void Fitness::predict(Parameters& para, float* elite) @@ -540,7 +548,8 @@ void Fitness::predict(Parameters& para, float* elite) FILE* fid_stress = my_fopen("stress_train.out", "w"); for (int batch_id = 0; batch_id < num_batches; ++batch_id) { potential->find_force(para, elite, train_set[batch_id], false, true, 1); - update_energy_force_virial(fid_energy, fid_force, fid_virial, fid_stress, train_set[batch_id][0]); + update_energy_force_virial( + fid_energy, fid_force, fid_virial, fid_stress, train_set[batch_id][0]); } fclose(fid_energy); fclose(fid_force); diff --git a/src/main_nep/fitness.cuh b/src/main_nep/fitness.cuh index 578071bcb..a2623480f 100644 --- a/src/main_nep/fitness.cuh +++ b/src/main_nep/fitness.cuh @@ -48,14 +48,13 @@ protected: std::vector> train_set; std::vector test_set; void output( - bool is_stress, - int num_components, - FILE* fid, - float* prediction, - float* reference, + bool is_stress, + int num_components, + FILE* fid, + float* prediction, + float* reference, Dataset& dataset); - void - update_energy_force_virial( + void update_energy_force_virial( FILE* fid_energy, FILE* fid_force, FILE* fid_virial, FILE* fid_stress, Dataset& dataset); void update_dipole(FILE* fid_dipole, Dataset& dataset); void update_polarizability(FILE* fid_polarizability, Dataset& dataset); diff --git a/src/main_nep/nep3.cu b/src/main_nep/nep3.cu index c62c29f29..13414355e 100644 --- a/src/main_nep/nep3.cu +++ b/src/main_nep/nep3.cu @@ -88,8 +88,12 @@ static __global__ void gpu_find_neighbor_list( if (use_typewise_cutoff) { int z1 = paramb.atomic_numbers[t1]; int z2 = paramb.atomic_numbers[t2]; - rc_radial = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, rc_radial); - rc_angular = min((COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, rc_angular); + rc_radial = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_radial_factor, + rc_radial); + rc_angular = min( + (COVALENT_RADIUS[z1] + COVALENT_RADIUS[z2]) * paramb.typewise_cutoff_angular_factor, + rc_angular); } if (distance_square < rc_radial * rc_radial) { NL_radial[count_radial * N + n1] = n2; @@ -142,8 +146,11 @@ static __global__ void find_descriptors_radial( int t2 = g_type[n2]; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -198,8 +205,11 @@ static __global__ void find_descriptors_angular( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc(rc, rcinv, d12, fc12); @@ -276,7 +286,7 @@ NEP3::NEP3( zbl.rc_inner = para.zbl_rc_inner; zbl.rc_outer = para.zbl_rc_outer; for (int n = 0; n < para.atomic_numbers.size(); ++n) { - zbl.atomic_numbers[n] = para.atomic_numbers[n]; // starting from 1 + zbl.atomic_numbers[n] = para.atomic_numbers[n]; // starting from 1 paramb.atomic_numbers[n] = para.atomic_numbers[n] - 1; // starting from 0 } if (zbl.flexibled) { @@ -296,24 +306,26 @@ NEP3::NEP3( annmb[device_id].num_para = para.number_of_variables; annmb[device_id].num_hidden_layers = para.num_hidden_layers; if (annmb[device_id].num_hidden_layers == 1) { - annmb[device_id].num_para_one_ann_without_bias - = (annmb[device_id].dim + 2) * annmb[device_id].num_neurons[0]; + annmb[device_id].num_para_one_ann_without_bias = + (annmb[device_id].dim + 2) * annmb[device_id].num_neurons[0]; } else if (annmb[device_id].num_hidden_layers == 2) { - annmb[device_id].num_para_one_ann_without_bias - = (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] - + (annmb[device_id].num_neurons[0] + 2) * annmb[device_id].num_neurons[1]; + annmb[device_id].num_para_one_ann_without_bias = + (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] + + (annmb[device_id].num_neurons[0] + 2) * annmb[device_id].num_neurons[1]; } else { - annmb[device_id].num_para_one_ann_without_bias - = (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] - + (annmb[device_id].num_neurons[0] + 1) * annmb[device_id].num_neurons[1] - + (annmb[device_id].num_neurons[1] + 2) * annmb[device_id].num_neurons[2]; + annmb[device_id].num_para_one_ann_without_bias = + (annmb[device_id].dim + 1) * annmb[device_id].num_neurons[0] + + (annmb[device_id].num_neurons[0] + 1) * annmb[device_id].num_neurons[1] + + (annmb[device_id].num_neurons[1] + 2) * annmb[device_id].num_neurons[2]; } annmb[device_id].offset_w[0] = 0; annmb[device_id].offset_b[0] = annmb[device_id].dim * annmb[device_id].num_neurons[0]; annmb[device_id].offset_w[1] = annmb[device_id].offset_b[0] + annmb[device_id].num_neurons[0]; - annmb[device_id].offset_b[1] = annmb[device_id].offset_w[1] + annmb[device_id].num_neurons[0] * annmb[device_id].num_neurons[1]; + annmb[device_id].offset_b[1] = annmb[device_id].offset_w[1] + annmb[device_id].num_neurons[0] * + annmb[device_id].num_neurons[1]; annmb[device_id].offset_w[2] = annmb[device_id].offset_b[1] + annmb[device_id].num_neurons[1]; - annmb[device_id].offset_b[2] = annmb[device_id].offset_w[2] + annmb[device_id].num_neurons[1] * annmb[device_id].num_neurons[2]; + annmb[device_id].offset_b[2] = annmb[device_id].offset_w[2] + annmb[device_id].num_neurons[1] * + annmb[device_id].num_neurons[2]; annmb[device_id].offset_w[3] = annmb[device_id].offset_b[2] + annmb[device_id].num_neurons[2]; nep_data[device_id].NN_radial.resize(N); @@ -609,8 +621,11 @@ static __global__ void find_force_radial( float fc12, fcp12; float rc = paramb.rc_radial; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_radial_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_radial_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); @@ -709,8 +724,11 @@ static __global__ void find_force_angular( int t2 = g_type[n2]; float rc = paramb.rc_angular; if (paramb.use_typewise_cutoff) { - rc = min((COVALENT_RADIUS[paramb.atomic_numbers[t1]] - + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * paramb.typewise_cutoff_angular_factor, rc); + rc = min( + (COVALENT_RADIUS[paramb.atomic_numbers[t1]] + + COVALENT_RADIUS[paramb.atomic_numbers[t2]]) * + paramb.typewise_cutoff_angular_factor, + rc); } float rcinv = 1.0f / rc; find_fc_and_fcp(rc, rcinv, d12, fc12, fcp12); @@ -829,8 +847,9 @@ static __global__ void find_force_ZBL( float rc_outer = zbl.rc_outer; if (paramb.use_typewise_cutoff_zbl) { // zi and zj start from 1, so need to minus 1 here - rc_outer = min((COVALENT_RADIUS[zi - 1] - + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer); + rc_outer = min( + (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, + rc_outer); rc_inner = rc_outer * 0.5f; } find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp); diff --git a/src/main_nep/nep3.cuh b/src/main_nep/nep3.cuh index e7df8b3cd..4f1639859 100644 --- a/src/main_nep/nep3.cuh +++ b/src/main_nep/nep3.cuh @@ -52,31 +52,32 @@ public: float rcinv_angular = 0.0f; // inverse of the angular cutoff int basis_size_radial = 0; int basis_size_angular = 0; - int n_max_radial = 0; // n_radial = 0, 1, 2, ..., n_max_radial - int n_max_angular = 0; // n_angular = 0, 1, 2, ..., n_max_angular - int L_max = 0; // l = 1, 2, ..., L_max + int n_max_radial = 0; // n_radial = 0, 1, 2, ..., n_max_radial + int n_max_angular = 0; // n_angular = 0, 1, 2, ..., n_max_angular + int L_max = 0; // l = 1, 2, ..., L_max int dim_angular; int num_L; int num_types = 0; int num_types_sq = 0; int num_c_radial = 0; - int version = 4; // 3 for NEP3 and 4 for NEP4 + int version = 4; // 3 for NEP3 and 4 for NEP4 int atomic_numbers[NUM_ELEMENTS]; }; struct ANN { - int dim = 0; // dimension of the descriptor - int num_hidden_layers = 1; // number of hidden layers (1 to 3) + int dim = 0; // dimension of the descriptor + int num_hidden_layers = 1; // number of hidden layers (1 to 3) int num_neurons[3] = {0, 0, 0}; // number of neurons in the hidden layer - int num_para = 0; // number of parameters - int num_para_one_ann_without_bias = 0; // number of parameters for one ann without the output bias + int num_para = 0; // number of parameters + int num_para_one_ann_without_bias = + 0; // number of parameters for one ann without the output bias int offset_w[4]; int offset_b[3]; const float* para[NUM_ELEMENTS]; // weight and bias parameters for the hidden layers - const float* b_out; // bias for the output layer + const float* b_out; // bias for the output layer // for the scalar part of polarizability const float* para_pol[NUM_ELEMENTS]; // weight and bias parameters for the hidden layers - const float* b_out_pol; // bias for the output layer + const float* b_out_pol; // bias for the output layer // for elements in descriptor const float* c; }; diff --git a/src/main_nep/parameters.cu b/src/main_nep/parameters.cu index 9c5a1e7b3..da906ef15 100644 --- a/src/main_nep/parameters.cu +++ b/src/main_nep/parameters.cu @@ -22,13 +22,12 @@ #include const std::string ELEMENTS[NUM_ELEMENTS] = { - "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", - "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", - "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", - "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", - "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", - "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", - "Pa", "U", "Np", "Pu"}; + "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", + "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", + "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", + "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", + "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", + "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu"}; Parameters::Parameters() { @@ -171,7 +170,7 @@ void Parameters::calculate_parameters() } dim_radial = n_max_radial + 1; // 2-body descriptors q^i_n dim_angular = (n_max_angular + 1) * L_max; // 3-body descriptors q^i_nl - if (L_max_4body == 2) { // 4-body descriptors q^i_n222 + if (L_max_4body == 2) { // 4-body descriptors q^i_n222 dim_angular += n_max_angular + 1; } if (L_max_5body == 1) { // 5-body descriptors q^i_n1111 @@ -191,9 +190,14 @@ void Parameters::calculate_parameters() if (num_hidden_layers == 1) { number_of_variables_ann = (dim + 2) * num_neurons[0] * (version == 4 ? num_types : 1) + 1; } else if (num_hidden_layers == 2) { - number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 2) * num_neurons[1]) * (version == 4 ? num_types : 1) + 1; + number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 2) * num_neurons[1]) * + (version == 4 ? num_types : 1) + + 1; } else { - number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 1) * num_neurons[1] + (num_neurons[1] + 2) * num_neurons[2]) * (version == 4 ? num_types : 1) + 1; + number_of_variables_ann = ((dim + 1) * num_neurons[0] + (num_neurons[0] + 1) * num_neurons[1] + + (num_neurons[1] + 2) * num_neurons[2]) * + (version == 4 ? num_types : 1) + + 1; } number_of_variables_descriptor = @@ -319,10 +323,12 @@ void Parameters::report_inputs() } if (is_use_typewise_cutoff_zbl_set) { - printf(" (input) use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global"); + printf( + " (input) use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global"); printf(" factor = %g.\n", typewise_cutoff_zbl_factor); } else { - printf(" (default) use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global"); + printf( + " (default) use %s cutoff for ZBL.\n", use_typewise_cutoff_zbl ? "typewise" : "global"); } if (is_n_max_set) { @@ -352,9 +358,17 @@ void Parameters::report_inputs() } if (is_neuron_set) { - printf(" (input) number of neurons = (%d, %d, %d).\n", num_neurons[0], num_neurons[1], num_neurons[2]); + printf( + " (input) number of neurons = (%d, %d, %d).\n", + num_neurons[0], + num_neurons[1], + num_neurons[2]); } else { - printf(" (default) number of neurons = (%d, %d, %d).\n", num_neurons[0], num_neurons[1], num_neurons[2]); + printf( + " (default) number of neurons = (%d, %d, %d).\n", + num_neurons[0], + num_neurons[1], + num_neurons[2]); } if (is_lambda_1_set) { @@ -426,7 +440,12 @@ void Parameters::report_inputs() printf(" number of angular descriptor components = %d.\n", dim_angular); printf(" total number of descriptor components = %d.\n", dim); if (num_hidden_layers == 3) { - printf(" NN architecture = %d-%d-%d-%d-1.\n", dim, num_neurons[0], num_neurons[1], num_neurons[2]); + printf( + " NN architecture = %d-%d-%d-%d-1.\n", + dim, + num_neurons[0], + num_neurons[1], + num_neurons[2]); } else if (num_hidden_layers == 2) { printf(" NN architecture = %d-%d-%d-1.\n", dim, num_neurons[0], num_neurons[1]); } else { @@ -762,7 +781,7 @@ void Parameters::parse_neuron(const char** param, int num_param) { is_neuron_set = true; - if (num_param < 2 || num_param > 4) { + if (num_param < 2 || num_param > 4) { PRINT_INPUT_ERROR("neuron should have 1 to 3 parameters.\n"); } @@ -1039,13 +1058,13 @@ void Parameters::parse_use_typewise_cutoff(const char** param, int num_param) if (num_param == 3) { double typewise_cutoff_radial_factor_temp = 0.0; if (!is_valid_real(param[1], &typewise_cutoff_radial_factor_temp)) { - PRINT_INPUT_ERROR("typewise_cutoff_radial_factor should be a number.\n"); + PRINT_INPUT_ERROR("typewise_cutoff_radial_factor should be a number.\n"); } typewise_cutoff_radial_factor = typewise_cutoff_radial_factor_temp; double typewise_cutoff_angular_factor_temp = 0.0; if (!is_valid_real(param[2], &typewise_cutoff_angular_factor_temp)) { - PRINT_INPUT_ERROR("typewise_cutoff_angular_factor should be a number.\n"); + PRINT_INPUT_ERROR("typewise_cutoff_angular_factor should be a number.\n"); } typewise_cutoff_angular_factor = typewise_cutoff_angular_factor_temp; } @@ -1071,7 +1090,7 @@ void Parameters::parse_use_typewise_cutoff_zbl(const char** param, int num_param if (num_param == 2) { double typewise_cutoff_zbl_factor_temp = 0.0; if (!is_valid_real(param[1], &typewise_cutoff_zbl_factor_temp)) { - PRINT_INPUT_ERROR("typewise_cutoff_zbl_factor should be a number.\n"); + PRINT_INPUT_ERROR("typewise_cutoff_zbl_factor should be a number.\n"); } typewise_cutoff_zbl_factor = typewise_cutoff_zbl_factor_temp; } diff --git a/src/main_nep/snes.cu b/src/main_nep/snes.cu index 5c70fd4e4..9127b5125 100644 --- a/src/main_nep/snes.cu +++ b/src/main_nep/snes.cu @@ -44,7 +44,7 @@ SNES::SNES(Parameters& para, Fitness* fitness_function) maximum_generation = para.maximum_generation; number_of_variables = para.number_of_variables; population_size = para.population_size; - const int N = population_size * number_of_variables; + const int N = population_size * number_of_variables; int num = number_of_variables; if (para.version == 4) { num /= para.num_types; @@ -107,8 +107,8 @@ void SNES::initialize_mu_and_sigma(Parameters& para) fclose(fid_restart); } #ifdef USE_FIXED_SCALER - mu[para.number_of_variables_ann - 1] = 0.0f; - sigma[para.number_of_variables_ann - 1] = 0.0f; + mu[para.number_of_variables_ann - 1] = 0.0f; + sigma[para.number_of_variables_ann - 1] = 0.0f; #endif cudaSetDevice(0); // normally use GPU-0 gpu_mu.copy_from_host(mu.data()); @@ -142,15 +142,23 @@ void SNES::find_type_of_variable(Parameters& para) } offset += (para.dim + 2) * para.num_neurons[0]; } else if (para.num_hidden_layers == 2) { - for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1]; ++n) { + for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] + + (para.num_neurons[0] + 2) * para.num_neurons[1]; + ++n) { type_of_variable[n + offset] = t; } - offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1]; + offset += + (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1]; } else { - for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 1) * para.num_neurons[1] + (para.num_neurons[1] + 2) * para.num_neurons[2]; ++n) { + for (int n = 0; n < (para.dim + 1) * para.num_neurons[0] + + (para.num_neurons[0] + 1) * para.num_neurons[1] + + (para.num_neurons[1] + 2) * para.num_neurons[2]; + ++n) { type_of_variable[n + offset] = t; } - offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 1) * para.num_neurons[1] + (para.num_neurons[1] + 2) * para.num_neurons[2]; + offset += (para.dim + 1) * para.num_neurons[0] + + (para.num_neurons[0] + 1) * para.num_neurons[1] + + (para.num_neurons[1] + 2) * para.num_neurons[2]; } } ++offset; // the bias @@ -159,10 +167,13 @@ void SNES::find_type_of_variable(Parameters& para) if (para.num_hidden_layers == 1) { offset += (para.dim + 2) * para.num_neurons[0] + 1; } else if (para.num_hidden_layers == 2) { - offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1] + 1; + offset += + (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 2) * para.num_neurons[1] + 1; } else { - offset += (para.dim + 1) * para.num_neurons[0] + (para.num_neurons[0] + 1) * para.num_neurons[1] + (para.num_neurons[1] + 2) * para.num_neurons[2] + 1; - } + offset += (para.dim + 1) * para.num_neurons[0] + + (para.num_neurons[0] + 1) * para.num_neurons[1] + + (para.num_neurons[1] + 2) * para.num_neurons[2] + 1; + } } // descriptor part @@ -269,8 +280,9 @@ void SNES::compute(Parameters& para, Fitness* fitness_function) std::vector tokens; tokens = get_tokens(input); int num_lines_to_be_skipped = 5; - if (tokens[0] == "nep3_zbl" || tokens[0] == "nep4_zbl" - || tokens[0] == "nep3_zbl_temperature" || tokens[0] == "nep4_zbl_temperature") { + if ( + tokens[0] == "nep3_zbl" || tokens[0] == "nep4_zbl" || tokens[0] == "nep3_zbl_temperature" || + tokens[0] == "nep4_zbl_temperature") { num_lines_to_be_skipped = 6; } @@ -315,12 +327,12 @@ void SNES::create_population(Parameters& para) cudaSetDevice(0); // normally use GPU-0 const int N = population_size * number_of_variables; gpu_create_population<<<(N - 1) / 128 + 1, 128>>>( - N, - number_of_variables, - gpu_mu.data(), - gpu_sigma.data(), - curand_states.data(), - gpu_s.data(), + N, + number_of_variables, + gpu_mu.data(), + gpu_sigma.data(), + curand_states.data(), + gpu_s.data(), gpu_population.data()); CUDA_CHECK_KERNEL gpu_population.copy_to_host(population.data()); @@ -381,14 +393,14 @@ void SNES::regularize_NEP4(Parameters& para) if (t == para.num_types) { num_variables = para.number_of_variables; } - + gpu_find_L1_L2_NEP4<<>>( - number_of_variables, + number_of_variables, para.num_types, - t, - gpu_type_of_variable.data(), - gpu_population.data(), - gpu_cost_L1reg.data(), + t, + gpu_type_of_variable.data(), + gpu_population.data(), + gpu_cost_L1reg.data(), gpu_cost_L2reg.data()); CUDA_CHECK_KERNEL diff --git a/src/main_nep/snes.cuh b/src/main_nep/snes.cuh index d98c46fde..4f9352a98 100644 --- a/src/main_nep/snes.cuh +++ b/src/main_nep/snes.cuh @@ -32,7 +32,7 @@ protected: int number_of_variables = 10; int population_size = 20; float eta_sigma = 0.1f; - + std::vector index; std::vector fitness; std::vector population; diff --git a/src/main_nep/structure.cu b/src/main_nep/structure.cu index c53298849..47379cad6 100644 --- a/src/main_nep/structure.cu +++ b/src/main_nep/structure.cu @@ -22,8 +22,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -107,13 +107,19 @@ static void read_force( PRINT_INPUT_ERROR("Number of items for an atom line mismatches properties."); } std::string atom_symbol(tokens[0 + species_offset]); - structure.x[na] = get_float_from_token(tokens[0 + pos_offset], xyz_filename.c_str(), line_number); - structure.y[na] = get_float_from_token(tokens[1 + pos_offset], xyz_filename.c_str(), line_number); - structure.z[na] = get_float_from_token(tokens[2 + pos_offset], xyz_filename.c_str(), line_number); + structure.x[na] = + get_float_from_token(tokens[0 + pos_offset], xyz_filename.c_str(), line_number); + structure.y[na] = + get_float_from_token(tokens[1 + pos_offset], xyz_filename.c_str(), line_number); + structure.z[na] = + get_float_from_token(tokens[2 + pos_offset], xyz_filename.c_str(), line_number); if (num_columns > 4) { - structure.fx[na] = get_float_from_token(tokens[0 + force_offset], xyz_filename.c_str(), line_number); - structure.fy[na] = get_float_from_token(tokens[1 + force_offset], xyz_filename.c_str(), line_number); - structure.fz[na] = get_float_from_token(tokens[2 + force_offset], xyz_filename.c_str(), line_number); + structure.fx[na] = + get_float_from_token(tokens[0 + force_offset], xyz_filename.c_str(), line_number); + structure.fy[na] = + get_float_from_token(tokens[1 + force_offset], xyz_filename.c_str(), line_number); + structure.fz[na] = + get_float_from_token(tokens[2 + force_offset], xyz_filename.c_str(), line_number); } bool is_allowed_element = false; @@ -130,8 +136,8 @@ static void read_force( } static void read_one_structure( - const Parameters& para, - std::ifstream& input, + const Parameters& para, + std::ifstream& input, Structure& structure, std::string& xyz_filename, int& line_number) @@ -168,7 +174,9 @@ static void read_one_structure( if (token.substr(0, temperature_string.length()) == temperature_string) { structure.has_temperature = true; structure.temperature = get_float_from_token( - token.substr(temperature_string.length(), token.length()), xyz_filename.c_str(), line_number); + token.substr(temperature_string.length(), token.length()), + xyz_filename.c_str(), + line_number); } } if (para.train_mode == 3 && !structure.has_temperature) { @@ -201,7 +209,8 @@ static void read_one_structure( tokens[n + m].substr( (m == 0) ? (lattice_string.length() + 1) : 0, (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()), - xyz_filename.c_str(), line_number); + xyz_filename.c_str(), + line_number); } change_box(para, structure); } @@ -221,7 +230,8 @@ static void read_one_structure( tokens[n + m].substr( (m == 0) ? (virial_string.length() + 1) : 0, (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()), - xyz_filename.c_str(), line_number); + xyz_filename.c_str(), + line_number); structure.virial[reduced_index[m]] /= structure.num_atom; } } @@ -240,7 +250,8 @@ static void read_one_structure( tokens[n + m].substr( (m == 0) ? (stress_string.length() + 1) : 0, (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()), - xyz_filename.c_str(), line_number); + xyz_filename.c_str(), + line_number); virials_from_stress[reduced_index[m]] *= -volume / structure.num_atom; } } @@ -287,7 +298,8 @@ static void read_one_structure( tokens[n + m].substr( (m == 0) ? (dipole_string.length() + 1) : 0, (m == 2) ? (tokens[n + m].length() - 1) : tokens[n + m].length()), - xyz_filename.c_str(), line_number); + xyz_filename.c_str(), + line_number); structure.virial[m] /= structure.num_atom; } } @@ -316,7 +328,8 @@ static void read_one_structure( tokens[n + m].substr( (m == 0) ? (pol_string.length() + 1) : 0, (m == 8) ? (tokens[n + m].length() - 1) : tokens[n + m].length()), - xyz_filename.c_str(), line_number); + xyz_filename.c_str(), + line_number); structure.virial[reduced_index[m]] /= structure.num_atom; } } @@ -371,27 +384,38 @@ static void read_one_structure( } for (int k = 0; k < sub_tokens.size() / 3; ++k) { if (k < species_position) { - species_offset += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number); + species_offset += + get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number); } if (k < pos_position) { - pos_offset += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number); + pos_offset += + get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number); } if (k < force_position) { - force_offset += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number); + force_offset += + get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number); } num_columns += get_int_from_token(sub_tokens[k * 3 + 2], xyz_filename.c_str(), line_number); } } } - read_force(num_columns, species_offset, pos_offset, force_offset, input, para, structure, xyz_filename, line_number); + read_force( + num_columns, + species_offset, + pos_offset, + force_offset, + input, + para, + structure, + xyz_filename, + line_number); } -static void -read_exyz( - const Parameters& para, - std::ifstream& input, - std::vector& structures, +static void read_exyz( + const Parameters& para, + std::ifstream& input, + std::vector& structures, std::string& xyz_filename) { int line_number = 0; @@ -433,7 +457,7 @@ read_exyz( static void find_permuted_indices( const int num_batches, - const std::vector& structures, + const std::vector& structures, std::vector& permuted_indices) { std::vector energy(structures.size()); @@ -442,12 +466,10 @@ static void find_permuted_indices( } std::vector energy_index(structures.size()); - std::iota(energy_index.begin(), energy_index.end(), 0); - std::stable_sort( - energy_index.begin(), - energy_index.end(), - [&energy](size_t i1, size_t i2) {return energy[i1] < energy[i2];} - ); + std::iota(energy_index.begin(), energy_index.end(), 0); + std::stable_sort(energy_index.begin(), energy_index.end(), [&energy](size_t i1, size_t i2) { + return energy[i1] < energy[i2]; + }); int count = 0; for (int b = 0; b < num_batches; ++b) { @@ -459,7 +481,6 @@ static void find_permuted_indices( } count += batch_size; } - } static void reorder(const int num_batches, std::vector& structures) diff --git a/src/mc/nep_energy.cu b/src/mc/nep_energy.cu index 900977dd8..73a8efe9c 100644 --- a/src/mc/nep_energy.cu +++ b/src/mc/nep_energy.cu @@ -30,13 +30,12 @@ heat transport, Phys. Rev. B. 104, 104309 (2021). #include const std::string ELEMENTS[NUM_ELEMENTS] = { - "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", - "S", "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", - "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", - "Pd", "Ag", "Cd", "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", - "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", - "Os", "Ir", "Pt", "Au", "Hg", "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", - "Pa", "U", "Np", "Pu"}; + "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne", "Na", "Mg", "Al", "Si", "P", "S", + "Cl", "Ar", "K", "Ca", "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "Ga", "Ge", + "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", + "In", "Sn", "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd", + "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg", + "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th", "Pa", "U", "Np", "Pu"}; void NEP_Energy::initialize(const char* file_potential) { @@ -66,7 +65,9 @@ void NEP_Energy::initialize(const char* file_potential) paramb.version = 4; zbl.enabled = true; } else { - std::cout << tokens[0] << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." << std::endl; + std::cout << tokens[0] + << " is an unsupported NEP model. We only support NEP3 and NEP4 models now." + << std::endl; exit(1); } paramb.num_types = get_int_from_token(tokens[1], __FILE__, __LINE__); @@ -118,7 +119,8 @@ void NEP_Energy::initialize(const char* file_potential) // cutoff 4.2 3.7 80 47 tokens = get_tokens(input); if (tokens.size() != 5 && tokens.size() != 8) { - std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular [radial_factor] [angular_factor] [zbl_factor].\n"; + std::cout << "This line should be cutoff rc_radial rc_angular MN_radial MN_angular " + "[radial_factor] [angular_factor] [zbl_factor].\n"; exit(1); } paramb.rc_radial = get_float_from_token(tokens[1], __FILE__, __LINE__); @@ -430,7 +432,9 @@ static __global__ void find_energy_zbl( float rc_outer = zbl.rc_outer; if (paramb.use_typewise_cutoff_zbl) { // zi and zj start from 1, so need to minus 1 here - rc_outer = min((COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, rc_outer); + rc_outer = min( + (COVALENT_RADIUS[zi - 1] + COVALENT_RADIUS[zj - 1]) * paramb.typewise_cutoff_zbl_factor, + rc_outer); rc_inner = rc_outer * 0.5f; } find_f_and_fp_zbl(zizj, a_inv, rc_inner, rc_outer, d12, d12inv, f, fp); diff --git a/src/mc/nep_energy.cuh b/src/mc/nep_energy.cuh index 1b3dcba2b..dee90b09d 100644 --- a/src/mc/nep_energy.cuh +++ b/src/mc/nep_energy.cuh @@ -49,13 +49,13 @@ public: }; struct ANN { - int dim = 0; // dimension of the descriptor - int num_neurons1 = 0; // number of neurons in the 1st hidden layer - int num_para = 0; // number of parameters + int dim = 0; // dimension of the descriptor + int num_neurons1 = 0; // number of neurons in the 1st hidden layer + int num_para = 0; // number of parameters const float* w0[NUM_ELEMENTS]; // weight from the input layer to the hidden layer const float* b0[NUM_ELEMENTS]; // bias for the hidden layer const float* w1[NUM_ELEMENTS]; // weight from the hidden layer to the output layer - const float* b1; // bias for the output layer + const float* b1; // bias for the output layer const float* c; }; diff --git a/src/measure/dump_exyz.cu b/src/measure/dump_exyz.cu index e13ed0e58..1620866de 100644 --- a/src/measure/dump_exyz.cu +++ b/src/measure/dump_exyz.cu @@ -105,14 +105,14 @@ void Dump_EXYZ::parse(const char** param, int num_param) } if (num_param >= 6) { - if (!is_valid_int(param[5], &separated_)) { + if (!is_valid_int(param[5], &separated_)) { PRINT_INPUT_ERROR("separated should be an integer."); - } - if (separated_ == 0) { + } + if (separated_ == 0) { printf(" dump_exyz into dump.xyz.\n"); - } else { + } else { printf(" dump_exyz into separated dump.*.xyz.\n"); - } + } } } @@ -122,7 +122,7 @@ void Dump_EXYZ::preprocess(const int number_of_atoms) if (separated_ == 0) { fid_ = my_fopen("dump.xyz", "a"); } - + gpu_total_virial_.resize(6); cpu_total_virial_.resize(6); if (has_force_) { @@ -250,9 +250,9 @@ void Dump_EXYZ::process( if (has_potential_) { atom.potential_per_atom.copy_to_host(cpu_potential_per_atom_.data()); } - + if (separated_) { - std::string filename = "dump."+std::to_string(step+1)+".xyz"; + std::string filename = "dump." + std::to_string(step + 1) + ".xyz"; fid_ = my_fopen(filename.data(), "w"); } @@ -290,7 +290,6 @@ void Dump_EXYZ::process( } else { fclose(fid_); } - } void Dump_EXYZ::postprocess() diff --git a/src/measure/dump_netcdf.cuh b/src/measure/dump_netcdf.cuh index 355197b9a..f631eacca 100644 --- a/src/measure/dump_netcdf.cuh +++ b/src/measure/dump_netcdf.cuh @@ -39,12 +39,12 @@ public: private: bool dump_ = false; - int interval = 1; // output interval - int has_velocity_ = 0; // 0 wthout velocities, 1 with velocities + int interval = 1; // output interval + int has_velocity_ = 0; // 0 wthout velocities, 1 with velocities char file_position[200]; - int precision = 2; // 1 = single precision, 2 = double + int precision = 2; // 1 = single precision, 2 = double - int ncid; // NetCDF ID + int ncid; // NetCDF ID static bool append; // dimensions diff --git a/src/measure/rdf.cu b/src/measure/rdf.cu index f81ffcbae..d0392e707 100644 --- a/src/measure/rdf.cu +++ b/src/measure/rdf.cu @@ -635,10 +635,14 @@ void RDF::parse( if (r_cut_ <= 0) { PRINT_INPUT_ERROR("radial cutoff should be positive.\n"); } - double thickness_half[3] = {box.get_volume()/box.get_area(0)/ 2.5,box.get_volume()/box.get_area(1)/ 2.5,box.get_volume()/box.get_area(2)/ 2.5}; + double thickness_half[3] = { + box.get_volume() / box.get_area(0) / 2.5, + box.get_volume() / box.get_area(1) / 2.5, + box.get_volume() / box.get_area(2) / 2.5}; if (r_cut_ > thickness_half[0] || r_cut_ > thickness_half[1] || r_cut_ > thickness_half[2]) { - std::string message = "The box has a thickness < 2.5 RDF radial cutoffs in a periodic direction.\n" - " Please increase the periodic direction(s).\n"; + std::string message = + "The box has a thickness < 2.5 RDF radial cutoffs in a periodic direction.\n" + " Please increase the periodic direction(s).\n"; PRINT_INPUT_ERROR(message.c_str()); } printf(" radial cutoff %g.\n", r_cut_); diff --git a/src/measure/shc.cu b/src/measure/shc.cu index 8c64785ff..52d0d6e87 100644 --- a/src/measure/shc.cu +++ b/src/measure/shc.cu @@ -187,9 +187,11 @@ void SHC::process( } else { if (group_id == -1) { for (int n = 1; n < group_num; ++n) { - int offset_s = Nc * group[group_method].cpu_size_sum[n] + + int offset_s = Nc * group[group_method].cpu_size_sum[n] + correlation_step * group[group_method].cpu_size[n]; - gpu_copy_data<<<(group[group_method].cpu_size[n] - 1) / BLOCK_SIZE_SHC + 1, BLOCK_SIZE_SHC>>>( + gpu_copy_data<<< + (group[group_method].cpu_size[n] - 1) / BLOCK_SIZE_SHC + 1, + BLOCK_SIZE_SHC>>>( group[group_method].cpu_size[n], group[group_method].cpu_size_sum[n], group[group_method].contents.data(), @@ -487,9 +489,11 @@ void SHC::parse(const char** param, int num_param, const std::vector& gro PRINT_INPUT_ERROR("Unrecognized argument in compute_shc.\n"); } } - + if (group_id == -1) { - printf(" compute SHC for all group IDs except for group ID 0 in grouping method %d.\n", group_method); + printf( + " compute SHC for all group IDs except for group ID 0 in grouping method %d.\n", + group_method); } if (group_id < -1) { PRINT_INPUT_ERROR("group ID should >= -1 for computing SHC."); diff --git a/src/model/read_xyz.cu b/src/model/read_xyz.cu index f00dcc866..98f59d390 100644 --- a/src/model/read_xyz.cu +++ b/src/model/read_xyz.cu @@ -298,7 +298,8 @@ static void read_xyz_line_2( // properties const int num_properties = 6; std::string property_name[num_properties] = {"species", "pos", "mass", "charge", "vel", "group"}; - int property_position[num_properties] = {-1, -1, -1, -1, -1, -1}; // species,pos,mass,charge,vel,group + int property_position[num_properties] = { + -1, -1, -1, -1, -1, -1}; // species,pos,mass,charge,vel,group for (int n = 0; n < tokens.size(); ++n) { const std::string properties_string = "properties="; if (tokens[n].substr(0, properties_string.length()) == properties_string) { @@ -554,7 +555,8 @@ void initialize_position( int num_columns = 0; bool has_mass = true; bool has_charge = true; - read_xyz_line_2(input, box, has_velocity_in_xyz, has_mass, has_charge, num_columns, property_offset, group); + read_xyz_line_2( + input, box, has_velocity_in_xyz, has_mass, has_charge, num_columns, property_offset, group); read_xyz_in_line_3( input, diff --git a/src/utilities/nep_utilities.cuh b/src/utilities/nep_utilities.cuh index 827f4c7ec..5bf4d25c6 100644 --- a/src/utilities/nep_utilities.cuh +++ b/src/utilities/nep_utilities.cuh @@ -31,17 +31,18 @@ __constant__ float C4B[5] = { -0.809943929279723f}; __constant__ float C5B[3] = {0.026596810706114f, 0.053193621412227f, 0.026596810706114f}; -__constant__ float COVALENT_RADIUS[94] = -{0.426667f,0.613333f,1.6f,1.25333f,1.02667f,1.0f,0.946667f,0.84f,0.853333f,0.893333f, - 1.86667f,1.66667f,1.50667f,1.38667f,1.46667f,1.36f,1.32f,1.28f,2.34667f,2.05333f, - 1.77333f,1.62667f,1.61333f,1.46667f,1.42667f,1.38667f,1.33333f,1.32f,1.34667f,1.45333f, - 1.49333f,1.45333f,1.53333f,1.46667f,1.52f,1.56f,2.52f,2.22667f,1.96f,1.85333f, - 1.76f,1.65333f,1.53333f,1.50667f,1.50667f,1.44f,1.53333f,1.64f,1.70667f,1.68f, - 1.68f,1.64f,1.76f,1.74667f,2.78667f,2.34667f,2.16f,1.96f,2.10667f,2.09333f, - 2.08f,2.06667f,2.01333f,2.02667f,2.01333f,2.0f,1.98667f,1.98667f,1.97333f,2.04f, - 1.94667f,1.82667f,1.74667f,1.64f,1.57333f,1.54667f,1.48f,1.49333f,1.50667f,1.76f, - 1.73333f,1.73333f,1.81333f,1.74667f,1.84f,1.89333f,2.68f,2.41333f,2.22667f,2.10667f, - 2.02667f,2.04f,2.05333f,2.06667f}; +__constant__ float COVALENT_RADIUS[94] = { + 0.426667f, 0.613333f, 1.6f, 1.25333f, 1.02667f, 1.0f, 0.946667f, 0.84f, 0.853333f, + 0.893333f, 1.86667f, 1.66667f, 1.50667f, 1.38667f, 1.46667f, 1.36f, 1.32f, 1.28f, + 2.34667f, 2.05333f, 1.77333f, 1.62667f, 1.61333f, 1.46667f, 1.42667f, 1.38667f, 1.33333f, + 1.32f, 1.34667f, 1.45333f, 1.49333f, 1.45333f, 1.53333f, 1.46667f, 1.52f, 1.56f, + 2.52f, 2.22667f, 1.96f, 1.85333f, 1.76f, 1.65333f, 1.53333f, 1.50667f, 1.50667f, + 1.44f, 1.53333f, 1.64f, 1.70667f, 1.68f, 1.68f, 1.64f, 1.76f, 1.74667f, + 2.78667f, 2.34667f, 2.16f, 1.96f, 2.10667f, 2.09333f, 2.08f, 2.06667f, 2.01333f, + 2.02667f, 2.01333f, 2.0f, 1.98667f, 1.98667f, 1.97333f, 2.04f, 1.94667f, 1.82667f, + 1.74667f, 1.64f, 1.57333f, 1.54667f, 1.48f, 1.49333f, 1.50667f, 1.76f, 1.73333f, + 1.73333f, 1.81333f, 1.74667f, 1.84f, 1.89333f, 2.68f, 2.41333f, 2.22667f, 2.10667f, + 2.02667f, 2.04f, 2.05333f, 2.06667f}; const int SIZE_BOX_AND_INVERSE_BOX = 18; // (3 * 3) * 2 const int MAX_NUM_N = 20; // n_max+1 = 19+1 @@ -92,7 +93,7 @@ static __device__ void apply_ann_multi_layers( float* energy_derivative) { constexpr int MAX_NEURONS_PER_LAYER = 200; - float x[3 * MAX_NEURONS_PER_LAYER]; // Maximum number of neurons per layer + float x[3 * MAX_NEURONS_PER_LAYER]; // Maximum number of neurons per layer float delta[3 * MAX_NEURONS_PER_LAYER]; // error of each neuron // input layer @@ -119,8 +120,8 @@ static __device__ void apply_ann_multi_layers( energy = 0.0f; for (int n = 0; n < N_neu[layers - 1]; ++n) { float out = x[(layers - 1) * MAX_NEURONS_PER_LAYER + n]; - energy += w_out[n] * out; // w_out_j * x_j^3 - delta[(layers - 1) * MAX_NEURONS_PER_LAYER + n] = w_out[n] * (1.0f - out * out); // delta_j^3 + energy += w_out[n] * out; // w_out_j * x_j^3 + delta[(layers - 1) * MAX_NEURONS_PER_LAYER + n] = w_out[n] * (1.0f - out * out); // delta_j^3 } energy -= b_out[0]; @@ -144,7 +145,6 @@ static __device__ void apply_ann_multi_layers( energy_derivative[d] += w0[n * N_des + d] * delta[n]; } } - } static __device__ __forceinline__ void find_fc(float rc, float rcinv, float d12, float& fc)