From c1d30c846bc9e46f5756ba94145f1a30cfb7079a Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 30 Sep 2024 12:03:04 +0900 Subject: [PATCH 01/21] build: add spectra 1.0 to dependencies --- cmake/NuriKitUtils.cmake | 20 ++++++++++++++++++++ src/CMakeLists.txt | 3 ++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/cmake/NuriKitUtils.cmake b/cmake/NuriKitUtils.cmake index 892a9869..0cc0203b 100644 --- a/cmake/NuriKitUtils.cmake +++ b/cmake/NuriKitUtils.cmake @@ -125,6 +125,26 @@ function(find_or_fetch_eigen) endif() endfunction() +function(find_or_fetch_spectra) + set(BUILD_TESTING OFF) + + find_package(Spectra 1.0 QUIET) + + if(Spectra_FOUND) + message(STATUS "Found Spectra ${Spectra_VERSION}") + else() + include(FetchContent) + message(NOTICE "Could not find compatible Spectra. Fetching from github.") + + FetchContent_Declare( + spectra + GIT_REPOSITORY https://github.com/yixuan/spectra.git + GIT_TAG v1.0.1) + nuri_make_available_deponly(spectra) + add_library(Spectra::Spectra ALIAS Spectra) + endif() +endfunction() + function(find_or_fetch_pybind11) set(BUILD_TESTING OFF) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cea3c9a5..dd6f6b12 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,7 @@ include(NuriKitUtils) find_or_fetch_eigen() +find_or_fetch_spectra() include(GNUInstallDirs) @@ -27,7 +28,7 @@ target_link_libraries(nuri_lib PUBLIC absl::strings absl::flat_hash_map absl::absl_check absl::absl_log ) -target_system_include_directories(nuri_lib Eigen3::Eigen) +target_system_include_directories(nuri_lib Eigen3::Eigen Spectra::Spectra) handle_boost_dependency(nuri_lib) if(NURI_LIBRARY_FLAGS) From 50558d510623d92189ba2382ba194f99087dec20 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 30 Sep 2024 15:26:21 +0900 Subject: [PATCH 02/21] feat(core/geometry): implement distance to coordinates mapping routine --- include/nuri/core/geometry.h | 16 +++++++++++ src/core/geometry.cpp | 54 ++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/include/nuri/core/geometry.h b/include/nuri/core/geometry.h index e2e28c1d..691a2e51 100644 --- a/include/nuri/core/geometry.h +++ b/include/nuri/core/geometry.h @@ -519,6 +519,22 @@ extern std::pair qcp(const Eigen::Ref &query, const Eigen::Ref &templ, AlignMode mode = AlignMode::kBoth, double evalprec = 1e-11, double evecprec = 1e-6, int maxiter = 50); + +/** + * @brief A routine for converting squared pairwise distances to cartesian + * coordinates. + * @param pts Destination to which save the generated coordinates. + * @param dsqs The squared distances between points. + * @return Whether the embedding was successful. + * + * @note The squared distance matrix must be a N x N symmetric pairwise + * squared-distance matrix, where N is the number of points. + * + * This implementation is based on the following reference: TF Havel, ID Kuntz, + * and GM Crippen. *Bull. Math. Biol.* **1983**, *45* (5), 665-720. + * DOI:[10.1007/BF02460044](https://doi.org/10.1007/BF02460044) + */ +extern bool embed_distances(Eigen::Ref pts, MatrixXd dsqs); } // namespace nuri #endif /* NURI_CORE_GEOMETRY_H_ */ diff --git a/src/core/geometry.cpp b/src/core/geometry.cpp index e0e853d8..864b0db9 100644 --- a/src/core/geometry.cpp +++ b/src/core/geometry.cpp @@ -9,11 +9,16 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include #include @@ -758,4 +763,53 @@ std::pair qcp(const Eigen::Ref &query, return ret; } // NOLINTEND(readability-identifier-naming,*-avoid-goto) + +bool embed_distances(Eigen::Ref pts, MatrixXd dsqs) { + using Spectra::CompInfo; + using Spectra::SortRule; + + if (dsqs.cols() != dsqs.rows() || dsqs.cols() != pts.cols()) { + ABSL_LOG(WARNING) << "size mismatch; cannot embed distances"; + return false; + } + + const int n = static_cast(dsqs.cols()); + + double norm_dist = 0; + for (int i = 1; i < dsqs.cols(); ++i) + norm_dist += dsqs.col(i).head(i).sum(); + norm_dist /= n * n; + + VectorXd d0sq = dsqs.colwise().mean().array() - norm_dist; + + dsqs *= -1; + dsqs.colwise() += d0sq; + dsqs.rowwise() += d0sq.transpose(); + dsqs /= 2; + + try { + Spectra::DenseSymMatProd op(dsqs); + // This constructor might throw + Spectra::SymEigsSolver eigs(op, 3, 6); + eigs.init(); + auto nconv = eigs.compute(); + if (eigs.info() != CompInfo::Successful || nconv < 3) { + ABSL_LOG(WARNING) << "solver failed"; + return false; + } + + Array3d evals_sqrt = eigs.eigenvalues().head<3>(); + if ((evals_sqrt < 0).any()) + return false; + evals_sqrt = evals_sqrt.sqrt(); + + pts = (eigs.eigenvectors(3).transpose().array().colwise() * evals_sqrt) + .matrix(); + } catch (const std::exception &e) { + ABSL_LOG(WARNING) << "solver failed: " << e.what(); + return false; + } + + return true; +} } // namespace nuri From d4545ca17f45689c5744b3b73b2d214fcdb6cc4e Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 30 Sep 2024 15:29:00 +0900 Subject: [PATCH 03/21] test(core/geometry): test distances-to-coordinates conversion --- test/core/geometry_test.cpp | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/core/geometry_test.cpp b/test/core/geometry_test.cpp index d765d29d..b7f65367 100644 --- a/test/core/geometry_test.cpp +++ b/test/core/geometry_test.cpp @@ -421,5 +421,36 @@ TEST_F(AlignSingularTest, Qcp) { run_test( [](const auto &q, const auto &t, auto mode) { return qcp(q, t, mode); }); } + +TEST(EmbedTest, FromDistance) { + Matrix3Xd orig(3, 19); + orig.transpose() << -18.3397, 72.5541, 64.7727, // + -17.5457, 72.7646, 65.8591, // + -17.5263, 71.9973, 66.9036, // + -18.3203, 70.8982, 66.9881, // + -19.2153, 70.5869, 65.8606, // + -19.1851, 71.4993, 64.7091, // + -19.8932, 71.3140, 63.7366, // + -19.8586, 69.4953, 66.1942, // + -19.4843, 69.0910, 67.3570, // + -18.5535, 69.9074, 67.8928, // + -17.8916, 69.7493, 69.2373, // + -16.7519, 70.7754, 69.3878, // + -15.5030, 70.1100, 69.5871, // + -17.1385, 71.5903, 70.6459, // + -15.9909, 71.8532, 71.4558, // + -18.1166, 70.6286, 71.3678, // + -18.8422, 70.0100, 70.2837, // + -19.0679, 71.4098, 72.2764, // + -20.1784, 71.8813, 71.5104; + + MatrixXd dsqs = to_square_form(pdistsq(orig), orig.cols()); + Matrix3Xd pts(orig.rows(), orig.cols()); + ASSERT_TRUE(embed_distances(pts, dsqs)); + + auto [xform, msd] = kabsch(pts, orig, AlignMode::kBoth, true); + ASSERT_GE(msd, 0); + EXPECT_NEAR(msd, 0, 1e-6); +} } // namespace } // namespace nuri From ca7e3221089de258906ea6bd309a75a266844adf Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Thu, 3 Oct 2024 15:26:58 +0900 Subject: [PATCH 04/21] feat(core/geometry): support embedding distances to 4d space --- include/nuri/core/geometry.h | 20 ++++++++- src/core/geometry.cpp | 84 ++++++++++++++++++++---------------- 2 files changed, 66 insertions(+), 38 deletions(-) diff --git a/include/nuri/core/geometry.h b/include/nuri/core/geometry.h index 691a2e51..2caff636 100644 --- a/include/nuri/core/geometry.h +++ b/include/nuri/core/geometry.h @@ -523,7 +523,7 @@ qcp(const Eigen::Ref &query, /** * @brief A routine for converting squared pairwise distances to cartesian * coordinates. - * @param pts Destination to which save the generated coordinates. + * @param pts Destination to which save the generated coordinates (3d). * @param dsqs The squared distances between points. * @return Whether the embedding was successful. * @@ -534,7 +534,23 @@ qcp(const Eigen::Ref &query, * and GM Crippen. *Bull. Math. Biol.* **1983**, *45* (5), 665-720. * DOI:[10.1007/BF02460044](https://doi.org/10.1007/BF02460044) */ -extern bool embed_distances(Eigen::Ref pts, MatrixXd dsqs); +extern bool embed_distances_3d(Eigen::Ref pts, MatrixXd dsqs); + +/** + * @brief A routine for converting squared pairwise distances to cartesian + * coordinates. + * @param pts Destination to which save the generated coordinates (4d). + * @param dsqs The squared distances between points. + * @return Whether the embedding was successful. + * + * @note The squared distance matrix must be a N x N symmetric pairwise + * squared-distance matrix, where N is the number of points. + * + * This implementation is based on the following reference: TF Havel, ID Kuntz, + * and GM Crippen. *Bull. Math. Biol.* **1983**, *45* (5), 665-720. + * DOI:[10.1007/BF02460044](https://doi.org/10.1007/BF02460044) + */ +extern bool embed_distances_4d(Eigen::Ref pts, MatrixXd dsqs); } // namespace nuri #endif /* NURI_CORE_GEOMETRY_H_ */ diff --git a/src/core/geometry.cpp b/src/core/geometry.cpp index 864b0db9..defa7693 100644 --- a/src/core/geometry.cpp +++ b/src/core/geometry.cpp @@ -764,52 +764,64 @@ std::pair qcp(const Eigen::Ref &query, } // NOLINTEND(readability-identifier-naming,*-avoid-goto) -bool embed_distances(Eigen::Ref pts, MatrixXd dsqs) { - using Spectra::CompInfo; - using Spectra::SortRule; +namespace { + template + bool embed_distances_impl(PtsLike pts, MatrixXd &dsqs) { + using Spectra::CompInfo; + using Spectra::SortRule; + constexpr Eigen::Index ndim = PtsLike::RowsAtCompileTime; + + if (dsqs.cols() != dsqs.rows() || dsqs.cols() != pts.cols()) { + ABSL_LOG(WARNING) << "size mismatch; cannot embed distances"; + return false; + } - if (dsqs.cols() != dsqs.rows() || dsqs.cols() != pts.cols()) { - ABSL_LOG(WARNING) << "size mismatch; cannot embed distances"; - return false; - } + const int n = static_cast(dsqs.cols()); - const int n = static_cast(dsqs.cols()); + double norm_dist = 0; + for (int i = 1; i < dsqs.cols(); ++i) + norm_dist += dsqs.col(i).head(i).sum(); + norm_dist /= n * n; - double norm_dist = 0; - for (int i = 1; i < dsqs.cols(); ++i) - norm_dist += dsqs.col(i).head(i).sum(); - norm_dist /= n * n; + VectorXd d0sq = dsqs.colwise().mean().array() - norm_dist; - VectorXd d0sq = dsqs.colwise().mean().array() - norm_dist; + dsqs *= -1; + dsqs.colwise() += d0sq; + dsqs.rowwise() += d0sq.transpose(); + dsqs /= 2; - dsqs *= -1; - dsqs.colwise() += d0sq; - dsqs.rowwise() += d0sq.transpose(); - dsqs /= 2; + try { + Spectra::DenseSymMatProd op(dsqs); + // This constructor might throw + Spectra::SymEigsSolver eigs(op, ndim, ndim * 2); + eigs.init(); + auto nconv = eigs.compute(SortRule::LargestAlge); + if (eigs.info() != CompInfo::Successful || nconv < ndim) { + ABSL_LOG(WARNING) << "solver failed"; + return false; + } - try { - Spectra::DenseSymMatProd op(dsqs); - // This constructor might throw - Spectra::SymEigsSolver eigs(op, 3, 6); - eigs.init(); - auto nconv = eigs.compute(); - if (eigs.info() != CompInfo::Successful || nconv < 3) { - ABSL_LOG(WARNING) << "solver failed"; - return false; - } + Array evals_sqrt = eigs.eigenvalues().head(); + if ((evals_sqrt < 0).any()) + return false; + evals_sqrt = evals_sqrt.sqrt(); - Array3d evals_sqrt = eigs.eigenvalues().head<3>(); - if ((evals_sqrt < 0).any()) + pts = (eigs.eigenvectors(ndim).transpose().array().colwise() * evals_sqrt) + .matrix(); + } catch (const std::exception &e) { + ABSL_LOG(WARNING) << "solver failed: " << e.what(); return false; - evals_sqrt = evals_sqrt.sqrt(); + } - pts = (eigs.eigenvectors(3).transpose().array().colwise() * evals_sqrt) - .matrix(); - } catch (const std::exception &e) { - ABSL_LOG(WARNING) << "solver failed: " << e.what(); - return false; + return true; } +} // namespace + +bool embed_distances_3d(Eigen::Ref pts, MatrixXd dsqs) { + return embed_distances_impl(pts, dsqs); +} - return true; +extern bool embed_distances_4d(Eigen::Ref pts, MatrixXd dsqs) { + return embed_distances_impl(pts, dsqs); } } // namespace nuri From 119ae97b15ac4954a42e684f10fc47fb45a35563 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Thu, 3 Oct 2024 15:28:27 +0900 Subject: [PATCH 05/21] test(core/geometry): update function name --- test/core/geometry_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/core/geometry_test.cpp b/test/core/geometry_test.cpp index b7f65367..2332ada7 100644 --- a/test/core/geometry_test.cpp +++ b/test/core/geometry_test.cpp @@ -446,7 +446,7 @@ TEST(EmbedTest, FromDistance) { MatrixXd dsqs = to_square_form(pdistsq(orig), orig.cols()); Matrix3Xd pts(orig.rows(), orig.cols()); - ASSERT_TRUE(embed_distances(pts, dsqs)); + ASSERT_TRUE(embed_distances_3d(pts, dsqs)); auto [xform, msd] = kabsch(pts, orig, AlignMode::kBoth, true); ASSERT_GE(msd, 0); From 8f7f8fb263f5c32af9c4a33b4b4ce953120a1431 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Tue, 15 Oct 2024 17:44:01 +0900 Subject: [PATCH 06/21] refactor(algo/optim/lbfgsb): optimize/simplify triangular expressions --- src/algo/optim.cpp | 52 ++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/src/algo/optim.cpp b/src/algo/optim.cpp index 4aec33bb..8872ea1d 100644 --- a/src/algo/optim.cpp +++ b/src/algo/optim.cpp @@ -82,12 +82,11 @@ namespace internal { wtt.template triangularView().transpose().solveInPlace( p.tail(col)); - p.head(col) = -v.head(col).array() / sy.diagonal().array(); - for (int i = 0; i < col; ++i) { - double ssum = - sy.col(i).tail(col - i - 1).dot(p.tail(col - i - 1)) / sy(i, i); - p[i] += ssum; - } + p.head(col).noalias() = + sy.transpose().template triangularView() + * p.tail(col); + p.head(col) -= v.head(col); + p.head(col).array() /= sy.diagonal().array(); return true; } @@ -816,9 +815,9 @@ namespace { // Shift old part of WN1. wn1.topLeftCorner(mm1, mm1).triangularView() = - wn1.block(1, 1, mm1, mm1).triangularView(); + wn1.block(1, 1, mm1, mm1); wn1.block(m, m, mm1, mm1).triangularView() = - wn1.block(m + 1, m + 1, mm1, mm1).triangularView(); + wn1.block(m + 1, m + 1, mm1, mm1); wn1.block(m, 0, mm1, mm1) = wn1.block(m + 1, 1, mm1, mm1); } @@ -848,42 +847,38 @@ namespace { void formk_update_wn1(LBfgsB &L) { auto &wn1 = L.wn1(); - auto ws = L.ws(), wy = L.wy(); auto enter = L.enter(), leave = L.leave(); const auto m = L.m(), upcl = L.col() - value_if(L.updated()); + auto ws = L.ws().leftCols(upcl), wy = L.wy().leftCols(upcl); // modify the old parts in blocks (1,1) and (2,2) due to changes // in the set of free variables. for (int j = 0; j < upcl; ++j) { wn1.col(j).segment(j, upcl - j).noalias() += - wy(enter, Eigen::all).transpose().middleRows(j, upcl - j) - * wy(enter, j); + wy(enter, Eigen::all).transpose().bottomRows(upcl - j) * wy(enter, j); wn1.col(j).segment(j, upcl - j).noalias() -= - wy(leave, Eigen::all).transpose().middleRows(j, upcl - j) - * wy(leave, j); + wy(leave, Eigen::all).transpose().bottomRows(upcl - j) * wy(leave, j); } for (int j = 0; j < upcl; ++j) { wn1.col(m + j).segment(m + j, upcl - j).noalias() -= - ws(enter, Eigen::all).transpose().middleRows(j, upcl - j) - * ws(enter, j); + ws(enter, Eigen::all).transpose().bottomRows(upcl - j) * ws(enter, j); wn1.col(m + j).segment(m + j, upcl - j).noalias() += - ws(leave, Eigen::all).transpose().middleRows(j, upcl - j) - * ws(leave, j); + ws(leave, Eigen::all).transpose().bottomRows(upcl - j) * ws(leave, j); } // Modify the old parts in block (2,1) for (int j = 0; j < upcl; ++j) { wn1.col(j).segment(m, j + 1).noalias() += ws(enter, Eigen::all).transpose().topRows(j + 1) * wy(enter, j); - wn1.col(j).segment(m, j + 1).noalias() -= - ws(leave, Eigen::all).transpose().topRows(j + 1) * wy(leave, j); - wn1.col(j).segment(m + j + 1, upcl - j - 1).noalias() -= - ws(enter, Eigen::all).transpose().middleRows(j + 1, upcl - j - 1) + ws(enter, Eigen::all).transpose().bottomRows(upcl - j - 1) * wy(enter, j); + + wn1.col(j).segment(m, j + 1).noalias() -= + ws(leave, Eigen::all).transpose().topRows(j + 1) * wy(leave, j); wn1.col(j).segment(m + j + 1, upcl - j - 1).noalias() += - ws(leave, Eigen::all).transpose().middleRows(j + 1, upcl - j - 1) + ws(leave, Eigen::all).transpose().bottomRows(upcl - j - 1) * wy(leave, j); } } @@ -1042,9 +1037,9 @@ namespace { if (pcol == m) { ABSL_ASSUME(ci == m - 1); ss.topLeftCorner(ci, ci).triangularView() = - ss.bottomRightCorner(ci, ci).triangularView(); + ss.bottomRightCorner(ci, ci); sy.topLeftCorner(ci, ci).triangularView() = - sy.bottomRightCorner(ci, ci).triangularView(); + sy.bottomRightCorner(ci, ci); } ss.col(ci).head(col).noalias() = ws.leftCols(col).transpose() * d.matrix(); @@ -1061,13 +1056,12 @@ namespace { const double theta) { const auto col = ss.cols(); - // Form the upper half of T = theta*SS + L*D^(-1)*L', store T in the - // upper triangle of the array wt. - wtt.template triangularView() = - (theta * ss.transpose()).template triangularView(); - sy_scaled.array() = sy.array().rowwise() / sy.diagonal().array().transpose(); + + // Form the upper half of T = theta*SS + L*D^(-1)*L', store T in the + // upper triangle of the array wt. + wtt.template triangularView() = theta * ss.transpose(); for (int i = 1; i < col; ++i) { wtt.col(i).tail(col - i).noalias() += sy.bottomLeftCorner(col - i, i) From dbe99e1184afe9eba527a5a64c8feb955fbc2097 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:15:42 +0900 Subject: [PATCH 07/21] fix(algo/optim/lbfgsb): align OOP interface with function --- include/nuri/algo/optim.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/nuri/algo/optim.h b/include/nuri/algo/optim.h index 371b45a6..9b4b807c 100644 --- a/include/nuri/algo/optim.h +++ b/include/nuri/algo/optim.h @@ -233,7 +233,7 @@ namespace internal { */ class LBfgsB { public: - LBfgsB(MutRef x, internal::LbfgsbBounds bounds, int m); + LBfgsB(MutRef x, internal::LbfgsbBounds bounds, int m = 10); /** * @brief Minimize a function using L-BFGS-B algorithm. @@ -250,8 +250,8 @@ class LBfgsB { * value, and final gradient. */ template - LbfgsbResult minimize(FuncGrad fg, double factr, int maxiter, int maxls, - double pgtol); + LbfgsbResult minimize(FuncGrad fg, double factr = 1e+7, double pgtol = 1e-5, + int maxiter = 15000, int maxls = 20); /* State modifiers, only for implementations */ @@ -384,8 +384,8 @@ class LBfgsB { template LbfgsbResult LBfgsB::minimize(FuncGrad fg, const double factr, - const int maxiter, const int maxls, - const double pgtol) { + const double pgtol, const int maxiter, + const int maxls) { const double tol = factr * internal::kEpsMach; ArrayXd gx(n()); @@ -545,8 +545,8 @@ LbfgsbResult l_bfgs_b(FuncGrad &&fg, MutRef x, const ArrayXi &nbd, return { LbfgsbResultCode::kInvalidInput, 0, 0, {} }; LBfgsB lbfgsb(x, { nbd, bounds }, m); - return lbfgsb.minimize(std::forward(fg), factr, maxiter, maxls, - pgtol); + return lbfgsb.minimize(std::forward(fg), factr, pgtol, maxiter, + maxls); } } // namespace nuri From f5419038db257155fadaf383d845d01c67001691 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:16:37 +0900 Subject: [PATCH 08/21] refactor(core/geometry): export pre-calculated trig values --- include/nuri/core/geometry.h | 29 +++++++++++++++++++++++++++++ src/algo/guess.cpp | 25 ++++++++----------------- 2 files changed, 37 insertions(+), 17 deletions(-) diff --git a/include/nuri/core/geometry.h b/include/nuri/core/geometry.h index 2caff636..ab1f48ff 100644 --- a/include/nuri/core/geometry.h +++ b/include/nuri/core/geometry.h @@ -93,6 +93,35 @@ namespace constants { extern constexpr inline double kTwoPi = 6.2831853071795864769252867665590057683943387987502116419498891846156328; + + // NOLINTBEGIN(*-identifier-naming) + extern constexpr inline double kCos15 = + 0.9659258262890682867497431997288973676339048390084045504023430763; + extern constexpr inline double kCos75 = + 0.2588190451025207623488988376240483283490689013199305138140032073; + extern constexpr inline double kCos100 = + -0.173648177666930348851716626769314796000375677184069387236241378; + extern constexpr inline double kCos102 = + -0.207911690817759337101742284405125166216584760627723836407181973; + extern constexpr inline double kCos112 = + -0.374606593415912035414963774501195131000158922253676174103440371; + extern constexpr inline double kCos115 = + -0.422618261740699436186978489647730181563129301194864623444415159; + extern constexpr inline double kCos125 = + -0.573576436351046096108031912826157864620433371450986351081027118; + extern constexpr inline double kCos155 = + -0.906307787036649963242552656754316983267712625175864680871298408; + extern constexpr inline double kCos175 = + -0.996194698091745532295010402473888046183562672645850974525442277; + extern constexpr inline double kTan10_2 = + 0.0874886635259240052220186694349614581194542763681082291452366622; + extern constexpr inline double kTan15_2 = + 0.1316524975873958534715264574097171035928141022232375735535653257; + extern constexpr inline double kTan116_2 = + 1.6003345290410503553267330811833575255040718469227591484115002297; + extern constexpr inline double kTan155_2 = + 4.5107085036620571342899391172547519686713241944553043587162345185; + // NOLINTEND(*-identifier-naming) } // namespace constants template , int> = 0> diff --git a/src/algo/guess.cpp b/src/algo/guess.cpp index 02c18204..c2e5aead 100644 --- a/src/algo/guess.cpp +++ b/src/algo/guess.cpp @@ -182,25 +182,16 @@ namespace { } /* - * Cutoff values taken from: + * Cutoff values taken and modified from: * https://web.archive.org/web/20231205050012/https://www.daylight.com/meetings/mug01/Sayle/m4xbondage.html */ - // NOLINTBEGIN(*-identifier-naming) - constexpr double kCos15 = - 0.9659258262890682867497431997288973676339048390084045504023430763; - constexpr double kCos115 = - -0.422618261740699436186978489647730181563129301194864623444415159; - constexpr double kCos155 = - -0.906307787036649963242552656754316983267712625175864680871298408; - constexpr double kTan10_2 = - 0.0874886635259240052220186694349614581194542763681082291452366622; - constexpr double kTan15_2 = - 0.1316524975873958534715264574097171035928141022232375735535653257; - constexpr double kTan116_2 = - 1.6003345290410503553267330811833575255040718469227591484115002297; - constexpr double kTan155_2 = - 4.5107085036620571342899391172547519686713241944553043587162345185; - // NOLINTEND(*-identifier-naming) + using constants::kCos115; + using constants::kCos15; + using constants::kCos155; + using constants::kTan10_2; + using constants::kTan116_2; + using constants::kTan155_2; + using constants::kTan15_2; constants::Hybridization hyb_from_cos(double avg_cos_angle) { // Cosine is decreasing in [0, 180] degree. From 51d49203eb5a25a0a9c9d902eb559ab3f7f0bfb7 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:17:15 +0900 Subject: [PATCH 09/21] perf(core/geometry): use provided distance matrix as workspace --- include/nuri/core/geometry.h | 8 ++++---- src/core/geometry.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/nuri/core/geometry.h b/include/nuri/core/geometry.h index ab1f48ff..5a822d23 100644 --- a/include/nuri/core/geometry.h +++ b/include/nuri/core/geometry.h @@ -553,7 +553,7 @@ qcp(const Eigen::Ref &query, * @brief A routine for converting squared pairwise distances to cartesian * coordinates. * @param pts Destination to which save the generated coordinates (3d). - * @param dsqs The squared distances between points. + * @param dsqs The squared distances between points. Will be modified in-place. * @return Whether the embedding was successful. * * @note The squared distance matrix must be a N x N symmetric pairwise @@ -563,13 +563,13 @@ qcp(const Eigen::Ref &query, * and GM Crippen. *Bull. Math. Biol.* **1983**, *45* (5), 665-720. * DOI:[10.1007/BF02460044](https://doi.org/10.1007/BF02460044) */ -extern bool embed_distances_3d(Eigen::Ref pts, MatrixXd dsqs); +extern bool embed_distances_3d(Eigen::Ref pts, MatrixXd &dsqs); /** * @brief A routine for converting squared pairwise distances to cartesian * coordinates. * @param pts Destination to which save the generated coordinates (4d). - * @param dsqs The squared distances between points. + * @param dsqs The squared distances between points. Will be modified in-place. * @return Whether the embedding was successful. * * @note The squared distance matrix must be a N x N symmetric pairwise @@ -579,7 +579,7 @@ extern bool embed_distances_3d(Eigen::Ref pts, MatrixXd dsqs); * and GM Crippen. *Bull. Math. Biol.* **1983**, *45* (5), 665-720. * DOI:[10.1007/BF02460044](https://doi.org/10.1007/BF02460044) */ -extern bool embed_distances_4d(Eigen::Ref pts, MatrixXd dsqs); +extern bool embed_distances_4d(Eigen::Ref pts, MatrixXd &dsqs); } // namespace nuri #endif /* NURI_CORE_GEOMETRY_H_ */ diff --git a/src/core/geometry.cpp b/src/core/geometry.cpp index defa7693..c9cbb681 100644 --- a/src/core/geometry.cpp +++ b/src/core/geometry.cpp @@ -817,11 +817,11 @@ namespace { } } // namespace -bool embed_distances_3d(Eigen::Ref pts, MatrixXd dsqs) { +bool embed_distances_3d(Eigen::Ref pts, MatrixXd &dsqs) { return embed_distances_impl(pts, dsqs); } -extern bool embed_distances_4d(Eigen::Ref pts, MatrixXd dsqs) { +extern bool embed_distances_4d(Eigen::Ref pts, MatrixXd &dsqs) { return embed_distances_impl(pts, dsqs); } } // namespace nuri From 7073056aeb8f865b12a8eef5ad7efe0120f434ca Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:17:55 +0900 Subject: [PATCH 10/21] feat(eigen): alias more symbols --- include/nuri/eigen_config.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/nuri/eigen_config.h b/include/nuri/eigen_config.h index 92de4852..425ed4fa 100644 --- a/include/nuri/eigen_config.h +++ b/include/nuri/eigen_config.h @@ -35,11 +35,15 @@ namespace nuri { using Eigen::Array; using Eigen::Array3d; +using Eigen::Array4d; using Eigen::Array3i; using Eigen::Array4i; using Eigen::ArrayX; using ArrayXb = Eigen::ArrayX; using Eigen::Array2Xd; +using Eigen::ArrayX2d; +using Eigen::Array3Xd; +using Eigen::Array4Xd; using Eigen::ArrayXd; using Eigen::ArrayXi; using Eigen::ArrayXX; From bce1b617058ca2423b451cab8f532ae0afb6fc12 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:18:35 +0900 Subject: [PATCH 11/21] build: add random library to dependency --- src/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dd6f6b12..faf07c68 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,7 +26,11 @@ add_library(nuri_lib SHARED "${NURI_SRCS}") set_target_properties(nuri_lib PROPERTIES OUTPUT_NAME nuri) target_link_libraries(nuri_lib PUBLIC - absl::strings absl::flat_hash_map absl::absl_check absl::absl_log + absl::strings + absl::flat_hash_map + absl::random_random + absl::absl_check + absl::absl_log ) target_system_include_directories(nuri_lib Eigen3::Eigen Spectra::Spectra) handle_boost_dependency(nuri_lib) From 5ab7c3b9bf8463370d95c7780da619e609831459 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:19:26 +0900 Subject: [PATCH 12/21] feat(algo/crdgen): implement basic DG coordinate generation routine --- include/nuri/algo/crdgen.h | 43 +++ src/algo/crdgen.cpp | 595 +++++++++++++++++++++++++++++++++++++ 2 files changed, 638 insertions(+) create mode 100644 include/nuri/algo/crdgen.h create mode 100644 src/algo/crdgen.cpp diff --git a/include/nuri/algo/crdgen.h b/include/nuri/algo/crdgen.h new file mode 100644 index 00000000..620540fe --- /dev/null +++ b/include/nuri/algo/crdgen.h @@ -0,0 +1,43 @@ +// +// Project NuriKit - Copyright 2024 SNU Compbio Lab. +// SPDX-License-Identifier: Apache-2.0 +// + +#ifndef NURI_ALGO_CRDGEN_H_ +#define NURI_ALGO_CRDGEN_H_ + +#include "nuri/eigen_config.h" +#include "nuri/core/molecule.h" + +namespace nuri { +/** + * @brief Generate 3D coordinates for the molecule. + * + * @param mol The molecule to generate coordinates. + * @param conf A matrix to store the generated coordinates. + * @param max_trial The maximum number of trials to generate trial distances. + * @return Whether the coordinates is successfully generated. + */ +extern bool generate_coords(const Molecule &mol, Matrix3Xd &conf, + int max_trial = 10); + +/** + * @brief Generate 3D coordinates for the molecule. + * + * @param mol The molecule to generate coordinates. + * @return Whether the coordinates is successfully generated. + * + * @note The generated coordinates is stored as the last conformer of the + * molecule, if and only if the coordinates is successfully generated. The + * molecule is not modified otherwise. + */ +inline bool generate_coords(Molecule &mol, int max_trial = 10) { + Matrix3Xd conf; + bool success = generate_coords(mol, conf, max_trial); + if (success) + mol.confs().push_back(std::move(conf)); + return success; +} +} // namespace nuri + +#endif /* NURI_ALGO_CRDGEN_H_ */ diff --git a/src/algo/crdgen.cpp b/src/algo/crdgen.cpp new file mode 100644 index 00000000..05196d55 --- /dev/null +++ b/src/algo/crdgen.cpp @@ -0,0 +1,595 @@ +// +// Project NuriKit - Copyright 2024 SNU Compbio Lab. +// SPDX-License-Identifier: Apache-2.0 +// + +#include "nuri/algo/crdgen.h" + +#include + +#include + +#include +#include +#include +#include +#include + +#include "nuri/eigen_config.h" +#include "nuri/algo/optim.h" +#include "nuri/core/geometry.h" +#include "nuri/core/molecule.h" +#include "nuri/utils.h" + +namespace nuri { +namespace { + using constants::kCos100; + using constants::kCos102; + using constants::kCos112; + using constants::kCos115; + using constants::kCos125; + using constants::kCos175; + using constants::kCos75; + + constexpr double kVdwRadDownscale = 0.85; + constexpr double kMaxInterAtomDist = 5.0; + + class DistanceBounds { + public: + DistanceBounds(const int n): bounds_(n, n) { + bounds_.matrix().diagonal().setZero(); + } + + std::pair lbsq_ubsq() const; + + // Havel's distribution function: + // Eq. 43, Distance Geometry: Theory, Algorithms, and Chemical Applications. + // In Encycl. Comput. Chem., 1998, p. 731. + void fill_trial_distances(MatrixXd &dists, const double ubeta) const { + dists.diagonal().setZero(); + + const double lbeta = 1 - ubeta; + ABSL_DCHECK(ubeta >= 0); + ABSL_DCHECK(lbeta >= 0); + + for (int i = 0; i < n() - 1; ++i) { + for (int j = i + 1; j < n(); ++j) { + double ub_tet = ub(i, j); + ub_tet *= ub_tet; + ub_tet *= ub_tet; + + double lb_tet = lb(i, j); + lb_tet *= lb_tet; + lb_tet *= lb_tet; + + dists(i, j) = dists(j, i) = + std::sqrt(std::sqrt(lbeta * lb_tet + ubeta * ub_tet)); + } + } + } + + int n() const { return static_cast(bounds_.cols()); } + + double &ub(int i, int j) { + ABSL_DCHECK(i <= j); + return bounds_(j, i); + } + + double ub(int i, int j) const { + ABSL_DCHECK(i <= j); + return bounds_(j, i); + } + + double &lb(int i, int j) { + ABSL_DCHECK(i <= j); + return bounds_(i, j); + } + + double lb(int i, int j) const { + ABSL_DCHECK(i <= j); + return bounds_(i, j); + } + + template + auto lb_head(int i) { + static_assert(offset <= 1); + return bounds_.col(i).head(i + offset); + } + + template + auto lb_tail(int i) { + static_assert(offset <= 1); + return bounds_.row(i).tail(n() - i - 1 + offset).transpose(); + } + + template + auto ub_head(int i) { + static_assert(offset <= 1); + return bounds_.row(i).head(i + offset).transpose(); + } + + template + auto ub_tail(int i) { + static_assert(offset <= 1); + return bounds_.col(i).tail(n() - i - 1 + offset); + } + + template + auto lb_head(int i) const { + static_assert(offset <= 1); + return bounds_.col(i).head(i + offset); + } + + template + auto ub_head(int i) const { + static_assert(offset <= 1); + return bounds_.row(i).head(i + offset).transpose(); + } + + const ArrayXXd &data() const { return bounds_; } + + private: + // lower triangle is upper bound, upper triangle is lower bound + ArrayXXd bounds_; + }; + + std::pair DistanceBounds::lbsq_ubsq() const { + const auto nc2 = n() * (n() - 1) / 2; + ArrayXd lbsq(nc2), ubsq(nc2); + + for (int i = 1, k = 0; i < n(); k += i, ++i) { + lbsq.segment(k, i) = lb_head(i).square(); + ubsq.segment(k, i) = ub_head(i).square(); + } + + return { lbsq, ubsq }; + } + + struct PQEntry { + int curr; + int prev; + double length; + }; + + // NOLINTNEXTLINE(clang-diagnostic-unused-function) + bool operator<(PQEntry lhs, PQEntry rhs) { + return lhs.length < rhs.length; + } + + void set_constraints(const Molecule &mol, DistanceBounds &bounds, + ArrayXd &radii) { + const int n = mol.num_atoms(); + + for (int i = 0; i < n; ++i) + radii[i] = mol.atom(i).data().element().vdw_radius() * kVdwRadDownscale; + + double max_upper = n * kMaxInterAtomDist; + for (int i = 0; i < n; ++i) { + bounds.lb_head(i) = radii[i] + radii.head(i); + bounds.ub_tail(i) = max_upper; + } + + for (int i = 0; i < n; ++i) + radii[i] = mol.atom(i).data().element().covalent_radius(); + + ArrayXd bds(mol.num_bonds()); + for (auto bond: mol.bonds()) { + int src = bond.src().id(), dst = bond.dst().id(); + double pred_bond_len = radii[src] + radii[dst]; + + switch (bond.data().order()) { + case constants::kOtherBond: + ABSL_LOG(INFO) << "unknown bond order, assuming single bond"; + ABSL_FALLTHROUGH_INTENDED; + case constants::kSingleBond: + if (bond.data().is_conjugated()) + pred_bond_len *= 0.9; + break; + case constants::kDoubleBond: + pred_bond_len *= 0.87; + break; + case constants::kTripleBond: + pred_bond_len *= 0.78; + break; + case constants::kQuadrupleBond: + pred_bond_len *= 0.7; + break; + case constants::kAromaticBond: + pred_bond_len *= 0.9; + break; + } + + bounds.lb(src, dst) = bounds.ub(src, dst) = bds[bond.id()] = + pred_bond_len; + } + + ArrayXd bdsq = bds.square(); + for (auto atom: mol) { + if (atom.degree() < 2) + continue; + + double cos_upper, cos_lower; + switch (atom.data().hybridization()) { + case constants::kUnbound: + case constants::kTerminal: + ABSL_LOG(WARNING) << "inconsistent hybridization state for atom " + << atom.id() << ": " << atom.data().hybridization(); + ABSL_FALLTHROUGH_INTENDED; + case constants::kSP: + cos_upper = -1; // kCos180 + cos_lower = kCos175; + break; + case constants::kSP2: + cos_upper = kCos125; + cos_lower = kCos115; + break; + case constants::kOtherHyb: + ABSL_LOG(WARNING) << "unknown hybridization state for atom " + << atom.id() << "; assuming sp3"; + ABSL_FALLTHROUGH_INTENDED; + case constants::kSP3: + cos_upper = kCos112; + cos_lower = kCos102; // cyclopentane ~104.5 degrees + break; + case constants::kSP3D: + cos_upper = kCos125; + cos_lower = kCos75; + break; + case constants::kSP3D2: + cos_upper = kCos100; + cos_lower = kCos75; + break; + } + + for (int i = 0; i < atom.degree() - 1; ++i) { + for (int j = i + 1; j < atom.degree(); ++j) { + const int bi = atom[i].eid(), bj = atom[j].eid(); + const double bdsqsum = bdsq[bi] + bdsq[bj]; + const double bdmul = 2 * bds[bi] * bds[bj]; + + auto [ni, nj] = nuri::minmax(atom[i].dst().id(), atom[j].dst().id()); + const double ub = bounds.ub(ni, nj) = nuri::min( + bounds.ub(ni, nj), std::sqrt(bdsqsum - bdmul * cos_upper)); + bounds.lb(ni, nj) = nuri::clamp( + std::sqrt(bdsqsum - bdmul * cos_lower), bounds.lb(ni, nj), ub); + } + } + } + } + + // NOLINTNEXTLINE(readability-function-cognitive-complexity) + std::vector update_upper_bounds(DistanceBounds &bounds, + const Molecule &mol, ArrayXd &urev) { + const int n = mol.num_atoms(); + + std::vector roots; + roots.reserve(2ULL * mol.num_fragments()); + + internal::ClearablePQ pq; + ArrayXb visited = ArrayXb::Zero(n); + + for (int root = 0; root < n; ++root) { + if (visited[root]) + continue; + + roots.push_back(root); + visited[root] = true; + + for (auto nei: mol.atom(root)) { + int next = nei.dst().id(); + pq.push({ next, root, bounds.ub(root, next) }); + } + + do { + auto [curr, prev, curr_len] = pq.pop_get(); + if (curr_len > bounds.ub(root, curr)) + continue; + + visited[curr] = true; + + const double prev_len = bounds.ub(root, prev); + for (auto nei: mol.atom(curr)) { + int next = nei.dst().id(); + if (visited[next]) + continue; + + auto [ni, nj] = nuri::minmax(prev, next); + double new_ub = prev_len + bounds.ub(ni, nj); + double &ref_ub = bounds.ub(root, next); + if (new_ub <= ref_ub) { + ref_ub = new_ub; + pq.push({ next, curr, new_ub }); + } + } + } while (!pq.empty()); + } + + // i < j < k + for (int i: roots) { + for (int j = i + 1; j < n - 1; ++j) { + bounds.ub_tail(j) = bounds.ub_tail(j).min( + bounds.ub(i, j) + bounds.ub_tail(i).tail(n - j - 1)); + } + } + + const int mid = static_cast(roots.size()); + visited.setZero(); + + for (int root = n - 1; root >= 0; --root) { + if (visited[root]) + continue; + + roots.push_back(root); + visited[root] = true; + + auto u_r = urev.head(root + 1); + u_r.setConstant(n * kMaxInterAtomDist); + u_r[root] = 0; + + for (auto nei: mol.atom(root)) { + int next = nei.dst().id(); + u_r[next] = bounds.ub(next, root); + pq.push({ next, root, bounds.ub(next, root) }); + } + + do { + auto [curr, prev, curr_len] = pq.pop_get(); + if (curr_len > u_r[curr]) + continue; + + visited[curr] = true; + + const double prev_len = u_r[prev]; + for (auto nei: mol.atom(curr)) { + int next = nei.dst().id(); + if (visited[next]) + continue; + + auto [ni, nj] = nuri::minmax(prev, next); + double new_ub = prev_len + bounds.ub(ni, nj); + double &ref_ub = u_r[next]; + if (new_ub < ref_ub) { + ref_ub = new_ub; + pq.push({ next, curr, new_ub }); + } + } + } while (!pq.empty()); + + bounds.ub_head(root) = bounds.ub_head(root).min(u_r.head(root)); + } + + // j < k < i + for (int r = mid; r < roots.size(); ++r) { + int i = roots[r]; + for (int j = 0; j < i - 1; ++j) { + bounds.ub_tail(j).head(i - j - 1) = + bounds.ub_tail(j).head(i - j - 1).min( + bounds.ub(j, i) + bounds.ub_head(i).tail(i - j - 1)); + } + } + + return roots; + } + + void update_lower_bounds(DistanceBounds &bounds, const Molecule &mol, + const std::vector &roots, ArrayXd &jb_buf) { + const int n = mol.num_atoms(); + const int mid = mol.num_fragments(); + ABSL_DCHECK(2ULL * mid == roots.size()); + + ArrayXi j_buf(n); + + for (int r = 0; r < mid; ++r) { + int i = roots[r]; + + auto js = j_buf.head(n - i - 1); + absl::c_iota(js, i + 1); + absl::c_sort(js, [&](int lhs, int rhs) { + return bounds.ub(i, lhs) > bounds.ub(i, rhs); + }); + + // i < j, i < k, j != k + auto u_j = jb_buf.head(n - i - 1); + for (int j: js) { + u_j.head(j - i - 1) = bounds.ub_head(j).tail(j - i - 1); + u_j.tail(n - j) = bounds.ub_tail<1>(j); + + // u_j[j - i - 1] == 0, lb(i, j) - ub(j, j) == lb(i, j) + // so, the following effectivecly calculates: + // clamp(lb(i, j), lb(i, k) - ub(j, k), ub(i, j)) + bounds.lb(i, j) = + nuri::min((bounds.lb_tail(i) - u_j).maxCoeff(), bounds.ub(i, j)); + } + + // i < j < k + for (int k = n - 1; k > i + 1; --k) { + for (int j = k - 1; j > i; --j) { + bounds.lb(j, k) = + nuri::min(bounds.ub(j, k), std::max({ + bounds.lb(j, k), + bounds.lb(i, j) - bounds.ub(i, k), + bounds.lb(i, k) - bounds.ub(i, j), + })); + } + } + } + + for (int r = mid; r < roots.size(); ++r) { + int i = roots[r]; + + auto js = j_buf.head(i); + absl::c_iota(js, 0); + absl::c_sort(js, [&](int lhs, int rhs) { + return bounds.ub(lhs, i) > bounds.ub(rhs, i); + }); + + // j < i, k < i, j != k + auto u_j = jb_buf.head(i); + for (int j: js) { + u_j.head(j) = bounds.ub_head(j); + u_j.tail(i - j) = bounds.ub_tail<1>(j).head(i - j); + + // Same here. + bounds.lb(j, i) = + nuri::min((bounds.lb_head(i) - u_j).maxCoeff(), bounds.ub(j, i)); + } + + // j < k < i + for (int k = i - 1; k >= 1; --k) { + for (int j = k - 1; j >= 0; --j) { + bounds.lb(j, k) = + nuri::min(bounds.ub(j, k), std::max({ + bounds.lb(j, k), + bounds.lb(j, i) - bounds.ub(k, i), + bounds.lb(k, i) - bounds.ub(j, i), + })); + } + } + } + } + + DistanceBounds init_bounds(const Molecule &mol) { + const int n = mol.num_atoms(); + + DistanceBounds bounds(n); + ArrayXd temp(n); + + set_constraints(mol, bounds, temp); + + std::vector roots = update_upper_bounds(bounds, mol, temp); + update_lower_bounds(bounds, mol, roots, temp); + + return bounds; + } + + // Havel's distance error function: + // E3 in Distance Geometry in Molecular Modeling, 1994, Ch.6, p. 311. + // NOLINTNEXTLINE(clang-diagnostic-unneeded-internal-declaration) + double distance_error(MutRef &g, ConstRef x, + Array4Xd &diffs, ArrayXd &t1, ArrayXd &t2, + Array4Xd &buf, const ArrayXd &lbsq, + const ArrayXd &ubsq) { + const auto n = x.cols(); + + for (int i = 1, k = 0; i < n; ++i) + for (int j = 0; j < i; ++j, ++k) + diffs.col(k) = x.col(i) - x.col(j); + + t1 = diffs.matrix().colwise().squaredNorm().transpose(); + + t2 = (t1 / ubsq - 1).max(0); + for (int i = 1, k = 0; i < n; k += i, ++i) { + buf.leftCols(i) = + diffs.middleCols(k, i).rowwise() + * (4 * t2.segment(k, i) / ubsq.segment(k, i)).transpose(); + g.col(i) += buf.leftCols(i).rowwise().sum(); + g.leftCols(i) -= buf.leftCols(i); + } + const double ub_err = t2.square().sum(); + + t2 = 1 + t1 / lbsq; + t1 = (2 / t2 - 1).max(0); + for (int i = 1, k = 0; i < n; k += i, ++i) { + buf.leftCols(i) = diffs.middleCols(k, i).rowwise() + * (-8 * t1.segment(k, i) / t2.segment(k, i).square() + / lbsq.segment(k, i)) + .transpose(); + g.col(i) += buf.leftCols(i).rowwise().sum(); + g.leftCols(i) -= buf.leftCols(i); + } + const double lb_err = t1.square().sum(); + + return ub_err + lb_err; + } + + // NOLINTNEXTLINE(clang-diagnostic-unneeded-internal-declaration) + double extra_dimension_error(MutRef &g, ConstRef x) { + g.row(3) += 2 * x.row(3); + return x.row(3).square().sum(); + } + + template + // NOLINTNEXTLINE(clang-diagnostic-unused-template) + double error_funcgrad(ArrayXd &ga, ConstRef xa, Array4Xd &diffs, + ArrayXd &t1, ArrayXd &t2, Array4Xd &buf, + const std::pair &bounds_squared) { + ga.setZero(); + + const auto n = buf.cols() + 1; + MutRef g = ga.reshaped(4, n); + ConstRef x = xa.reshaped(4, n); + + const double e1 = distance_error( + g, x, diffs, t1, t2, buf, bounds_squared.first, bounds_squared.second); + if constexpr (!MinimizeFourth) + return e1; + + const double e2 = extra_dimension_error(g, x); + return e1 + e2; + } + + // NOLINTNEXTLINE(*-non-const-global-variables) + thread_local absl::InsecureBitGen rng; +} // namespace + +bool generate_coords(const Molecule &mol, Matrix3Xd &conf, int max_trial) { + const Eigen::Index n = mol.num_atoms(); + + if (n != conf.cols()) { + ABSL_LOG(ERROR) << "size mismatch: " << n << " atoms in the molecule, but " + << conf.cols() << " columns in the matrix"; + return false; + } + + DistanceBounds bounds = init_bounds(mol); + const std::pair bounds_squared = bounds.lbsq_ubsq(); + ABSL_DLOG(INFO) << "initial bounds:\n" << bounds.data() << "\n"; + + Matrix4Xd trial(4, n); + MatrixXd dists(mol.size(), mol.size()); + + ArrayXi nbd = ArrayXi::Zero(4 * n); + Array2Xd dummy_bds(2, 4 * n); + LBfgsB optim(trial.reshaped().array(), { nbd, dummy_bds }); + + const auto nc2 = bounds_squared.first.size(); + Array4Xd diffs(4, nc2), buf(4, n - 1); + ArrayXd t1(nc2), t2(nc2); + + auto first_fg = [&](ArrayXd &ga, const auto &xa) { + return error_funcgrad(ga, xa, diffs, t1, t2, buf, bounds_squared); + }; + + auto second_fg = [&](ArrayXd &ga, const auto &xa) { + return error_funcgrad(ga, xa, diffs, t1, t2, buf, bounds_squared); + }; + + double beta = 0.5; + bool success = false; + + for (int iter = 0; iter < max_trial; + beta = absl::Uniform(absl::IntervalClosed, rng, 0.0, 1.0), ++iter) { + bounds.fill_trial_distances(dists, beta); + dists.cwiseAbs2(); + + if (!embed_distances_4d(trial, dists)) + continue; + + LbfgsbResult res = optim.minimize(first_fg, 1e+10, 1e-3); + if (res.code != LbfgsbResultCode::kSuccess) + continue; + + res = optim.minimize(second_fg); + if (res.code != LbfgsbResultCode::kSuccess) + continue; + + success = true; + break; + } + if (!success) + return false; + + conf = trial.topRows(3); + return true; +} +} // namespace nuri From 5a0a5d513a9f9d5554eb0de554dda9af388cfccf Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:23:17 +0900 Subject: [PATCH 13/21] style: re-format with clang-format v19 --- include/nuri/eigen_config.h | 4 ++-- src/core/molecule.cpp | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/include/nuri/eigen_config.h b/include/nuri/eigen_config.h index 425ed4fa..313212ed 100644 --- a/include/nuri/eigen_config.h +++ b/include/nuri/eigen_config.h @@ -35,15 +35,15 @@ namespace nuri { using Eigen::Array; using Eigen::Array3d; -using Eigen::Array4d; using Eigen::Array3i; +using Eigen::Array4d; using Eigen::Array4i; using Eigen::ArrayX; using ArrayXb = Eigen::ArrayX; using Eigen::Array2Xd; -using Eigen::ArrayX2d; using Eigen::Array3Xd; using Eigen::Array4Xd; +using Eigen::ArrayX2d; using Eigen::ArrayXd; using Eigen::ArrayXi; using Eigen::ArrayXX; diff --git a/src/core/molecule.cpp b/src/core/molecule.cpp index a0eff7c2..f7cb6f6b 100644 --- a/src/core/molecule.cpp +++ b/src/core/molecule.cpp @@ -922,9 +922,11 @@ namespace { bool is_pyrrole_like(Molecule::Atom atom, const Element &effective, const int nbe, const int total_valence) { return nbe > 0 && internal::common_valence(effective) < total_valence - && std::any_of(atom.begin(), atom.end(), [](Molecule::Neighbor nei) { - return nei.edge_data().order() == constants::kAromaticBond; - }); + && std::any_of( + atom.begin(), atom.end(), + [](Molecule::Neighbor nei) { + return nei.edge_data().order() == constants::kAromaticBond; + }); } std::string format_atom_common(Molecule::Atom atom, bool caps) { From f576e0df21cb3117bb98a4e62e29293fbab65385 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:25:39 +0900 Subject: [PATCH 14/21] fix(algo/crdgen): add missing headers --- include/nuri/algo/crdgen.h | 2 ++ src/algo/crdgen.cpp | 3 +++ 2 files changed, 5 insertions(+) diff --git a/include/nuri/algo/crdgen.h b/include/nuri/algo/crdgen.h index 620540fe..a8a4bcd0 100644 --- a/include/nuri/algo/crdgen.h +++ b/include/nuri/algo/crdgen.h @@ -6,6 +6,8 @@ #ifndef NURI_ALGO_CRDGEN_H_ #define NURI_ALGO_CRDGEN_H_ +#include + #include "nuri/eigen_config.h" #include "nuri/core/molecule.h" diff --git a/src/algo/crdgen.cpp b/src/algo/crdgen.cpp index 05196d55..9d2024e4 100644 --- a/src/algo/crdgen.cpp +++ b/src/algo/crdgen.cpp @@ -5,6 +5,9 @@ #include "nuri/algo/crdgen.h" +#include +#include +#include #include #include From b2c64adf9bd3f842345dbd52d78e01905c83571d Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Mon, 21 Oct 2024 17:43:50 +0900 Subject: [PATCH 15/21] perf(algo/crdgen): simplify and optimize distance error term --- src/algo/crdgen.cpp | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/algo/crdgen.cpp b/src/algo/crdgen.cpp index 9d2024e4..f55a4fc2 100644 --- a/src/algo/crdgen.cpp +++ b/src/algo/crdgen.cpp @@ -470,8 +470,7 @@ namespace { // NOLINTNEXTLINE(clang-diagnostic-unneeded-internal-declaration) double distance_error(MutRef &g, ConstRef x, Array4Xd &diffs, ArrayXd &t1, ArrayXd &t2, - Array4Xd &buf, const ArrayXd &lbsq, - const ArrayXd &ubsq) { + const ArrayXd &lbsq, const ArrayXd &ubsq) { const auto n = x.cols(); for (int i = 1, k = 0; i < n; ++i) @@ -481,24 +480,25 @@ namespace { t1 = diffs.matrix().colwise().squaredNorm().transpose(); t2 = (t1 / ubsq - 1).max(0); - for (int i = 1, k = 0; i < n; k += i, ++i) { - buf.leftCols(i) = - diffs.middleCols(k, i).rowwise() - * (4 * t2.segment(k, i) / ubsq.segment(k, i)).transpose(); - g.col(i) += buf.leftCols(i).rowwise().sum(); - g.leftCols(i) -= buf.leftCols(i); + for (int i = 1, k = 0; i < n; ++i) { + for (int j = 0; j < i; ++j, ++k) { + Array4d grad = 4 / ubsq[k] * t2[k] * diffs.col(k); + g.col(i) += grad; + g.col(j) -= grad; + } } const double ub_err = t2.square().sum(); t2 = 1 + t1 / lbsq; t1 = (2 / t2 - 1).max(0); - for (int i = 1, k = 0; i < n; k += i, ++i) { - buf.leftCols(i) = diffs.middleCols(k, i).rowwise() - * (-8 * t1.segment(k, i) / t2.segment(k, i).square() - / lbsq.segment(k, i)) - .transpose(); - g.col(i) += buf.leftCols(i).rowwise().sum(); - g.leftCols(i) -= buf.leftCols(i); + + t2 = t2.square(); + for (int i = 1, k = 0; i < n; ++i) { + for (int j = 0; j < i; ++j, ++k) { + Array4d grad = -8 * t1[k] / t2[k] / lbsq[k] * diffs.col(k); + g.col(i) += grad; + g.col(j) -= grad; + } } const double lb_err = t1.square().sum(); @@ -514,16 +514,16 @@ namespace { template // NOLINTNEXTLINE(clang-diagnostic-unused-template) double error_funcgrad(ArrayXd &ga, ConstRef xa, Array4Xd &diffs, - ArrayXd &t1, ArrayXd &t2, Array4Xd &buf, - const std::pair &bounds_squared) { + ArrayXd &t1, ArrayXd &t2, + const std::pair &bounds_squared, + const int n) { ga.setZero(); - const auto n = buf.cols() + 1; MutRef g = ga.reshaped(4, n); ConstRef x = xa.reshaped(4, n); - const double e1 = distance_error( - g, x, diffs, t1, t2, buf, bounds_squared.first, bounds_squared.second); + const double e1 = distance_error(g, x, diffs, t1, t2, bounds_squared.first, + bounds_squared.second); if constexpr (!MinimizeFourth) return e1; @@ -556,15 +556,15 @@ bool generate_coords(const Molecule &mol, Matrix3Xd &conf, int max_trial) { LBfgsB optim(trial.reshaped().array(), { nbd, dummy_bds }); const auto nc2 = bounds_squared.first.size(); - Array4Xd diffs(4, nc2), buf(4, n - 1); + Array4Xd diffs(4, nc2); ArrayXd t1(nc2), t2(nc2); auto first_fg = [&](ArrayXd &ga, const auto &xa) { - return error_funcgrad(ga, xa, diffs, t1, t2, buf, bounds_squared); + return error_funcgrad(ga, xa, diffs, t1, t2, bounds_squared, n); }; auto second_fg = [&](ArrayXd &ga, const auto &xa) { - return error_funcgrad(ga, xa, diffs, t1, t2, buf, bounds_squared); + return error_funcgrad(ga, xa, diffs, t1, t2, bounds_squared, n); }; double beta = 0.5; From fbb7e1d2ef0cd9d5aeb20e97efa3b6923856a8e8 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Tue, 22 Oct 2024 11:30:36 +0900 Subject: [PATCH 16/21] perf(algo/optim/lbfgsb): optimize hot code path --- src/algo/optim.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/algo/optim.cpp b/src/algo/optim.cpp index 8872ea1d..ee09ab0c 100644 --- a/src/algo/optim.cpp +++ b/src/algo/optim.cpp @@ -326,28 +326,32 @@ namespace internal { // ----------------------------------------------------- // Let us try the projection, d is the Newton direction. + if (!L.constrained()) { + x(free) += d.array(); + return true; + } + xp = x; + x(free) += d.array(); bool bounded = false; for (int i = 0; i < nsub; ++i) { - int k = free[i]; - const double xk_new = xp[k] + d[i]; + const int k = free[i]; switch (bounds.raw_nbd(k)) { case 0: - x[k] = xk_new; break; case 1: - x[k] = nuri::max(bounds.lb(k), xk_new); + x[k] = nuri::max(bounds.lb(k), x[k]); if (x[k] <= bounds.lb(k)) bounded = true; break; case 2: - x[k] = nuri::min(bounds.ub(k), xk_new); + x[k] = nuri::min(bounds.ub(k), x[k]); if (x[k] >= bounds.ub(k)) bounded = true; break; case 3: - x[k] = nuri::clamp(xk_new, bounds.lb(k), bounds.ub(k)); + x[k] = nuri::clamp(x[k], bounds.lb(k), bounds.ub(k)); if (x[k] <= bounds.lb(k) || x[k] >= bounds.ub(k)) bounded = true; break; @@ -1056,16 +1060,14 @@ namespace { const double theta) { const auto col = ss.cols(); - sy_scaled.array() = - sy.array().rowwise() / sy.diagonal().array().transpose(); - // Form the upper half of T = theta*SS + L*D^(-1)*L', store T in the // upper triangle of the array wt. wtt.template triangularView() = theta * ss.transpose(); for (int i = 1; i < col; ++i) { + sy_scaled.head(i).array() = + sy.row(i).head(i).transpose().array() / sy.diagonal().head(i).array(); wtt.col(i).tail(col - i).noalias() += - sy.bottomLeftCorner(col - i, i) - * sy_scaled.row(i).head(i).transpose(); + sy.bottomLeftCorner(col - i, i) * sy_scaled.head(i); } Eigen::LLT> llt(wtt); @@ -1096,8 +1098,7 @@ bool LBfgsB::prepare_next_iter(const double gd, const double ginit, prev_col_ = col(); col_ = lbfgs_matupd(ws_, wy_, ss_, sy_, d(), r(), dr, step, dtd, col()); // wn will be recalculated in the next iteration, use for temporary storage - return lbfgs_formt(wtt_.topLeftCorner(col(), col()), - wnt_.topLeftCorner(col(), col()), + return lbfgs_formt(wtt_.topLeftCorner(col(), col()), wnt_.col(0), sy_.topLeftCorner(col(), col()), ss_.topLeftCorner(col(), col()), theta()); } From ee900d580e5b13cce6ff22d29961471ea7b91471 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Tue, 22 Oct 2024 11:33:15 +0900 Subject: [PATCH 17/21] perf(algo/crdgen): optimize hot code path --- src/algo/crdgen.cpp | 77 +++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 48 deletions(-) diff --git a/src/algo/crdgen.cpp b/src/algo/crdgen.cpp index f55a4fc2..63c2d5b5 100644 --- a/src/algo/crdgen.cpp +++ b/src/algo/crdgen.cpp @@ -7,7 +7,6 @@ #include #include -#include #include #include @@ -43,7 +42,7 @@ namespace { bounds_.matrix().diagonal().setZero(); } - std::pair lbsq_ubsq() const; + Array2Xd bsq_inv() const; // Havel's distribution function: // Eq. 43, Distance Geometry: Theory, Algorithms, and Chemical Applications. @@ -129,23 +128,21 @@ namespace { return bounds_.row(i).head(i + offset).transpose(); } - const ArrayXXd &data() const { return bounds_; } - private: // lower triangle is upper bound, upper triangle is lower bound ArrayXXd bounds_; }; - std::pair DistanceBounds::lbsq_ubsq() const { + Array2Xd DistanceBounds::bsq_inv() const { const auto nc2 = n() * (n() - 1) / 2; - ArrayXd lbsq(nc2), ubsq(nc2); + Array2Xd bsq(2, nc2); for (int i = 1, k = 0; i < n(); k += i, ++i) { - lbsq.segment(k, i) = lb_head(i).square(); - ubsq.segment(k, i) = ub_head(i).square(); + bsq.row(0).segment(k, i) = lb_head(i).transpose().square().inverse(); + bsq.row(1).segment(k, i) = ub_head(i).transpose().square().inverse(); } - return { lbsq, ubsq }; + return bsq; } struct PQEntry { @@ -469,38 +466,30 @@ namespace { // E3 in Distance Geometry in Molecular Modeling, 1994, Ch.6, p. 311. // NOLINTNEXTLINE(clang-diagnostic-unneeded-internal-declaration) double distance_error(MutRef &g, ConstRef x, - Array4Xd &diffs, ArrayXd &t1, ArrayXd &t2, - const ArrayXd &lbsq, const ArrayXd &ubsq) { - const auto n = x.cols(); + const Array2Xd &bsq_inv) { + double ub_err = 0, lb_err = 0; - for (int i = 1, k = 0; i < n; ++i) - for (int j = 0; j < i; ++j, ++k) - diffs.col(k) = x.col(i) - x.col(j); + for (int i = 1, k = 0; i < x.cols(); ++i) { + for (int j = 0; j < i; ++j, ++k) { + const double lbsq_inv = bsq_inv(0, k), ubsq_inv = bsq_inv(1, k); - t1 = diffs.matrix().colwise().squaredNorm().transpose(); + Array4d diff = x.col(i) - x.col(j); + const double dsq = diff.matrix().squaredNorm(); - t2 = (t1 / ubsq - 1).max(0); - for (int i = 1, k = 0; i < n; ++i) { - for (int j = 0; j < i; ++j, ++k) { - Array4d grad = 4 / ubsq[k] * t2[k] * diffs.col(k); - g.col(i) += grad; - g.col(j) -= grad; - } - } - const double ub_err = t2.square().sum(); + const double ub_term = nonnegative(dsq * ubsq_inv - 1); - t2 = 1 + t1 / lbsq; - t1 = (2 / t2 - 1).max(0); + const double lb_inner_inv = 1 / (1 + dsq * lbsq_inv); + const double lb_term = nonnegative(2 * lb_inner_inv - 1); - t2 = t2.square(); - for (int i = 1, k = 0; i < n; ++i) { - for (int j = 0; j < i; ++j, ++k) { - Array4d grad = -8 * t1[k] / t2[k] / lbsq[k] * diffs.col(k); - g.col(i) += grad; - g.col(j) -= grad; + diff *= 4 * ubsq_inv * ub_term + - 8 * (lb_inner_inv * lb_inner_inv) * lbsq_inv * lb_term; + g.col(i) += diff; + g.col(j) -= diff; + + ub_err += ub_term * ub_term; + lb_err += lb_term * lb_term; } } - const double lb_err = t1.square().sum(); return ub_err + lb_err; } @@ -513,17 +502,14 @@ namespace { template // NOLINTNEXTLINE(clang-diagnostic-unused-template) - double error_funcgrad(ArrayXd &ga, ConstRef xa, Array4Xd &diffs, - ArrayXd &t1, ArrayXd &t2, - const std::pair &bounds_squared, - const int n) { + double error_funcgrad(ArrayXd &ga, ConstRef xa, + const Array2Xd &bsq_inv, const int n) { ga.setZero(); MutRef g = ga.reshaped(4, n); ConstRef x = xa.reshaped(4, n); - const double e1 = distance_error(g, x, diffs, t1, t2, bounds_squared.first, - bounds_squared.second); + const double e1 = distance_error(g, x, bsq_inv); if constexpr (!MinimizeFourth) return e1; @@ -545,8 +531,7 @@ bool generate_coords(const Molecule &mol, Matrix3Xd &conf, int max_trial) { } DistanceBounds bounds = init_bounds(mol); - const std::pair bounds_squared = bounds.lbsq_ubsq(); - ABSL_DLOG(INFO) << "initial bounds:\n" << bounds.data() << "\n"; + const auto bsq_inv = bounds.bsq_inv(); Matrix4Xd trial(4, n); MatrixXd dists(mol.size(), mol.size()); @@ -555,16 +540,12 @@ bool generate_coords(const Molecule &mol, Matrix3Xd &conf, int max_trial) { Array2Xd dummy_bds(2, 4 * n); LBfgsB optim(trial.reshaped().array(), { nbd, dummy_bds }); - const auto nc2 = bounds_squared.first.size(); - Array4Xd diffs(4, nc2); - ArrayXd t1(nc2), t2(nc2); - auto first_fg = [&](ArrayXd &ga, const auto &xa) { - return error_funcgrad(ga, xa, diffs, t1, t2, bounds_squared, n); + return error_funcgrad(ga, xa, bsq_inv, n); }; auto second_fg = [&](ArrayXd &ga, const auto &xa) { - return error_funcgrad(ga, xa, diffs, t1, t2, bounds_squared, n); + return error_funcgrad(ga, xa, bsq_inv, n); }; double beta = 0.5; From 7ca8320c10da9919dd7cc6bd1bfd8417fe962e12 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Tue, 22 Oct 2024 11:38:33 +0900 Subject: [PATCH 18/21] test(algo/crdgen): generate sample molecule --- test/algo/crdgen_test.cpp | 51 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 test/algo/crdgen_test.cpp diff --git a/test/algo/crdgen_test.cpp b/test/algo/crdgen_test.cpp new file mode 100644 index 00000000..256ac594 --- /dev/null +++ b/test/algo/crdgen_test.cpp @@ -0,0 +1,51 @@ +// +// Project NuriKit - Copyright 2024 SNU Compbio Lab. +// SPDX-License-Identifier: Apache-2.0 +// + +#include "nuri/algo/crdgen.h" + +#include + +#include +#include +#include +#include + +#include "nuri/eigen_config.h" +#include "test_utils.h" +#include "nuri/core/molecule.h" +#include "nuri/fmt/smiles.h" + +namespace nuri { +namespace { +TEST(Strgen, Playground) { + Molecule mol = read_smiles({ "CC(=O)OC1CCCC2COC(=O)C21" }); + ASSERT_TRUE(MoleculeSanitizer(mol).sanitize_all()); + + Matrix3Xd &conf = mol.confs().emplace_back(3, mol.num_atoms()); + ASSERT_TRUE(generate_coords(mol, conf)); + + // XXX: The algorithm is basically deterministic on first pass, so just + // compare it with the reference output. Should be changed later on? + MatrixX3d ans { + { -2.660, 1.200, 0.539 }, + { -1.787, 0.051, 0.036 }, + { -1.671, -0.981, 0.715 }, + { -1.188, 0.148, -1.095 }, + { 0.111, 0.683, -1.280 }, + { 0.163, 2.150, -0.882 }, + { 0.144, 2.286, 0.634 }, + { 1.209, 1.469, 1.313 }, + { 1.509, 0.149, 0.783 }, + { 0.970, -1.042, 1.575 }, + { 0.686, -2.046, 0.589 }, + { 0.840, -1.641, -0.629 }, + { 0.469, -2.281, -1.618 }, + { 1.205, -0.144, -0.679 }, + }; + + NURI_EXPECT_EIGEN_EQ_TOL(conf, ans.transpose(), 1e-2); +} +} // namespace +} // namespace nuri From 694c0575c9de3f5d0562e6b8fa3a2c4f24ad7a00 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Tue, 22 Oct 2024 11:45:33 +0900 Subject: [PATCH 19/21] fix(algo/crdgen): sync integer size --- src/algo/crdgen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/algo/crdgen.cpp b/src/algo/crdgen.cpp index 63c2d5b5..1bd913fe 100644 --- a/src/algo/crdgen.cpp +++ b/src/algo/crdgen.cpp @@ -503,7 +503,7 @@ namespace { template // NOLINTNEXTLINE(clang-diagnostic-unused-template) double error_funcgrad(ArrayXd &ga, ConstRef xa, - const Array2Xd &bsq_inv, const int n) { + const Array2Xd &bsq_inv, const Eigen::Index n) { ga.setZero(); MutRef g = ga.reshaped(4, n); From 53898e5ed73a43166c01f40065bfe6138078cac9 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Tue, 22 Oct 2024 11:47:44 +0900 Subject: [PATCH 20/21] docs(algo/crdgen): ignore system headers in docs --- include/nuri/algo/crdgen.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/nuri/algo/crdgen.h b/include/nuri/algo/crdgen.h index a8a4bcd0..fe088cf7 100644 --- a/include/nuri/algo/crdgen.h +++ b/include/nuri/algo/crdgen.h @@ -6,7 +6,9 @@ #ifndef NURI_ALGO_CRDGEN_H_ #define NURI_ALGO_CRDGEN_H_ +/// @cond #include +/// @endcond #include "nuri/eigen_config.h" #include "nuri/core/molecule.h" From ab6f63372460fbbfaf0e1ef2f72bf2f96ccecd94 Mon Sep 17 00:00:00 2001 From: Nuri Jung Date: Tue, 22 Oct 2024 12:09:38 +0900 Subject: [PATCH 21/21] test(algo/crdgen): update test name [skip ci] --- test/algo/crdgen_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/algo/crdgen_test.cpp b/test/algo/crdgen_test.cpp index 256ac594..67e04eb2 100644 --- a/test/algo/crdgen_test.cpp +++ b/test/algo/crdgen_test.cpp @@ -19,7 +19,7 @@ namespace nuri { namespace { -TEST(Strgen, Playground) { +TEST(Crdgen, CHEMBL2228334) { Molecule mol = read_smiles({ "CC(=O)OC1CCCC2COC(=O)C21" }); ASSERT_TRUE(MoleculeSanitizer(mol).sanitize_all());