diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a0e919..1e15431 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,7 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/PolySolveOptions.cmake) include(${CMAKE_CURRENT_SOURCE_DIR}/PolySolveOptions.cmake) endif() + ################################################################################ # CMake Policies ################################################################################ @@ -47,16 +48,19 @@ endif() # Polysolve options option(POLYSOLVE_WITH_SANITIZERS "Enable sanitizers in compilation targets" OFF) # Polysolve options for enabling/disabling optional libraries + option(POLYSOLVE_WITH_CHOLMOD "Enable Cholmod library" ON) option(POLYSOLVE_WITH_UMFPACK "Enable UmfPack library" ON) option(POLYSOLVE_WITH_SUPERLU "Enable SuperLU library" ON) option(POLYSOLVE_WITH_MKL "Enable MKL library" ${POLYSOLVE_NOT_ON_APPLE_SILICON}) option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" OFF) - +option(POLYSOLVE_WITH_CUSOLVER "Enable cuSOLVER library" OFF) +option(POLYSOLVE_WITH_CUDA "Enable CUDA" ON) option(POLYSOLVE_WITH_PARDISO "Enable Pardiso library" OFF) option(POLYSOLVE_WITH_HYPRE "Enable hypre" ON) option(POLYSOLVE_WITH_AMGCL "Use AMGCL" ON) option(POLYSOLVE_WITH_SPECTRA "Enable computing spectrum" ON) + # Sanitizer options option(POLYSOLVE_SANITIZE_ADDRESS "Sanitize Address" OFF) option(POLYSOLVE_SANITIZE_MEMORY "Sanitize Memory" OFF) @@ -76,6 +80,10 @@ if(POLYSOLVE_TOPLEVEL_PROJECT) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) endif() +if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_STANDARD_REQUIRED ON) +endif() if (MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj") @@ -92,6 +100,7 @@ include(polysolve_use_colors) # IPC Toolkit utils include(polysolve_prepend_current_path) include(polysolve_set_source_group) +include(polysolve_target_link_system_libraries) # Sort projects inside the solution set_property(GLOBAL PROPERTY USE_FOLDERS ON) @@ -143,11 +152,54 @@ endif() include(eigen) target_link_libraries(polysolve PUBLIC Eigen3::Eigen) +# CUDA +if(POLYSOLVE_WITH_CUDA) + include(CheckLanguage) + check_language(CUDA) + set(CMAKE_CUDA_ARCHITECTURES "61") + find_package(CUDAToolkit) + if(CUDAToolkit_FOUND) + message(STATUS "Found CUDATooklkit") + set(CMAKE_CUDA_COMPILER ${CUDAToolkit_NVCC_EXECUTABLE}) + enable_language(CUDA) + target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_CUDA) + else() + message(FATAL_ERROR "No CUDA support found!") + endif() + # We need to explicitly state that we need all CUDA files in the particle + # library to be built with -dc as the member functions could be called by + # other libraries and executables. + set_target_properties(polysolve PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + + # Nvidia RTX8000 -> compute_75 + # Nvidia V100 -> compute_70 + # Nvidia 1080/1080Ti -> compute_61 + # Nvidia 3080Ti -> compute_86 + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70 75 86) + endif() + set_target_properties(polysolve PROPERTIES CUDA_ARCHITECTURES "70;75;86") + + if(APPLE) + # We need to add the path to the driver (libcuda.dylib) as an rpath, + # so that the static cuda runtime can find it at runtime. + set_property(TARGET polysolve + PROPERTY + BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) + endif() + polysolve_target_link_system_libraries(polysolve PRIVATE CUDA::cudart) + polysolve_target_link_system_libraries(polysolve PRIVATE CUDA::cusparse) +endif() + + # Hypre (GNU Lesser General Public License) if(POLYSOLVE_WITH_HYPRE) include(hypre) target_link_libraries(polysolve PUBLIC HYPRE::HYPRE) target_compile_definitions(polysolve PUBLIC -DPOLYSOLVE_WITH_HYPRE) + if(HYPRE_WITH_MPI) + target_compile_definitions(polysolve PUBLIC HYPRE_WITH_MPI) + endif() endif() # Json (MIT) @@ -249,4 +301,11 @@ if(POLYSOLVE_WITH_TESTS) include("${catch2_SOURCE_DIR}/contrib/Catch.cmake") add_subdirectory(tests) + + # Cuda test + if(POLYSOLVE_WITH_CUDA) + add_subdirectory(cudatest) + endif() endif() + + diff --git a/README.md b/README.md index 1197ea9..f1f2376 100644 --- a/README.md +++ b/README.md @@ -43,27 +43,27 @@ Polysolve uses a json file to provide parameters to the individual solvers. The { "Eigen::LeastSquaresConjugateGradient": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::DGMRES": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::ConjugateGradient": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::BiCGSTAB": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::GMRES": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Eigen::MINRES": { "max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "Pardiso": { "mtype": -1 @@ -71,7 +71,7 @@ Polysolve uses a json file to provide parameters to the individual solvers. The "Hypre": { "max_iter": 1000, "pre_max_iter": 1000, - "tolerance": 1e-6 + "tolerance": 1e-10 }, "AMGCL": { "precond": { diff --git a/cmake/polysolve/polysolve_target_link_system_libraries.cmake b/cmake/polysolve/polysolve_target_link_system_libraries.cmake new file mode 100644 index 0000000..43f8a14 --- /dev/null +++ b/cmake/polysolve/polysolve_target_link_system_libraries.cmake @@ -0,0 +1,28 @@ +function(polysolve_target_link_system_libraries target) + set(options PRIVATE PUBLIC INTERFACE) + cmake_parse_arguments(TLLS "${options}" "" "" ${ARGN}) + foreach(op ${options}) + if(TLLS_${op}) + set(scope ${op}) + endif() + endforeach(op) + set(libs ${TLLS_UNPARSED_ARGUMENTS}) + + foreach(lib ${libs}) + get_target_property(lib_include_dirs ${lib} INTERFACE_INCLUDE_DIRECTORIES) + if(lib_include_dirs) + if(scope) + target_include_directories(${target} SYSTEM ${scope} ${lib_include_dirs}) + else() + target_include_directories(${target} SYSTEM PRIVATE ${lib_include_dirs}) + endif() + else() + message(WARNING "${lib} doesn't set INTERFACE_INCLUDE_DIRECTORIES. No include_directories set.") + endif() + if(scope) + target_link_libraries(${target} ${scope} ${lib}) + else() + target_link_libraries(${target} ${lib}) + endif() + endforeach() +endfunction(polysolve_target_link_system_libraries) \ No newline at end of file diff --git a/cmake/recipes/hypre.cmake b/cmake/recipes/hypre.cmake index 498bddb..f0fd537 100644 --- a/cmake/recipes/hypre.cmake +++ b/cmake/recipes/hypre.cmake @@ -6,67 +6,23 @@ endif() message(STATUS "Third-party: creating target 'HYPRE::HYPRE'") -include(FetchContent) -FetchContent_Declare( - hypre - GIT_REPOSITORY https://github.com/hypre-space/hypre.git - GIT_TAG v2.15.1 - GIT_SHALLOW TRUE -) - -FetchContent_GetProperties(hypre) -if(NOT hypre_POPULATED) - FetchContent_Populate(hypre) - file(REMOVE ${hypre_SOURCE_DIR}/src/utilities/version) -endif() - -################################################################################ - set(HYPRE_SEQUENTIAL ON CACHE INTERNAL "" FORCE) set(HYPRE_PRINT_ERRORS ON CACHE INTERNAL "" FORCE) set(HYPRE_BIGINT ON CACHE INTERNAL "" FORCE) set(HYPRE_USING_FEI OFF CACHE INTERNAL "" FORCE) set(HYPRE_USING_OPENMP OFF CACHE INTERNAL "" FORCE) set(HYPRE_SHARED OFF CACHE INTERNAL "" FORCE) -# set(HYPRE_LONG_DOUBLE ON) -set(HYPRE_BUILD_TYPE "${CMAKE_BUILD_TYPE}" CACHE INTERNAL "" FORCE) -add_subdirectory(${hypre_SOURCE_DIR}/src ${hypre_BINARY_DIR}) -add_library(HYPRE::HYPRE ALIAS HYPRE) - -set_property(TARGET HYPRE PROPERTY FOLDER "dependencies") - -target_include_directories(HYPRE PUBLIC ${hypre_BINARY_DIR}) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/blas) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/lapack) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/utilities) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/multivector) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/krylov) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/seq_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/parcsr_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/parcsr_block_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_matrix) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/IJ_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/matrix_matrix) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_ls) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_ls/Euclid) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/distributed_ls/ParaSails) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/parcsr_ls) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/struct_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/struct_ls) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/sstruct_mv) -target_include_directories(HYPRE PUBLIC ${hypre_SOURCE_DIR}/src/sstruct_ls) +include(FetchContent) +FetchContent_Declare( + hypre + GIT_REPOSITORY https://github.com/hypre-space/hypre.git + GIT_TAG v2.25.0 + GIT_SHALLOW TRUE +) -if(HYPRE_USING_OPENMP) - find_package(OpenMP QUIET REQUIRED) - target_link_libraries(HYPRE PUBLIC OpenMP::OpenMP_CXX) -endif() +FetchContent_MakeAvailable(hypre) -if(NOT HYPRE_SEQUENTIAL) - find_package(MPI) - if(MPI_CXX_FOUND) - target_link_libraries(HYPRE PUBLIC MPI::MPI_CXX) - endif() -endif() +add_subdirectory("${hypre_SOURCE_DIR}/src") +file(REMOVE "${hypre_SOURCE_DIR}/src/utilities/version") \ No newline at end of file diff --git a/cudatest/CMakeLists.txt b/cudatest/CMakeLists.txt new file mode 100644 index 0000000..f8f97be --- /dev/null +++ b/cudatest/CMakeLists.txt @@ -0,0 +1,37 @@ +################################################################################ +# Tests +################################################################################ +set(test_sources + main.cpp + cudatest.cpp +) +add_executable(cuda_test ${test_sources}) + +################################################################################ +# Required Libraries +################################################################################ +include(catch2) +target_link_libraries(cuda_test PUBLIC Catch2::Catch2) + +target_link_libraries(cuda_test PUBLIC polysolve::polysolve) + +include(polysolve_warnings) +target_link_libraries(cuda_test PRIVATE polysolve::warnings) +################################################################################ +# Register tests +################################################################################ +foreach(source IN ITEMS ${test_sources}) + source_group("tests" FILES "${source}") +endforeach() + +# Register tests +set(PARSE_CATCH_TESTS_ADD_TO_CONFIGURE_DEPENDS ON) +catch_discover_tests(cuda_test) + +################################################################################ +# Data +################################################################################ +set(DATA_DIR "${CMAKE_SOURCE_DIR}/tests/data") +target_compile_definitions(cuda_test PUBLIC -DPOLYSOLVE_DATA_DIR=\"${DATA_DIR}\") +polysolve_target_link_system_libraries(cuda_test PRIVATE CUDA::cudart) +polysolve_target_link_system_libraries(cuda_test PRIVATE CUDA::cusparse) \ No newline at end of file diff --git a/cudatest/cudatest.cpp b/cudatest/cudatest.cpp new file mode 100644 index 0000000..d970177 --- /dev/null +++ b/cudatest/cudatest.cpp @@ -0,0 +1,109 @@ +#include +#include +#include +#include +#include +#include +#include +#include +////////////////////////////////////////////////////////////////////////// +#include + +using namespace polysolve; + +void loadSymmetric(Eigen::SparseMatrix &A, std::string PATH) +{ + std::ifstream fin(PATH); + long int M, N, L; + while (fin.peek() == '%') + { + fin.ignore(2048, '\n'); + } + fin >> M >> N >> L; + A.resize(M, N); + A.reserve(L * 2 - M); + std::vector> triple; + for (size_t i = 0; i < L; i++) + { + int m, n; + double data; + fin >> m >> n >> data; + triple.push_back(Eigen::Triplet(m - 1, n - 1, data)); + if (m != n) + { + triple.push_back(Eigen::Triplet(n - 1, m - 1, data)); + } + } + fin.close(); + A.setFromTriplets(triple.begin(), triple.end()); +}; + +TEST_CASE("amgcl_crystm03_cg", "[solver]"){ + const std::string path = POLYSOLVE_DATA_DIR; + std::string MatrixName = "crystm03.mtx"; + Eigen::SparseMatrix A; + loadSymmetric(A, path + "/" + MatrixName); + + std::cout << "Matrix Load OK" << std::endl; + + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(A.rows()); + x.setZero(); + { + amgcl::profiler<> prof("crystm03_GPU"); + json solver_info; + auto solver = LinearSolver::create("AMGCL_cuda", ""); + prof.tic("setup"); + json params; + params["AMGCL"]["tolerance"] = 1e-8; + params["AMGCL"]["max_iter"] = 10000; + params["AMGCL"]["solver_type"] = "cg"; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + prof.toc("setup"); + prof.tic("solve"); + solver->solve(b, x); + prof.toc("solve"); + solver->getInfo(solver_info); + REQUIRE(solver_info["num_iterations"] > 0); + std::cout<< prof << std::endl; + } + REQUIRE((A * x - b).norm() / b.norm() < 1e-7); +} + +TEST_CASE("amgcl_crystm03_bicgstab", "[solver]"){ + const std::string path = POLYSOLVE_DATA_DIR; + std::string MatrixName = "crystm03.mtx"; + Eigen::SparseMatrix A; + loadSymmetric(A, path + "/" + MatrixName); + + std::cout << "Matrix Load OK" << std::endl; + + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(A.rows()); + x.setZero(); + { + amgcl::profiler<> prof("crystm03_GPU"); + json solver_info; + auto solver = LinearSolver::create("AMGCL_cuda", ""); + prof.tic("setup"); + json params; + params["AMGCL"]["tolerance"] = 1e-8; + params["AMGCL"]["max_iter"] = 10000; + params["AMGCL"]["solver_type"] = "bicgstab"; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + prof.toc("setup"); + prof.tic("solve"); + solver->solve(b, x); + prof.toc("solve"); + solver->getInfo(solver_info); + REQUIRE(solver_info["num_iterations"] > 0); + std::cout<< prof << std::endl; + } + REQUIRE((A * x - b).norm() / b.norm() < 1e-7); +} diff --git a/cudatest/main.cpp b/cudatest/main.cpp new file mode 100644 index 0000000..f30959d --- /dev/null +++ b/cudatest/main.cpp @@ -0,0 +1,6 @@ +//////////////////////////////////////////////////////////////////////////////// +// Keep this file empty, and implement unit tests in separate compilation units! +//////////////////////////////////////////////////////////////////////////////// + +#define CATCH_CONFIG_MAIN +#include \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cfba8a3..67133ea 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,8 +16,12 @@ set(SOURCES polysolve/LinearSolverPardiso.hpp polysolve/SaddlePointSolver.cpp polysolve/SaddlePointSolver.hpp + polysolve/LinearSolverAMGCL_cuda.cu + polysolve/LinearSolverAMGCL_cuda.hpp ) polysolve_prepend_current_path(SOURCES) polysolve_set_source_group(${SOURCES}) + + target_sources(polysolve PRIVATE ${SOURCES}) diff --git a/src/polysolve/LinearSolver.cpp b/src/polysolve/LinearSolver.cpp index 4757ce1..8b08f17 100644 --- a/src/polysolve/LinearSolver.cpp +++ b/src/polysolve/LinearSolver.cpp @@ -25,6 +25,9 @@ #endif #ifdef POLYSOLVE_WITH_AMGCL #include +#ifdef POLYSOLVE_WITH_CUDA +#include +#endif #endif #ifdef POLYSOLVE_WITH_CUSOLVER #include @@ -261,7 +264,12 @@ namespace polysolve else if (solver == "AMGCL") { return std::make_unique(); + } + else if (solver == "AMGCL_cuda") + { + return std::make_unique(); #endif + #if EIGEN_VERSION_AT_LEAST(3, 3, 0) // Available only with Eigen 3.3.0 and newer #ifndef POLYSOLVE_LARGE_INDEX @@ -379,6 +387,7 @@ namespace polysolve #endif #ifdef POLYSOLVE_WITH_AMGCL "AMGCL", + "AMGCL_cuda", #endif #if EIGEN_VERSION_AT_LEAST(3, 3, 0) #ifndef POLYSOLVE_LARGE_INDEX diff --git a/src/polysolve/LinearSolverAMGCL_cuda.cu b/src/polysolve/LinearSolverAMGCL_cuda.cu new file mode 100644 index 0000000..5b09387 --- /dev/null +++ b/src/polysolve/LinearSolverAMGCL_cuda.cu @@ -0,0 +1,177 @@ +#ifdef POLYSOLVE_WITH_AMGCL +#ifdef POLYSOLVE_WITH_CUDA + +//////////////////////////////////////////////////////////////////////////////// +#include + +#include +#include +//////////////////////////////////////////////////////////////////////////////// + +namespace polysolve +{ + + namespace + { + /* https://stackoverflow.com/questions/15904896/range-based-for-loop-on-a-dynamic-array */ + template + struct WrappedArray + { + WrappedArray(const T *first, const T *last) + : begin_{first}, end_{last} {} + WrappedArray(const T *first, std::ptrdiff_t size) + : WrappedArray{first, first + size} {} + + const T *begin() const noexcept { return begin_; } + const T *end() const noexcept { return end_; } + const T &operator[](const size_t i) const { return begin_[i]; } + + const T *begin_; + const T *end_; + }; + + json default_params() + { + json params = R"({ + "precond": { + "relax": { + "degree": 16, + "type": "chebyshev", + "power_iters": 100, + "higher": 2, + "lower": 0.008333333333, + "scale": true + }, + "class": "amg", + "max_levels": 6, + "direct_coarse": false, + "ncycle": 2, + "coarsening": { + "type": "smoothed_aggregation", + "estimate_spectral_radius": true, + "relax": 1, + "aggr": { + "eps_strong": 0 + } + } + }, + "solver": { + "tol": 1e-10, + "maxiter": 1000, + "type": "cg" + } + })"_json; + + return params; + } + + void set_params(const json ¶ms, json &out) + { + if (params.contains("AMGCL")) + { + // Patch the stored params with input ones + if (params["AMGCL"].contains("precond")) + out["precond"].merge_patch(params["AMGCL"]["precond"]); + if (params["AMGCL"].contains("solver")) + out["solver"].merge_patch(params["AMGCL"]["solver"]); + + if (out["precond"]["class"] == "schur_pressure_correction") + { + // Initialize the u and p solvers with a tolerance that is comparable to the main solver's + if (!out["precond"].contains("usolver")) + { + out["precond"]["usolver"] = R"({"solver": {"maxiter": 100}})"_json; + out["precond"]["usolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get(); + } + if (!out["precond"].contains("usolver")) + { + out["precond"]["psolver"] = R"({"solver": {"maxiter": 100}})"_json; + out["precond"]["psolver"]["solver"]["tol"] = 10 * out["solver"]["tol"].get(); + } + } + } + } + } // namespace + + //////////////////////////////////////////////////////////////////////////////// + + LinearSolverAMGCL_cuda::LinearSolverAMGCL_cuda() + { + params_ = default_params(); + // NOTE: usolver and psolver parameters are only used if the + // preconditioner class is "schur_pressure_correction" + precond_num_ = 0; + cusparseCreate(&backend_params_.cusparse_handle); + } + + // Set solver parameters + void LinearSolverAMGCL_cuda::setParameters(const json ¶ms) + { + if (params.contains("AMGCL")) + { + set_params(params, params_); + } + } + + void LinearSolverAMGCL_cuda::getInfo(json ¶ms) const + { + params["num_iterations"] = iterations_; + params["final_res_norm"] = residual_error_; + } + + //////////////////////////////////////////////////////////////////////////////// + + void LinearSolverAMGCL_cuda::factorize(const StiffnessMatrix &Ain) + { + assert(precond_num_ > 0); + + int numRows = Ain.rows(); + + WrappedArray ia(Ain.outerIndexPtr(), numRows + 1); + WrappedArray ja(Ain.innerIndexPtr(), Ain.nonZeros()); + WrappedArray a(Ain.valuePtr(), Ain.nonZeros()); + if (params_["precond"]["class"] == "schur_pressure_correction") + { + std::vector pmask(numRows, 0); + for (size_t i = precond_num_; i < numRows; ++i) + pmask[i] = 1; + params_["precond"]["pmask"] = pmask; + } + + // AMGCL takes the parameters as a Boost property_tree (i.e., another JSON data structure) + std::stringstream ss_params; + ss_params << params_; + boost::property_tree::ptree pt_params; + boost::property_tree::read_json(ss_params, pt_params); + auto A = std::tie(numRows, ia, ja, a); + solver_ = std::make_unique(A, pt_params,backend_params_); + iterations_ = 0; + residual_error_ = 0; + } + + //////////////////////////////////////////////////////////////////////////////// + + //////////////////////////////////////////////////////////////////////////////// + + void LinearSolverAMGCL_cuda::solve(const Eigen::Ref rhs, Eigen::Ref result) + { + assert(result.size() == rhs.size()); + std::vector _rhs(rhs.data(), rhs.data() + rhs.size()); + std::vector x(result.data(), result.data() + result.size()); + auto rhs_b = Backend::copy_vector(_rhs, backend_params_); + auto x_b = Backend::copy_vector(x, backend_params_); + + std::tie(iterations_, residual_error_) = (*solver_)(*rhs_b, *x_b); + thrust::copy(x_b->begin(), x_b->end(),result.data()); + } + + //////////////////////////////////////////////////////////////////////////////// + + LinearSolverAMGCL_cuda::~LinearSolverAMGCL_cuda() + { + } + +} // namespace polysolve + +#endif +#endif diff --git a/src/polysolve/LinearSolverAMGCL_cuda.hpp b/src/polysolve/LinearSolverAMGCL_cuda.hpp new file mode 100644 index 0000000..b3d0dce --- /dev/null +++ b/src/polysolve/LinearSolverAMGCL_cuda.hpp @@ -0,0 +1,107 @@ +#pragma once + +#ifdef POLYSOLVE_WITH_AMGCL +#ifdef POLYSOLVE_WITH_CUDA + +//////////////////////////////////////////////////////////////////////////////// +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +// #include +#include +#include +#include +#include +#include +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// +// WARNING: +// The matrix is assumed to be in row-major format, since AMGCL assumes that the +// outer index is for row. If the matrix is symmetric, you are fine, because CSR +// and CSC are the same. If the matrix is not symmetric and you pass in a +// column-major matrix, the solver will actually solve A^T x = b. +// + +namespace polysolve +{ + class LinearSolverAMGCL_cuda : public LinearSolver + { + + public: + LinearSolverAMGCL_cuda(); + ~LinearSolverAMGCL_cuda(); + + private: + POLYSOLVE_DELETE_MOVE_COPY(LinearSolverAMGCL_cuda) + + public: + ////////////////////// + // Public interface // + ////////////////////// + + // Set solver parameters + virtual void setParameters(const json ¶ms) override; + + // Retrieve information + virtual void getInfo(json ¶ms) const override; + + // Analyze sparsity pattern + virtual void analyzePattern(const StiffnessMatrix &A, const int precond_num) override + { + precond_num_ = precond_num; + } + + // Factorize system matrix + virtual void factorize(const StiffnessMatrix &A) override; + + // Solve the linear system Ax = b + virtual void solve(const Ref b, Ref x) override; + + // Name of the solver type (for debugging purposes) + virtual std::string name() const override { return "AMGCL_cuda"; } + + private: + using Backend = amgcl::backend::cuda; + using Solver = amgcl::make_solver< + amgcl::runtime::preconditioner, + amgcl::runtime::solver::wrapper>; + std::unique_ptr solver_; + json params_; + typename Backend::params backend_params_; + + int precond_num_; + int block_size_ = 1; + + // Output info + size_t iterations_; + double residual_error_; + }; + +} // namespace polysolve + +#endif +#endif diff --git a/src/polysolve/LinearSolverHypre.cpp b/src/polysolve/LinearSolverHypre.cpp index 243bd80..912a7f1 100644 --- a/src/polysolve/LinearSolverHypre.cpp +++ b/src/polysolve/LinearSolverHypre.cpp @@ -4,6 +4,7 @@ #include #include +#include //////////////////////////////////////////////////////////////////////////////// namespace polysolve @@ -14,16 +15,22 @@ namespace polysolve LinearSolverHypre::LinearSolverHypre() { precond_num_ = 0; -#ifdef MPI_VERSION - /* Initialize MPI */ - int argc = 1; - char name[] = ""; - char *argv[] = {name}; - char **argvv = &argv[0]; - int myid, num_procs; - MPI_Init(&argc, &argvv); - MPI_Comm_rank(MPI_COMM_WORLD, &myid); - MPI_Comm_size(MPI_COMM_WORLD, &num_procs); +#ifdef HYPRE_WITH_MPI + int done_already; + + MPI_Initialized(&done_already); + if (!done_already) + { + /* Initialize MPI */ + int argc = 1; + char name[] = ""; + char *argv[] = {name}; + char **argvv = &argv[0]; + int myid, num_procs; + MPI_Init(&argc, &argvv); + MPI_Comm_rank(MPI_COMM_WORLD, &myid); + MPI_Comm_size(MPI_COMM_WORLD, &num_procs); + } #endif } @@ -32,6 +39,13 @@ namespace polysolve { if (params.contains("Hypre")) { + if (params["Hypre"].contains("block_size")) + { + if (params["Hypre"]["block_size"]==2 || params["Hypre"]["block_size"]==3) + { + dimension_ = params["Hypre"]["block_size"]; + } + } if (params["Hypre"].contains("max_iter")) { max_iter_ = params["Hypre"]["max_iter"]; @@ -68,8 +82,11 @@ namespace polysolve has_matrix_ = true; const HYPRE_Int rows = Ain.rows(); const HYPRE_Int cols = Ain.cols(); - +#ifdef HYPRE_WITH_MPI HYPRE_IJMatrixCreate(MPI_COMM_WORLD, 0, rows - 1, 0, cols - 1, &A); +#else + HYPRE_IJMatrixCreate(hypre_MPI_COMM_WORLD, 0, rows - 1, 0, cols - 1, &A); +#endif // HYPRE_IJMatrixSetPrintLevel(A, 2); HYPRE_IJMatrixSetObjectType(A, HYPRE_PARCSR); HYPRE_IJMatrixInitialize(A); @@ -180,11 +197,18 @@ namespace polysolve HYPRE_IJVector x; HYPRE_ParVector par_x; +#ifdef HYPRE_WITH_MPI HYPRE_IJVectorCreate(MPI_COMM_WORLD, 0, rhs.size() - 1, &b); +#else + HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, 0, rhs.size() - 1, &b); +#endif HYPRE_IJVectorSetObjectType(b, HYPRE_PARCSR); HYPRE_IJVectorInitialize(b); - +#ifdef HYPRE_WITH_MPI HYPRE_IJVectorCreate(MPI_COMM_WORLD, 0, rhs.size() - 1, &x); +#else + HYPRE_IJVectorCreate(hypre_MPI_COMM_WORLD, 0, rhs.size() - 1, &b); +#endif HYPRE_IJVectorSetObjectType(x, HYPRE_PARCSR); HYPRE_IJVectorInitialize(x); @@ -210,7 +234,11 @@ namespace polysolve /* Create solver */ HYPRE_Solver solver, precond; +#ifdef HYPRE_WITH_MPI HYPRE_ParCSRPCGCreate(MPI_COMM_WORLD, &solver); +#else + HYPRE_ParCSRPCGCreate(hypre_MPI_COMM_WORLD, &solver); +#endif /* Set some parameters (See Reference Manual for more parameters) */ HYPRE_PCGSetMaxIter(solver, max_iter_); /* max iterations */ diff --git a/src/polysolve/LinearSolverHypre.hpp b/src/polysolve/LinearSolverHypre.hpp index ffc4e6f..6458296 100644 --- a/src/polysolve/LinearSolverHypre.hpp +++ b/src/polysolve/LinearSolverHypre.hpp @@ -12,7 +12,6 @@ #include #include #include -#include //////////////////////////////////////////////////////////////////////////////// // diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f9e005f..226578e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,7 +7,6 @@ set(test_sources test_solver.cpp ) add_executable(unit_tests ${test_sources}) - ################################################################################ # Required Libraries ################################################################################ diff --git a/tests/test_solver.cpp b/tests/test_solver.cpp index 983620f..f307602 100644 --- a/tests/test_solver.cpp +++ b/tests/test_solver.cpp @@ -572,6 +572,183 @@ TEST_CASE("amgcl_blocksolver_crystm03_Bicgstab", "[solver]") } #endif +#ifdef POLYSOLVE_WITH_HYPRE +TEST_CASE("Hyprel_b2", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + std::string MatrixName = "gr_30_30.mtx"; + Eigen::SparseMatrix A; + loadSymmetric(A, path + "/" + MatrixName); + std::cout << "Matrix Load OK" << std::endl; + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(b.size()); + x.setZero(); + Eigen::VectorXd x_b(b.size()); + x_b.setZero(); + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Scalar Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["block_size"] = 2; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x_b); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Block Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + const double err = (A * x - b).norm() / b.norm(); + const double err_b = (A * x_b - b).norm() / b.norm(); + std::cout << "Scalar relative error " << err << std::endl; + std::cout << "Block relative error " << err_b << std::endl; + REQUIRE(err < 1e-8); + REQUIRE(err_b < 1e-8); +} +#endif + +#ifdef POLYSOLVE_WITH_HYPRE +TEST_CASE("Hybre_crystm03", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + std::string MatrixName = "crystm03.mtx"; + Eigen::SparseMatrix A; + loadSymmetric(A, path + "/" + MatrixName); + std::cout << "Matrix Load OK" << std::endl; + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(b.size()); + x.setZero(); + Eigen::VectorXd x_b(b.size()); + x_b.setZero(); + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Scalar Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["block_size"] = 3; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x_b); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Block Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + const double err = (A * x - b).norm() / b.norm(); + const double err_b = (A * x_b - b).norm() / b.norm(); + std::cout << "Scalar relative error " << err << std::endl; + std::cout << "Block relative error " << err_b << std::endl; + REQUIRE(err < 1e-8); + REQUIRE(err_b < 1e-8); +} +#endif + +#ifdef POLYSOLVE_WITH_HYPRE +TEST_CASE("hypre_smallscale", "[solver]") +{ + const std::string path = POLYSOLVE_DATA_DIR; + Eigen::SparseMatrix A; + const bool ok = loadMarket(A, path + "/A_2.mat"); + REQUIRE(ok); + Eigen::VectorXd b(A.rows()); + b.setOnes(); + Eigen::VectorXd x(b.size()); + x.setZero(); + Eigen::VectorXd x_b(b.size()); + x_b.setZero(); + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Scalar Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + { + clock_t start, end; + json solver_info; + start = clock(); + auto solver = LinearSolver::create("Hypre", ""); + json params; + params["Hypre"]["block_size"] = 3; + params["Hypre"]["tolerance"] = 1e-8; + params["Hypre"]["max_iter"] = 1000; + solver->setParameters(params); + solver->analyzePattern(A, A.rows()); + solver->factorize(A); + solver->solve(b, x_b); + end = clock(); + solver->getInfo(solver_info); + std::cout << "Block Running time is " << double(end - start) / CLOCKS_PER_SEC << std::endl; + std::cout << solver_info["num_iterations"] << std::endl; + std::cout << solver_info["final_res_norm"] << std::endl; + } + const double err = (A * x - b).norm() / b.norm(); + const double err_b = (A * x_b - b).norm() / b.norm(); + std::cout << "Scalar relative error " << err << std::endl; + std::cout << "Block relative error " << err_b << std::endl; + REQUIRE(err < 1e-8); + REQUIRE(err_b < 1e-8); + #ifdef POLYSOLVE_WITH_CUSOLVER TEST_CASE("cusolverdn", "[solver]") {