From fd565575b42c09a3304b4d0edcffcfd464ec0916 Mon Sep 17 00:00:00 2001 From: Thadeu Luiz Barbosa Dias Date: Mon, 23 Dec 2019 16:05:00 -0300 Subject: [PATCH] allocation cleanup and benchmarking integration --- .gitignore | 1 + CMakeLists.txt | 1 + lib/circuit/src/circuit.cpp | 17 +++-- lib/circuit/test/CMakeLists.txt | 10 ++- lib/circuit/test/circuit_test.cpp | 114 ++++-------------------------- test/src/test_main.cpp | 1 + third_party/CMakeLists.txt | 2 +- 7 files changed, 31 insertions(+), 115 deletions(-) diff --git a/.gitignore b/.gitignore index 567609b..a3a2dbd 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ build/ +.clangd/ diff --git a/CMakeLists.txt b/CMakeLists.txt index f9366db..2a4d7d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON) find_package(PkgConfig REQUIRED) # external packages +find_package(Catch2 REQUIRED) find_package(Boost REQUIRED) find_package(Qt5 COMPONENTS Core Widgets REQUIRED) find_package(CUDA REQUIRED) diff --git a/lib/circuit/src/circuit.cpp b/lib/circuit/src/circuit.cpp index d072ae3..d6c283f 100644 --- a/lib/circuit/src/circuit.cpp +++ b/lib/circuit/src/circuit.cpp @@ -172,31 +172,30 @@ namespace rtspice::circuit { assert(row_begin == nodes_.pointers.end() && "not all coordinates used"); //get optimal pattern - const auto perm = std::make_unique(m); + int perm[m]; auto status = cusolverSpXcsrsymmdqHost(context_.solver_handle, m, nnz, - sys.desc_A, sys.row.get(), sys.col.get(), perm.get()); + sys.desc_A, sys.row.get(), sys.col.get(), perm); assert(status == CUSOLVER_STATUS_SUCCESS); //allocate permutation worksize size_t bsize; status = cusolverSpXcsrperm_bufferSizeHost(context_.solver_handle, m, m, nnz, - sys.desc_A, sys.row.get(), sys.col.get(), perm.get(), perm.get(), &bsize); + sys.desc_A, sys.row.get(), sys.col.get(), perm, perm, &bsize); assert(status == CUSOLVER_STATUS_SUCCESS); - const auto work = std::make_unique(bsize); - const auto map = std::make_unique(nnz); - iota(map.get(), map.get()+nnz, 0); + uint8_t work[bsize]; + int map[nnz]; + iota(map, map+nnz, 0); //perform Q * A * Q^T status = cusolverSpXcsrpermHost(context_.solver_handle, m, m, nnz, sys.desc_A, sys.row.get(), sys.col.get(), - perm.get(), perm.get(), map.get(), - work.get()); + perm, perm, map, work); assert(status == CUSOLVER_STATUS_SUCCESS); //update node name map for(auto&& [_, idx]: nodes_.names) - idx = find(perm.get(), perm.get()+m, idx) - perm.get(); + idx = find(perm, perm+m, idx) - perm; for(auto&& kv: nodes_.pointers){ diff --git a/lib/circuit/test/CMakeLists.txt b/lib/circuit/test/CMakeLists.txt index ead296c..cf46280 100644 --- a/lib/circuit/test/CMakeLists.txt +++ b/lib/circuit/test/CMakeLists.txt @@ -1,7 +1,5 @@ add_executable(circuit_test circuit_test.cpp) -target_link_libraries( - circuit_test - PRIVATE - circuit - test_main -) +target_link_libraries(circuit_test PRIVATE circuit test_main) + +include(Catch) +catch_discover_tests(circuit_test) diff --git a/lib/circuit/test/circuit_test.cpp b/lib/circuit/test/circuit_test.cpp index e30d387..06f75dd 100644 --- a/lib/circuit/test/circuit_test.cpp +++ b/lib/circuit/test/circuit_test.cpp @@ -22,6 +22,7 @@ #include #include +#define CATCH_CONFIG_ENABLE_BENCHMARKING #include @@ -38,9 +39,6 @@ using namespace rtspice::components; using std::vector; -using std::chrono::high_resolution_clock; -using std::chrono::nanoseconds; - using rtspice::circuit::circuit; SCENARIO("circuit initialization", "[circuit]") { @@ -80,17 +78,12 @@ SCENARIO("circuit initialization", "[circuit]") { CHECK(*c.get_x("1") == Approx(1.0f)); CHECK(*c.get_x("2") == Approx(0.5f)); - constexpr auto NITER = 44100; - - const auto start = high_resolution_clock::now(); - - for(auto i = 0; i < NITER; i++) + // check stability and benchmark + BENCHMARK("Newton-Raphson step on DC"){ c.nr_step_(); + }; - const auto delta = high_resolution_clock::now() - start; - const auto avgTime = nanoseconds{delta}.count()/NITER; - INFO( "average iteration time: " << avgTime << " ns"); CHECK(*c.get_x("1") == Approx(1.0f)); CHECK(*c.get_x("2") == Approx(0.5f)); } @@ -111,36 +104,16 @@ SCENARIO("nonlinear simulation", "[circuit]") { circuit c{components}; THEN("Newton-Raphson converges") { - { - const auto start = high_resolution_clock::now(); - const auto i = c.nr_step_(); - - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeDC = nanoseconds{delta}.count(); - - - INFO("DC Runtime = " << RunTimeDC << " ns"); - INFO("V[diode] = " << *c.get_x("2") << " V"); CHECK( i > 0 ); - } - - { - const auto start = high_resolution_clock::now(); - - for(auto i = 0; i < 100; i++) c.nr_step_(); - - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeBasicStep = nanoseconds{delta}.count(); + } - INFO("Average Runtime Basic Step = " << RunTimeBasicStep/100 << " ns"); - INFO("V[diode] = " << *c.get_x("2") << " V"); - REQUIRE(true); - } - - } + BENCHMARK("Newton-Raphson step on Diode"){ + c.nr_step_(); + }; + INFO("V[diode] = " << *c.get_x("2") << " V"); } } @@ -162,14 +135,7 @@ SCENARIO("dynamic simulation", "[circuit]") { constexpr int niter = 10; THEN("time simulation converges") { - const auto start = high_resolution_clock::now(); - const auto i = c.advance_(delta_t); - - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeTran = nanoseconds{delta}.count(); - - INFO("Transient step Runtime = " << RunTimeTran << " ns"); INFO("V[cap] = " << *c.get_x("2") << " V"); REQUIRE( i > 0 ); } @@ -181,23 +147,15 @@ SCENARIO("dynamic simulation", "[circuit]") { const auto vptr = c.get_x("2"); - const auto start = high_resolution_clock::now(); - for(auto iter = 0; iter < niter; ++iter) { is[iter] = c.advance_(delta_t); vs[iter] = *vptr; } - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeTran = nanoseconds{delta}.count(); - for(auto iter = 0; iter < niter; ++iter) { INFO("V[cap] = " << vs[iter] << " V"); CHECK(is[iter] > 0); } - INFO("Transient step Runtime = " << RunTimeTran/niter << " ns"); - REQUIRE(true); - } } @@ -222,14 +180,7 @@ SCENARIO("nonlinear dynamic simulation", "[circuit]") { constexpr int niter = 10; THEN("time simulation converges") { - const auto start = high_resolution_clock::now(); - const auto i = c.advance_(delta_t); - - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeTran = nanoseconds{delta}.count(); - - INFO("Transient step Runtime = " << RunTimeTran << " ns"); INFO("V[cap] = " << *c.get_x("2") << " V"); REQUIRE( i > 0 ); } @@ -241,22 +192,15 @@ SCENARIO("nonlinear dynamic simulation", "[circuit]") { const auto vptr = c.get_x("2"); - const auto start = high_resolution_clock::now(); - for(auto iter = 0; iter < niter; ++iter) { is[iter] = c.advance_(delta_t); vs[iter] = *vptr; } - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeTran = nanoseconds{delta}.count(); - for(auto iter = 0; iter < niter; ++iter) { INFO("V[cap] = " << vs[iter] << " V"); REQUIRE(is[iter] > 0); } - INFO("Transient step Runtime = " << RunTimeTran/niter << " ns"); - REQUIRE(true); } @@ -269,19 +213,12 @@ SCENARIO("nonlinear dynamic simulation", "[circuit]") { const auto vptr = c.get_x("2"); - const auto start = high_resolution_clock::now(); - for(auto iter = 0; iter < sim_size; ++iter) { is[iter] = c.advance_(delta_t); vs[iter] = *vptr; } - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeTran = nanoseconds{delta}.count(); - REQUIRE(std::all_of(is.cbegin(), is.cend(), [](auto i){ return i > 0; })); - INFO("Transient step Runtime = " << RunTimeTran/sim_size << " ns"); - REQUIRE(true); std::ofstream v_file("sim_v.txt"); std::copy(vs.cbegin(), vs.cend(), std::ostream_iterator(v_file, "\n")); @@ -337,45 +274,24 @@ SCENARIO("basic circuit simulation", "[circuit]") { circuit c{components}; constexpr float delta_t = 1.0 / 44100.0; - constexpr int niter = 44100; THEN("basic simulation") { //std::vector vs(niter); - std::vector is(niter); + std::vector is; //std::vector> ts(niter); const auto vptr = c.get_x("7"); //c.nr_step_(); //dc point - const auto start = high_resolution_clock::now(); - - for(auto iter = 0; iter < niter; ++iter) { - is[iter] = c.advance_(delta_t); - //vs[iter] = *vptr; - //ts[iter] = high_resolution_clock::now(); - } + BENCHMARK_ADVANCED("basic simulation")(Catch::Benchmark::Chronometer meter) { + auto niter = meter.runs(); + is.resize(niter); + meter.measure([&](auto i){ is[i] = c.advance_(delta_t); }); + }; - const auto delta = high_resolution_clock::now() - start; - const auto RunTimeTran = nanoseconds{delta}.count(); CHECK(std::all_of(is.cbegin(), is.cend(), [](auto i){ return i > 0; })); - - const auto inner_it = std::accumulate(is.cbegin(), is.cend(), 0); - - INFO("Average solve runtime = " << RunTimeTran/inner_it << " ns"); - REQUIRE(true); - - //std::ofstream v_file("sim_vout.txt"); - //std::copy(vs.cbegin(), vs.cend(), std::ostream_iterator(v_file, "\n")); - - //std::ofstream i_file("sim_steps.txt"); - //std::copy(is.cbegin(), is.cend(), std::ostream_iterator(i_file, "\n")); - - //std::ofstream t_file("sim_times.txt"); - //auto it = std::ostream_iterator(t_file, "\n"); - //for(auto i = 1; i < niter; ++i) - // *it++ = nanoseconds{ts[i] - ts[i-1]}.count(); } } diff --git a/test/src/test_main.cpp b/test/src/test_main.cpp index c1ef972..b7d193e 100644 --- a/test/src/test_main.cpp +++ b/test/src/test_main.cpp @@ -15,5 +15,6 @@ * GNU General Public License as published by the Free Software Foundation. */ +#define CATCH_CONFIG_ENABLE_BENCHMARKING #define CATCH_CONFIG_MAIN #include "catch2/catch.hpp" diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index 346ece9..5255117 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -1 +1 @@ -add_subdirectory(Catch2/) +#add_subdirectory(Catch2/)