From fad9b78e881b7972bd2d86d999db5b188f32cbb4 Mon Sep 17 00:00:00 2001 From: Ahsan Saghir <142340507+ahsan-ca@users.noreply.github.com> Date: Tue, 23 Apr 2024 12:16:37 -0400 Subject: [PATCH 1/2] Limit parallelism for constant propagation when parallel STL is not enabled (#2987) --- src/propagate_constant.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/propagate_constant.cpp b/src/propagate_constant.cpp index c917d29b3ed..6fffef1edaf 100644 --- a/src/propagate_constant.cpp +++ b/src/propagate_constant.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include namespace migraphx { @@ -83,7 +84,12 @@ void propagate_constant::apply(module& m) const // Compute literals in parallel std::vector const_instrs_vec{const_instrs.begin(), const_instrs.end()}; std::vector literals(const_instrs_vec.size()); - simple_par_for(const_instrs_vec.size(), 1, [&](const auto i) { + std::size_t n = 1; +#if !MIGRAPHX_HAS_EXECUTORS + n = std::max( + std::ceil(static_cast(1024) / std::thread::hardware_concurrency()), 1); +#endif + simple_par_for(const_instrs_vec.size(), n, [&](const auto i) { literals[i] = const_instrs_vec[i]->eval(); }); From cba5184266ebbe483db62cfcfd3953645568c022 Mon Sep 17 00:00:00 2001 From: Ahsan Saghir Date: Tue, 23 Apr 2024 18:40:46 +0000 Subject: [PATCH 2/2] Changes from 2993 to limit parallelism more conservatively --- src/propagate_constant.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/propagate_constant.cpp b/src/propagate_constant.cpp index 6fffef1edaf..3a6856d9450 100644 --- a/src/propagate_constant.cpp +++ b/src/propagate_constant.cpp @@ -84,12 +84,12 @@ void propagate_constant::apply(module& m) const // Compute literals in parallel std::vector const_instrs_vec{const_instrs.begin(), const_instrs.end()}; std::vector literals(const_instrs_vec.size()); - std::size_t n = 1; + std::size_t grainsize = 1; #if !MIGRAPHX_HAS_EXECUTORS - n = std::max( - std::ceil(static_cast(1024) / std::thread::hardware_concurrency()), 1); + std::size_t n = std::max(2048 / std::thread::hardware_concurrency(), 1); + grainsize = const_instrs_vec.size() / n; #endif - simple_par_for(const_instrs_vec.size(), n, [&](const auto i) { + simple_par_for(const_instrs_vec.size(), grainsize, [&](const auto i) { literals[i] = const_instrs_vec[i]->eval(); });