diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml old mode 100755 new mode 100644 index edbc8446fbc..556f4cf833a --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -235,8 +235,8 @@ jobs: mkdir build && cd build && cmake -DCMAKE_CXX_STANDARD=23 -DCMAKE_CXX_COMPILER=icpx -DONEDPL_BACKEND=dpcpp -DCMAKE_BUILD_TYPE=Release .. && - cmake --build . --target shp-all-tests -j${nproc} && - ctest --test-dir . -L SHP -j 4" + cmake --build . --target shp-all-tests -j${BUILD_CONCURRENCY} && + ctest --test-dir . -L SHP -j${BUILD_CONCURRENCY}" docker logs -f dr-test exit_code=$(docker inspect dr-test --format='{{.State.ExitCode}}') docker rm -f dr-test diff --git a/include/oneapi/dpl/distributed-ranges b/include/oneapi/dpl/distributed-ranges index 18690d0615b..75588c75eda 100644 --- a/include/oneapi/dpl/distributed-ranges +++ b/include/oneapi/dpl/distributed-ranges @@ -19,4 +19,4 @@ #error "C++23 required to use Distributed Ranges" #endif -#endif /* _ONEDPL_DISTRIBUTED_RANGES */ +#endif // _ONEDPL_DISTRIBUTED_RANGES diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp index 38795eae3d6..0eb3f29ed3c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp @@ -4,7 +4,6 @@ #pragma once -// #include "detail/logger.hpp" #include "shp/algorithms/algorithms.hpp" #include "shp/detail.hpp" #include "shp/distributed_span.hpp" diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp index 92cff9224ed..9f3b574c055 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp @@ -23,7 +23,6 @@ template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { - // auto &&q = __detail::default_queue(); auto&& q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(std::to_address(d_first), std::to_address(first), sizeof(std::iter_value_t) * (last - first)); @@ -43,7 +42,6 @@ template requires __detail::is_syclmemcopyable, T> sycl::event copy_async(Iter first, Iter last, device_ptr d_first) { - // auto &&q = __detail::default_queue(); auto&& q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(d_first.get_raw_pointer(), std::to_address(first), sizeof(T) * (last - first)); } @@ -61,7 +59,6 @@ template requires __detail::is_syclmemcopyable> sycl::event copy_async(device_ptr first, device_ptr last, Iter d_first) { - // auto &&q = __detail::default_queue(); auto&& q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(std::to_address(d_first), first.get_raw_pointer(), sizeof(T) * (last - first)); } @@ -79,7 +76,6 @@ template requires(!std::is_const_v && std::is_trivially_copyable_v) sycl::event copy_async(device_ptr> first, device_ptr> last, device_ptr d_first) { - // auto &&q = __detail::default_queue(); auto&& q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(d_first.get_raw_pointer(), first.get_raw_pointer(), sizeof(T) * (last - first)); } diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index 94d07078cfd..878d02f924c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -24,7 +24,7 @@ requires(!std::is_const_v> && std::is_trivially_copyable { auto&& q = __detail::get_queue_for_pointer(first); std::iter_value_t* arr = std::to_address(first); - // not using q.fill because of CMPLRLLVM-46438 + // not using q.fill because p2p communication is not working when using sycl::queue.fill or sycl::queue.parallel_for with nondefault KernelName (CMPLRLLVM-46438) return dr::__detail::parallel_for(q, sycl::range<>(last - first), [=](auto idx) { arr[idx] = value; }); } @@ -41,7 +41,7 @@ requires(std::indirectly_writable, U>) sycl::event { auto&& q = __detail::get_queue_for_pointer(first); auto* arr = first.get_raw_pointer(); - // not using q.fill because of CMPLRLLVM-46438 + // not using q.fill because p2p communication is not working when using sycl::queue.fill or sycl::queue.parallel_for with nondefault KernelName (CMPLRLLVM-46438) return dr::__detail::parallel_for(q, sycl::range<>(last - first), [=](auto idx) { arr[idx] = value; }); } @@ -57,7 +57,7 @@ fill_async(R&& r, const T& value) { auto&& q = __detail::queue(ranges::rank(r)); auto* arr = std::to_address(rng::begin(ranges::local(r))); - // not using q.fill because of CMPLRLLVM-46438 + // not using q.fill because p2p communication is not working when using sycl::queue.fill or sycl::queue.parallel_for with nondefault KernelName (CMPLRLLVM-46438) return dr::__detail::parallel_for(q, sycl::range<>(rng::distance(r)), [=](auto idx) { arr[idx] = value; }); } diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp index 0eaa5e298ab..004c96fc9b3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp @@ -122,7 +122,7 @@ queue(std::size_t rank) return queues_[rank]; } -// Retrieve global queues because of CMPLRLLVM-47008 +// Retrieve global queues because of observed significantly reduced copy performance when using newly constructed queues versus using pre-constructed global queues. (CMPLRLLVM-47008) inline sycl::queue& queue(const sycl::device& device) {