diff --git a/include/dlaf/matrix/matrix.h b/include/dlaf/matrix/matrix.h index 0e48a87022..53244137e5 100644 --- a/include/dlaf/matrix/matrix.h +++ b/include/dlaf/matrix/matrix.h @@ -129,6 +129,15 @@ class Matrix : public Matrix { return readwrite(this->distribution().localTileIndex(index)); } +private: + using typename Matrix::SubPipelineTag; + Matrix(Matrix& mat, const SubPipelineTag); + +public: + Matrix subPipeline() { + return Matrix(*this, SubPipelineTag{}); + } + protected: using Matrix::tileLinearIndex; @@ -186,10 +195,18 @@ class Matrix : public internal::MatrixBase { /// involving any of the locally available tiles are completed. void waitLocalTiles() noexcept; + Matrix subPipelineConst() { + return Matrix(*this, SubPipelineTag{}); + } + protected: Matrix(Distribution distribution) : internal::MatrixBase{std::move(distribution)} {} + struct SubPipelineTag {}; + Matrix(Matrix& mat, const SubPipelineTag); + void setUpTiles(const memory::MemoryView& mem, const LayoutInfo& layout) noexcept; + void setUpSubPipelines(Matrix&) noexcept; std::vector> tile_managers_; }; diff --git a/include/dlaf/matrix/matrix.tpp b/include/dlaf/matrix/matrix.tpp index 5dbe70cbc0..09f68fc401 100644 --- a/include/dlaf/matrix/matrix.tpp +++ b/include/dlaf/matrix/matrix.tpp @@ -56,5 +56,8 @@ Matrix::Matrix(Distribution distribution, const LayoutInfo& layout, Elemen template Matrix::Matrix(const LayoutInfo& layout, ElementType* ptr) : Matrix(layout, ptr) {} +template +Matrix::Matrix(Matrix& mat, const SubPipelineTag tag) : Matrix(mat, tag) {} + } } diff --git a/include/dlaf/matrix/matrix_const.tpp b/include/dlaf/matrix/matrix_const.tpp index b0bb711051..87883d052d 100644 --- a/include/dlaf/matrix/matrix_const.tpp +++ b/include/dlaf/matrix/matrix_const.tpp @@ -72,5 +72,29 @@ void Matrix::setUpTiles(const memory::MemoryView& me } } +template +Matrix::Matrix(Matrix& mat, const SubPipelineTag) + : MatrixBase(mat.distribution()) { + setUpSubPipelines(mat); +} + +template +void 
Matrix::setUpSubPipelines(Matrix& mat) noexcept { + namespace ex = pika::execution::experimental; + + // TODO: Optimize read-after-read. This is currently forced to access the base + // matrix in readwrite mode so that we can move the tile into the + // sub-pipeline. This is semantically not required and should eventually be + // optimized. + tile_managers_.reserve(mat.tile_managers_.size()); + for (auto& tm : mat.tile_managers_) { + tile_managers_.emplace_back(Tile()); + auto s = ex::when_all(tile_managers_.back().readwrite_with_wrapper(), tm.readwrite()) | + ex::then([](internal::TileAsyncRwMutexReadWriteWrapper empty_tile_wrapper, + Tile tile) { empty_tile_wrapper.get() = std::move(tile); }); + ex::start_detached(std::move(s)); + } +} + } } diff --git a/test/unit/matrix/test_matrix.cpp b/test/unit/matrix/test_matrix.cpp index 19b03f99ff..65694a6328 100644 --- a/test/unit/matrix/test_matrix.cpp +++ b/test/unit/matrix/test_matrix.cpp @@ -93,18 +93,41 @@ TYPED_TEST(MatrixLocalTest, StaticAPIConst) { TYPED_TEST(MatrixLocalTest, Constructor) { using Type = TypeParam; - auto el = [](const GlobalElementIndex& index) { + BaseType c = 0.0; + auto el = [&](const GlobalElementIndex& index) { SizeType i = index.row(); SizeType j = index.col(); - return TypeUtilities::element(i + j / 1024., j - i / 128.); + return TypeUtilities::element(i + j / 1024. 
+ c, j - i / 128.); }; for (const auto& test : sizes_tests) { Matrix mat(test.size, test.block_size); - EXPECT_EQ(Distribution(test.size, test.block_size), mat.distribution()); + { + EXPECT_EQ(Distribution(test.size, test.block_size), mat.distribution()); + + set(mat, el); + + CHECK_MATRIX_EQ(el, mat); + } + + { + auto mat_sub = mat.subPipelineConst(); + EXPECT_EQ(mat_sub.distribution(), mat.distribution()); + + CHECK_MATRIX_EQ(el, mat_sub); + } + + c = 1.0; + + { + auto mat_sub = mat.subPipeline(); + EXPECT_EQ(mat_sub.distribution(), mat.distribution()); + + set(mat_sub, el); - set(mat, el); + CHECK_MATRIX_EQ(el, mat_sub); + } CHECK_MATRIX_EQ(el, mat); } @@ -112,10 +135,11 @@ TYPED_TEST(MatrixLocalTest, Constructor) { TYPED_TEST(MatrixTest, Constructor) { using Type = TypeParam; - auto el = [](const GlobalElementIndex& index) { + BaseType c = 0.0; + auto el = [&](const GlobalElementIndex& index) { SizeType i = index.row(); SizeType j = index.col(); - return TypeUtilities::element(i + j / 1024., j - i / 128.); + return TypeUtilities::element(i + j / 1024. 
+ c, j - i / 128.); }; for (const auto& comm_grid : this->commGrids()) { @@ -123,10 +147,32 @@ TYPED_TEST(MatrixTest, Constructor) { GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); Matrix mat(size, test.block_size, comm_grid); - EXPECT_EQ(Distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}), - mat.distribution()); + { + EXPECT_EQ(Distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}), + mat.distribution()); - set(mat, el); + set(mat, el); + + CHECK_MATRIX_EQ(el, mat); + } + + { + auto mat_sub = mat.subPipelineConst(); + EXPECT_EQ(mat_sub.distribution(), mat.distribution()); + + CHECK_MATRIX_EQ(el, mat_sub); + } + + c = 1.0; + + { + auto mat_sub = mat.subPipeline(); + EXPECT_EQ(mat_sub.distribution(), mat.distribution()); + + set(mat_sub, el); + + CHECK_MATRIX_EQ(el, mat_sub); + } CHECK_MATRIX_EQ(el, mat); } @@ -135,10 +181,11 @@ TYPED_TEST(MatrixTest, Constructor) { TYPED_TEST(MatrixTest, ConstructorFromDistribution) { using Type = TypeParam; - auto el = [](const GlobalElementIndex& index) { + BaseType c = 0.0; + auto el = [&](const GlobalElementIndex& index) { SizeType i = index.row(); SizeType j = index.col(); - return TypeUtilities::element(i + j / 1024., j - i / 128.); + return TypeUtilities::element(i + j / 1024. 
+ c, j - i / 128.); }; for (const auto& comm_grid : this->commGrids()) { @@ -154,9 +201,31 @@ TYPED_TEST(MatrixTest, ConstructorFromDistribution) { Matrix mat(std::move(distribution)); - EXPECT_EQ(distribution_copy, mat.distribution()); + { + EXPECT_EQ(distribution_copy, mat.distribution()); + + set(mat, el); - set(mat, el); + CHECK_MATRIX_EQ(el, mat); + } + + { + auto mat_sub = mat.subPipelineConst(); + EXPECT_EQ(mat_sub.distribution(), mat.distribution()); + + CHECK_MATRIX_EQ(el, mat_sub); + } + + c = 1.0; + + { + auto mat_sub = mat.subPipeline(); + EXPECT_EQ(mat_sub.distribution(), mat.distribution()); + + set(mat_sub, el); + + CHECK_MATRIX_EQ(el, mat_sub); + } CHECK_MATRIX_EQ(el, mat); } @@ -304,12 +373,35 @@ TYPED_TEST(MatrixTest, ConstructorFromDistributionLayout) { Distribution distribution_copy(distribution); Matrix mat(std::move(distribution), layout); + Type* ptr = nullptr; if (!mat.distribution().localSize().isEmpty()) { ptr = tt::sync_wait(mat.readwrite(LocalTileIndex(0, 0))).ptr(); } CHECK_DISTRIBUTION_LAYOUT(ptr, distribution_copy, layout, mat); + + { + auto mat_sub = mat.subPipelineConst(); + + const Type* ptr_sub = nullptr; + if (!mat_sub.distribution().localSize().isEmpty()) { + ptr_sub = tt::sync_wait(mat_sub.read(LocalTileIndex(0, 0))).get().ptr(); + } + + ASSERT_EQ(ptr, ptr_sub); + } + + { + auto mat_sub = mat.subPipeline(); + + Type* ptr_sub = nullptr; + if (!mat_sub.distribution().localSize().isEmpty()) { + ptr_sub = tt::sync_wait(mat_sub.readwrite(LocalTileIndex(0, 0))).ptr(); + } + + ASSERT_EQ(ptr, ptr_sub); + } } } } @@ -343,6 +435,18 @@ TYPED_TEST(MatrixTest, LocalGlobalAccessOperatorCall) { EXPECT_NE(ptr_global, nullptr); EXPECT_EQ(ptr_global, ptr_local); + + const TypeParam* ptr_sub_global = [&]() { + auto mat_sub = mat.subPipeline(); + return tt::sync_wait(mat_sub.readwrite(global_index)).ptr(TileElementIndex{0, 0}); + }(); + const TypeParam* ptr_sub_local = [&]() { + auto mat_sub = mat.subPipeline(); + return 
tt::sync_wait(mat_sub.readwrite(local_index)).ptr(TileElementIndex{0, 0}); + }(); + + EXPECT_EQ(ptr_sub_global, ptr_global); + EXPECT_EQ(ptr_sub_local, ptr_global); } } } @@ -373,12 +477,36 @@ TYPED_TEST(MatrixTest, LocalGlobalAccessRead) { LocalTileIndex local_index = dist.localTileIndex(global_index); const TypeParam* ptr_global = - tt::sync_wait(mat.readwrite(global_index)).ptr(TileElementIndex{0, 0}); + tt::sync_wait(mat.read(global_index)).get().ptr(TileElementIndex{0, 0}); const TypeParam* ptr_local = - tt::sync_wait(mat.readwrite(local_index)).ptr(TileElementIndex{0, 0}); + tt::sync_wait(mat.read(local_index)).get().ptr(TileElementIndex{0, 0}); EXPECT_NE(ptr_global, nullptr); EXPECT_EQ(ptr_global, ptr_local); + + const TypeParam* ptr_sub_global = [&]() { + auto mat_sub = mat.subPipeline(); + return tt::sync_wait(mat_sub.read(global_index)).get().ptr(TileElementIndex{0, 0}); + }(); + const TypeParam* ptr_sub_local = [&]() { + auto mat_sub = mat.subPipeline(); + return tt::sync_wait(mat_sub.read(local_index)).get().ptr(TileElementIndex{0, 0}); + }(); + + EXPECT_EQ(ptr_sub_global, ptr_global); + EXPECT_EQ(ptr_sub_local, ptr_global); + + const TypeParam* ptr_sub_const_global = [&]() { + auto mat_sub = mat.subPipelineConst(); + return tt::sync_wait(mat_sub.read(global_index)).get().ptr(TileElementIndex{0, 0}); + }(); + const TypeParam* ptr_sub_const_local = [&]() { + auto mat_sub = mat.subPipelineConst(); + return tt::sync_wait(mat_sub.read(local_index)).get().ptr(TileElementIndex{0, 0}); + }(); + + EXPECT_EQ(ptr_sub_const_global, ptr_global); + EXPECT_EQ(ptr_sub_const_local, ptr_global); } } } @@ -417,6 +545,16 @@ TYPED_TEST(MatrixLocalTest, ConstructorExisting) { Matrix mat(layout, mem()); CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub = mat.subPipeline(); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } } } @@ -431,6 +569,11 @@ 
TYPED_TEST(MatrixLocalTest, ConstructorExistingConst) { Matrix mat(layout, p); CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } } } @@ -450,6 +593,16 @@ TYPED_TEST(MatrixTest, ConstructorExisting) { Matrix mat(std::move(distribution), layout, mem()); CHECK_DISTRIBUTION_LAYOUT(mem(), distribution_copy, layout, mat); + + { + auto mat_sub = mat.subPipeline(); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution_copy, layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution_copy, layout, mat_sub_const); + } } } } @@ -472,6 +625,11 @@ TYPED_TEST(MatrixTest, ConstructorExistingConst) { Matrix mat(std::move(distribution), layout, p); CHECK_DISTRIBUTION_LAYOUT(mem(), distribution_copy, layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution_copy, layout, mat_sub_const); + } } } } @@ -528,64 +686,112 @@ TYPED_TEST(MatrixTest, Dependencies) { } } -TYPED_TEST(MatrixTest, DependenciesConst) { +TYPED_TEST(MatrixTest, DependenciesSubPipeline) { using Type = TypeParam; for (const auto& comm_grid : this->commGrids()) { for (const auto& test : sizes_tests) { + // Dependencies graph: + // rw0 - rw1 - ro2a - rw3 - ro4a - rw5 + // \ ro2b / \ ro4b / + // + // +--------+ + // sub pipeline + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + Matrix mat(size, test.block_size, comm_grid); - Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); - LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size); - memory::MemoryView mem(layout.minMemSize()); - const Type* p = mem(); - Matrix mat(std::move(distribution), layout, p); - auto rosenders1 = getReadSendersUsingGlobalIndex(mat); - EXPECT_TRUE(checkSendersStep(rosenders1.size(), rosenders1)); + auto senders0 = 
getReadWriteSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(senders0.size(), senders0)); - auto rosenders2 = getReadSendersUsingLocalIndex(mat); - EXPECT_TRUE(checkSendersStep(rosenders2.size(), rosenders2)); + auto senders1 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders1)); + + auto [rosenders2a, rosenders2b, senders3] = [&]() { + auto mat_sub = mat.subPipeline(); + + auto rosenders2a = getReadSendersUsingLocalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + + auto rosenders2b = getReadSendersUsingGlobalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + + auto senders3 = getReadWriteSendersUsingLocalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, senders3)); + + return std::tuple(std::move(rosenders2a), std::move(rosenders2b), std::move(senders3)); + }(); + + auto rosenders4a = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, rosenders4a)); + + CHECK_MATRIX_SENDERS(true, senders1, senders0); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + CHECK_MATRIX_SENDERS(true, rosenders2b, senders1); + EXPECT_TRUE(checkSendersStep(rosenders2a.size(), rosenders2a)); + + CHECK_MATRIX_SENDERS(false, senders3, rosenders2b); + CHECK_MATRIX_SENDERS(true, senders3, rosenders2a); + + CHECK_MATRIX_SENDERS(true, rosenders4a, senders3); + + auto rosenders4b = getReadSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(rosenders4b.size(), rosenders4b)); + + auto senders5 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders5)); + + CHECK_MATRIX_SENDERS(false, senders5, rosenders4a); + CHECK_MATRIX_SENDERS(true, senders5, rosenders4b); } } } -TYPED_TEST(MatrixTest, DependenciesReferenceMix) { +TYPED_TEST(MatrixTest, DependenciesSubSubPipeline) { using Type = TypeParam; for (const auto& comm_grid : this->commGrids()) { for (const auto& test : sizes_tests) { // Dependencies graph: - // rw0 - rw1 - ro2a - rw3 - ro4a - rw5 - // \ ro2b / \ ro4b 
/ + // rw0 - rw1 - ro2a ------- rw3 - ro4a ------- rw5 + // \ ------ ro2b / \ ----- ro4b / + // + // +---------------------+ + // sub pipeline + // + // +-------+ + // sub-sub pipeline GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); Matrix mat(size, test.block_size, comm_grid); - auto senders0 = getReadWriteSendersUsingGlobalIndex(mat); + auto senders0 = getReadWriteSendersUsingLocalIndex(mat); EXPECT_TRUE(checkSendersStep(senders0.size(), senders0)); - auto senders1 = getReadWriteSendersUsingLocalIndex(mat); + auto senders1 = getReadWriteSendersUsingGlobalIndex(mat); EXPECT_TRUE(checkSendersStep(0, senders1)); - auto rosenders2a = getReadSendersUsingGlobalIndex(mat); - EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + auto [rosenders2a, rosenders2b, senders3, rosenders4a] = [&]() { + auto mat_sub = mat.subPipeline(); - decltype(rosenders2a) rosenders2b; - { - Matrix& const_mat = mat; - rosenders2b = getReadSendersUsingLocalIndex(const_mat); - EXPECT_TRUE(checkSendersStep(0, rosenders2b)); - } + auto rosenders2a = getReadSendersUsingLocalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, rosenders2a)); - auto senders3 = getReadWriteSendersUsingGlobalIndex(mat); - EXPECT_TRUE(checkSendersStep(0, senders3)); + auto [rosenders2b, senders3] = [&]() { + auto rosenders2b = getReadSendersUsingGlobalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); - decltype(rosenders2a) rosenders4a; - { - Matrix& const_mat = mat; - rosenders4a = getReadSendersUsingLocalIndex(const_mat); + auto senders3 = getReadWriteSendersUsingLocalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, senders3)); + return std::tuple(std::move(rosenders2b), std::move(senders3)); + }(); + + auto rosenders4a = getReadSendersUsingGlobalIndex(mat); EXPECT_TRUE(checkSendersStep(0, rosenders4a)); - } + + return std::tuple(std::move(rosenders2a), std::move(rosenders2b), std::move(senders3), + std::move(rosenders4a)); + }(); CHECK_MATRIX_SENDERS(true, senders1, senders0); 
EXPECT_TRUE(checkSendersStep(0, rosenders2b)); @@ -597,10 +803,10 @@ TYPED_TEST(MatrixTest, DependenciesReferenceMix) { CHECK_MATRIX_SENDERS(true, rosenders4a, senders3); - auto rosenders4b = getReadSendersUsingGlobalIndex(mat); + auto rosenders4b = getReadSendersUsingLocalIndex(mat); EXPECT_TRUE(checkSendersStep(rosenders4b.size(), rosenders4b)); - auto senders5 = getReadWriteSendersUsingLocalIndex(mat); + auto senders5 = getReadWriteSendersUsingGlobalIndex(mat); EXPECT_TRUE(checkSendersStep(0, senders5)); CHECK_MATRIX_SENDERS(false, senders5, rosenders4a); @@ -609,14 +815,17 @@ TYPED_TEST(MatrixTest, DependenciesReferenceMix) { } } -TYPED_TEST(MatrixTest, DependenciesPointerMix) { +TYPED_TEST(MatrixTest, DependenciesSubPipelineConst) { using Type = TypeParam; for (const auto& comm_grid : this->commGrids()) { for (const auto& test : sizes_tests) { // Dependencies graph: // rw0 - rw1 - ro2a - rw3 - ro4a - rw5 - // \ ro2b / \ ro4b / + // \ ro2b / \ ro4b / + // + // +--+ + // sub pipeline GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); Matrix mat(size, test.block_size, comm_grid); @@ -627,25 +836,23 @@ TYPED_TEST(MatrixTest, DependenciesPointerMix) { auto senders1 = getReadWriteSendersUsingGlobalIndex(mat); EXPECT_TRUE(checkSendersStep(0, senders1)); - auto rosenders2a = getReadSendersUsingLocalIndex(mat); - EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + auto [rosenders2a, rosenders2b] = [&]() { + auto mat_sub = mat.subPipelineConst(); - decltype(rosenders2a) rosenders2b; - { - Matrix* const_mat = &mat; - rosenders2b = getReadSendersUsingGlobalIndex(*const_mat); + auto rosenders2a = getReadSendersUsingLocalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + + auto rosenders2b = getReadSendersUsingGlobalIndex(mat_sub); EXPECT_TRUE(checkSendersStep(0, rosenders2b)); - } + + return std::tuple(std::move(rosenders2a), std::move(rosenders2b)); + }(); auto senders3 = getReadWriteSendersUsingLocalIndex(mat); 
EXPECT_TRUE(checkSendersStep(0, senders3)); - decltype(rosenders2a) rosenders4a; - { - Matrix* const_mat = &mat; - rosenders4a = getReadSendersUsingGlobalIndex(*const_mat); - EXPECT_TRUE(checkSendersStep(0, rosenders4a)); - } + auto rosenders4a = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, rosenders4a)); CHECK_MATRIX_SENDERS(true, senders1, senders0); EXPECT_TRUE(checkSendersStep(0, rosenders2b)); @@ -669,87 +876,347 @@ TYPED_TEST(MatrixTest, DependenciesPointerMix) { } } -TYPED_TEST(MatrixTest, TileSize) { +TYPED_TEST(MatrixTest, DependenciesConst) { using Type = TypeParam; for (const auto& comm_grid : this->commGrids()) { for (const auto& test : sizes_tests) { GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); - Matrix mat(size, test.block_size, comm_grid); - for (SizeType i = 0; i < mat.nrTiles().rows(); ++i) { - SizeType mb = mat.blockSize().rows(); - SizeType ib = std::min(mb, mat.size().rows() - i * mb); - for (SizeType j = 0; j < mat.nrTiles().cols(); ++j) { - SizeType nb = mat.blockSize().cols(); - SizeType jb = std::min(nb, mat.size().cols() - j * nb); - EXPECT_EQ(TileElementSize(ib, jb), mat.tileSize({i, j})); - } - } + Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); + LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size); + memory::MemoryView mem(layout.minMemSize()); + const Type* p = mem(); + Matrix mat(std::move(distribution), layout, p); + auto rosenders1 = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(rosenders1.size(), rosenders1)); + + auto rosenders2 = getReadSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(rosenders2.size(), rosenders2)); } } } -struct TestLocalColMajor { - LocalElementSize size; - TileElementSize block_size; - SizeType ld; -}; - -const std::vector col_major_sizes_tests({ - {{10, 7}, {3, 4}, 10}, // packed ld - {{10, 7}, {3, 4}, 11}, // padded ld - {{6, 11}, {4, 3}, 6}, // packed ld - 
{{6, 11}, {4, 3}, 7}, // padded ld -}); - -template -bool haveConstElements(const Matrix&) { - return false; -} - -template -bool haveConstElements(const Matrix&) { - return true; -} - -TYPED_TEST(MatrixLocalTest, FromColMajor) { +TYPED_TEST(MatrixTest, DependenciesConstSubPipelineConst) { using Type = TypeParam; - for (const auto& test : col_major_sizes_tests) { - LayoutInfo layout = colMajorLayout(test.size, test.block_size, test.ld); - memory::MemoryView mem(layout.minMemSize()); - auto mat = createMatrixFromColMajor(test.size, test.block_size, test.ld, mem()); - ASSERT_FALSE(haveConstElements(mat)); - - CHECK_LAYOUT_LOCAL(mem(), layout, mat); - } -} - -TYPED_TEST(MatrixLocalTest, FromColMajorConst) { - using Type = TypeParam; + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); - for (const auto& test : col_major_sizes_tests) { - LayoutInfo layout = colMajorLayout(test.size, test.block_size, test.ld); - memory::MemoryView mem(layout.minMemSize()); - const Type* p = mem(); - auto mat = createMatrixFromColMajor(test.size, test.block_size, test.ld, p); - ASSERT_TRUE(haveConstElements(mat)); + Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); + LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size); + memory::MemoryView mem(layout.minMemSize()); + const Type* p = mem(); + Matrix mat(std::move(distribution), layout, p); + auto rosenders1 = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(rosenders1.size(), rosenders1)); - CHECK_LAYOUT_LOCAL(mem(), layout, mat); + auto rosenders2 = [&]() { + auto mat_sub = mat.subPipelineConst(); + return getReadSendersUsingLocalIndex(mat_sub); + }(); + // NOTE: This is a limitation of the current implementation. Semantically + // read-only access in sub-pipelines should be fused with read-only access + // from the parent pipeline. 
+ EXPECT_TRUE(checkSendersStep(0, rosenders2)); + CHECK_MATRIX_SENDERS(true, rosenders2, rosenders1); + + auto rosenders3 = getReadSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, rosenders3)); + CHECK_MATRIX_SENDERS(true, rosenders3, rosenders2); + } } } -TYPED_TEST(MatrixTest, FromColMajor) { +TYPED_TEST(MatrixTest, DependenciesReferenceMix) { using Type = TypeParam; for (const auto& comm_grid : this->commGrids()) { for (const auto& test : sizes_tests) { + // Dependencies graph: + // rw0 - rw1 - ro2a - rw3 - ro4a - rw5 + // \ ro2b / \ ro4b / + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + Matrix mat(size, test.block_size, comm_grid); - { - // src_rank = {0, 0} - Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); + auto senders0 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(senders0.size(), senders0)); + + auto senders1 = getReadWriteSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders1)); + + auto rosenders2a = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + + decltype(rosenders2a) rosenders2b; + { + Matrix& const_mat = mat; + rosenders2b = getReadSendersUsingLocalIndex(const_mat); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + } + + auto senders3 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders3)); + + decltype(rosenders2a) rosenders4a; + { + Matrix& const_mat = mat; + rosenders4a = getReadSendersUsingLocalIndex(const_mat); + EXPECT_TRUE(checkSendersStep(0, rosenders4a)); + } + + CHECK_MATRIX_SENDERS(true, senders1, senders0); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + CHECK_MATRIX_SENDERS(true, rosenders2b, senders1); + EXPECT_TRUE(checkSendersStep(rosenders2a.size(), rosenders2a)); + + CHECK_MATRIX_SENDERS(false, senders3, rosenders2b); + CHECK_MATRIX_SENDERS(true, senders3, rosenders2a); + + CHECK_MATRIX_SENDERS(true, rosenders4a, senders3); + 
+ auto rosenders4b = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(rosenders4b.size(), rosenders4b)); + + auto senders5 = getReadWriteSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders5)); + + CHECK_MATRIX_SENDERS(false, senders5, rosenders4a); + CHECK_MATRIX_SENDERS(true, senders5, rosenders4b); + } + } +} + +TYPED_TEST(MatrixTest, DependenciesReferenceMixSubPipeline) { + using Type = TypeParam; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + // Dependencies graph: + // rw0 - rw1 - ro2a - rw3 - ro4a - rw5 + // \ ro2b / \ ro4b / + // +--+ +--+ + // sub pipelines + + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + Matrix mat(size, test.block_size, comm_grid); + + auto senders0 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(senders0.size(), senders0)); + + auto senders1 = getReadWriteSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders1)); + + auto rosenders2a = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + + decltype(rosenders2a) rosenders2b; + { + auto mat_sub = mat.subPipelineConst(); + rosenders2b = getReadSendersUsingLocalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + } + + auto senders3 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders3)); + + decltype(rosenders2a) rosenders4a; + { + auto mat_sub = mat.subPipelineConst(); + rosenders4a = getReadSendersUsingLocalIndex(mat_sub); + EXPECT_TRUE(checkSendersStep(0, rosenders4a)); + } + + CHECK_MATRIX_SENDERS(true, senders1, senders0); + + EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + CHECK_MATRIX_SENDERS(true, rosenders2a, senders1); + + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + CHECK_MATRIX_SENDERS(true, rosenders2b, rosenders2a); + + CHECK_MATRIX_SENDERS(true, senders3, rosenders2b); + + 
CHECK_MATRIX_SENDERS(true, rosenders4a, senders3); + + auto rosenders4b = getReadSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, rosenders4b)); + + auto senders5 = getReadWriteSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders5)); + + CHECK_MATRIX_SENDERS(false, senders5, rosenders4a); + CHECK_MATRIX_SENDERS(true, senders5, rosenders4b); + } + } +} + +TYPED_TEST(MatrixTest, DependenciesPointerMix) { + using Type = TypeParam; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + // Dependencies graph: + // rw0 - rw1 - ro2a - rw3 - ro4a - rw5 + // \ ro2b / \ ro4b / + + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + Matrix mat(size, test.block_size, comm_grid); + + auto senders0 = getReadWriteSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(senders0.size(), senders0)); + + auto senders1 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders1)); + + auto rosenders2a = getReadSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, rosenders2a)); + + decltype(rosenders2a) rosenders2b; + { + Matrix* const_mat = &mat; + rosenders2b = getReadSendersUsingGlobalIndex(*const_mat); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + } + + auto senders3 = getReadWriteSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders3)); + + decltype(rosenders2a) rosenders4a; + { + Matrix* const_mat = &mat; + rosenders4a = getReadSendersUsingGlobalIndex(*const_mat); + EXPECT_TRUE(checkSendersStep(0, rosenders4a)); + } + + CHECK_MATRIX_SENDERS(true, senders1, senders0); + EXPECT_TRUE(checkSendersStep(0, rosenders2b)); + CHECK_MATRIX_SENDERS(true, rosenders2b, senders1); + EXPECT_TRUE(checkSendersStep(rosenders2a.size(), rosenders2a)); + + CHECK_MATRIX_SENDERS(false, senders3, rosenders2b); + CHECK_MATRIX_SENDERS(true, senders3, rosenders2a); + + CHECK_MATRIX_SENDERS(true, rosenders4a, senders3); + + auto rosenders4b = 
getReadSendersUsingLocalIndex(mat); + EXPECT_TRUE(checkSendersStep(rosenders4b.size(), rosenders4b)); + + auto senders5 = getReadWriteSendersUsingGlobalIndex(mat); + EXPECT_TRUE(checkSendersStep(0, senders5)); + + CHECK_MATRIX_SENDERS(false, senders5, rosenders4a); + CHECK_MATRIX_SENDERS(true, senders5, rosenders4b); + } + } +} + +TYPED_TEST(MatrixTest, TileSize) { + using Type = TypeParam; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + Matrix mat(size, test.block_size, comm_grid); + auto mat_sub = mat.subPipeline(); + auto mat_sub_const = mat.subPipelineConst(); + + for (SizeType i = 0; i < mat.nrTiles().rows(); ++i) { + SizeType mb = mat.blockSize().rows(); + SizeType ib = std::min(mb, mat.size().rows() - i * mb); + for (SizeType j = 0; j < mat.nrTiles().cols(); ++j) { + SizeType nb = mat.blockSize().cols(); + SizeType jb = std::min(nb, mat.size().cols() - j * nb); + EXPECT_EQ(TileElementSize(ib, jb), mat.tileSize({i, j})); + EXPECT_EQ(TileElementSize(ib, jb), mat_sub.tileSize({i, j})); + EXPECT_EQ(TileElementSize(ib, jb), mat_sub_const.tileSize({i, j})); + } + } + } + } +} + +struct TestLocalColMajor { + LocalElementSize size; + TileElementSize block_size; + SizeType ld; +}; + +const std::vector col_major_sizes_tests({ + {{10, 7}, {3, 4}, 10}, // packed ld + {{10, 7}, {3, 4}, 11}, // padded ld + {{6, 11}, {4, 3}, 6}, // packed ld + {{6, 11}, {4, 3}, 7}, // padded ld +}); + +template +bool haveConstElements(const Matrix&) { + return false; +} + +template +bool haveConstElements(const Matrix&) { + return true; +} + +TYPED_TEST(MatrixLocalTest, FromColMajor) { + using Type = TypeParam; + + for (const auto& test : col_major_sizes_tests) { + LayoutInfo layout = colMajorLayout(test.size, test.block_size, test.ld); + memory::MemoryView mem(layout.minMemSize()); + + auto mat = createMatrixFromColMajor(test.size, test.block_size, test.ld, mem()); + 
ASSERT_FALSE(haveConstElements(mat)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } + } +} + +TYPED_TEST(MatrixLocalTest, FromColMajorConst) { + using Type = TypeParam; + + for (const auto& test : col_major_sizes_tests) { + LayoutInfo layout = colMajorLayout(test.size, test.block_size, test.ld); + memory::MemoryView mem(layout.minMemSize()); + const Type* p = mem(); + + auto mat = createMatrixFromColMajor(test.size, test.block_size, test.ld, p); + ASSERT_TRUE(haveConstElements(mat)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } + } +} + +TYPED_TEST(MatrixTest, FromColMajor) { + using Type = TypeParam; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + + { + // src_rank = {0, 0} + Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); SizeType ld = distribution.localSize().rows() + 3; LayoutInfo layout = colMajorLayout(distribution, ld); @@ -757,8 +1224,19 @@ TYPED_TEST(MatrixTest, FromColMajor) { auto mat = createMatrixFromColMajor(size, test.block_size, ld, comm_grid, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), 
distribution, layout, mat_sub_const); + } } { // specify src_rank @@ -773,8 +1251,19 @@ TYPED_TEST(MatrixTest, FromColMajor) { auto mat = createMatrixFromColMajor(size, test.block_size, ld, comm_grid, src_rank, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } } } @@ -794,12 +1283,17 @@ TYPED_TEST(MatrixTest, FromColMajorConst) { SizeType ld = distribution.localSize().rows() + 3; LayoutInfo layout = colMajorLayout(distribution, ld); memory::MemoryView mem(layout.minMemSize()); - const Type* p = mem(); + auto mat = createMatrixFromColMajor(size, test.block_size, ld, comm_grid, p); ASSERT_TRUE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } { // specify src_rank @@ -810,13 +1304,18 @@ TYPED_TEST(MatrixTest, FromColMajorConst) { SizeType ld = distribution.localSize().rows() + 3; LayoutInfo layout = colMajorLayout(distribution, ld); memory::MemoryView mem(layout.minMemSize()); - const Type* p = mem(); + auto mat = createMatrixFromColMajor(size, test.block_size, ld, comm_grid, src_rank, p); ASSERT_TRUE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } } } @@ -846,15 +1345,37 @@ TYPED_TEST(MatrixLocalTest, FromTile) { if (test.is_basic) { auto mat = 
createMatrixFromTile(test.size, test.block_size, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } } auto mat = createMatrixFromTile(test.size, test.block_size, test.ld, test.tiles_per_col, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } } } @@ -868,15 +1389,25 @@ TYPED_TEST(MatrixLocalTest, FromTileConst) { if (test.is_basic) { auto mat = createMatrixFromTile(test.size, test.block_size, p); ASSERT_TRUE(haveConstElements(mat)); - CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } } auto mat = createMatrixFromTile(test.size, test.block_size, test.ld, test.tiles_per_col, p); ASSERT_TRUE(haveConstElements(mat)); - CHECK_LAYOUT_LOCAL(mem(), layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_LAYOUT_LOCAL(mem(), layout, mat_sub_const); + } } } @@ -898,8 +1429,19 @@ TYPED_TEST(MatrixTest, FromTile) { auto mat = createMatrixFromTile(size, test.block_size, comm_grid, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_DISTRIBUTION_LAYOUT(mem(), 
distribution, layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } { // specify src_rank @@ -911,8 +1453,19 @@ TYPED_TEST(MatrixTest, FromTile) { auto mat = createMatrixFromTile(size, test.block_size, comm_grid, src_rank, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } // Advanced tile layout @@ -929,8 +1482,19 @@ TYPED_TEST(MatrixTest, FromTile) { auto mat = createMatrixFromTile(size, test.block_size, ld_tiles, tiles_per_col, comm_grid, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } { // specify src_rank @@ -947,8 +1511,19 @@ TYPED_TEST(MatrixTest, FromTile) { auto mat = createMatrixFromTile(size, test.block_size, ld_tiles, tiles_per_col, comm_grid, src_rank, mem()); ASSERT_FALSE(haveConstElements(mat)); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub = mat.subPipeline(); + ASSERT_FALSE(haveConstElements(mat_sub)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub); + } + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + 
CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } } } @@ -975,6 +1550,12 @@ TYPED_TEST(MatrixTest, FromTileConst) { ASSERT_TRUE(haveConstElements(mat)); CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } { // specify src_rank @@ -989,6 +1570,12 @@ TYPED_TEST(MatrixTest, FromTileConst) { ASSERT_TRUE(haveConstElements(mat)); CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } // Advanced tile layout @@ -1008,6 +1595,12 @@ TYPED_TEST(MatrixTest, FromTileConst) { ASSERT_TRUE(haveConstElements(mat)); CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } } { // specify src_rank @@ -1015,27 +1608,115 @@ TYPED_TEST(MatrixTest, FromTileConst) { std::min(1, comm_grid.size().cols() - 1)); Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), src_rank); - SizeType ld_tiles = test.block_size.rows(); - SizeType tiles_per_col = - ceilDiv(distribution.localSize().rows(), distribution.blockSize().rows()) + 1; - LayoutInfo layout = tileLayout(distribution, ld_tiles, tiles_per_col); - memory::MemoryView mem(layout.minMemSize()); + SizeType ld_tiles = test.block_size.rows(); + SizeType tiles_per_col = + ceilDiv(distribution.localSize().rows(), distribution.blockSize().rows()) + 1; + LayoutInfo layout = tileLayout(distribution, ld_tiles, tiles_per_col); + memory::MemoryView mem(layout.minMemSize()); + + const Type* p = mem(); + auto mat = createMatrixFromTile(size, 
test.block_size, ld_tiles, tiles_per_col, + comm_grid, src_rank, p); + ASSERT_TRUE(haveConstElements(mat)); + + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + + { + auto mat_sub_const = mat.subPipelineConst(); + ASSERT_TRUE(haveConstElements(mat_sub_const)); + CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat_sub_const); + } + } + } + } +} + +TYPED_TEST(MatrixTest, CopyFrom) { + using MemoryViewT = dlaf::memory::MemoryView; + using MatrixT = dlaf::Matrix; + using MatrixConstT = dlaf::Matrix; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + + Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); + LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size); + + auto input_matrix = [](const GlobalElementIndex& index) { + SizeType i = index.row(); + SizeType j = index.col(); + return TypeUtilities::element(i + j / 1024., j - i / 128.); + }; + + MemoryViewT mem_src(layout.minMemSize()); + MatrixT mat_src = createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem_src())); + dlaf::matrix::util::set(mat_src, input_matrix); + + MatrixConstT mat_src_const = std::move(mat_src); + + MemoryViewT mem_dst(layout.minMemSize()); + MatrixT mat_dst = createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem_dst())); + dlaf::matrix::util::set(mat_dst, + [](const auto&) { return TypeUtilities::element(13, 26); }); + + copy(mat_src_const, mat_dst); + + CHECK_MATRIX_NEAR(input_matrix, mat_dst, 0, TypeUtilities::error); + } + } +} + +TYPED_TEST(MatrixTest, CopyFromSubPipeline) { + using MemoryViewT = dlaf::memory::MemoryView; + using MatrixT = dlaf::Matrix; + using MatrixConstT = dlaf::Matrix; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + + 
Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); + LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size); + + auto input_matrix = [](const GlobalElementIndex& index) { + SizeType i = index.row(); + SizeType j = index.col(); + return TypeUtilities::element(i + j / 1024., j - i / 128.); + }; + + MemoryViewT mem_src(layout.minMemSize()); + MatrixT mat_src = createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem_src())); + dlaf::matrix::util::set(mat_src, input_matrix); + + MemoryViewT mem_dst(layout.minMemSize()); + MatrixT mat_dst = createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem_dst())); + dlaf::matrix::util::set(mat_dst, + [](const auto&) { return TypeUtilities::element(13, 26); }); - const Type* p = mem(); - auto mat = createMatrixFromTile(size, test.block_size, ld_tiles, tiles_per_col, - comm_grid, src_rank, p); - ASSERT_TRUE(haveConstElements(mat)); + { + MatrixConstT mat_sub_src_const = mat_src.subPipelineConst(); + MatrixT mat_sub_dst = mat_dst.subPipeline(); - CHECK_DISTRIBUTION_LAYOUT(mem(), distribution, layout, mat); + copy(mat_sub_src_const, mat_sub_dst); } + + CHECK_MATRIX_NEAR(input_matrix, mat_dst, 0, TypeUtilities::error); } } } -TYPED_TEST(MatrixTest, CopyFrom) { +#if DLAF_WITH_GPU +TYPED_TEST(MatrixTest, GPUCopy) { using MemoryViewT = dlaf::memory::MemoryView; using MatrixT = dlaf::Matrix; using MatrixConstT = dlaf::Matrix; + using GPUMemoryViewT = dlaf::memory::MemoryView; + using GPUMatrixT = dlaf::Matrix; for (const auto& comm_grid : this->commGrids()) { for (const auto& test : sizes_tests) { @@ -1057,21 +1738,30 @@ TYPED_TEST(MatrixTest, CopyFrom) { MatrixConstT mat_src_const = std::move(mat_src); + GPUMemoryViewT mem_gpu1(layout.minMemSize()); + GPUMatrixT mat_gpu1 = createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem_gpu1())); + + GPUMemoryViewT mem_gpu2(layout.minMemSize()); + GPUMatrixT mat_gpu2 = 
createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem_gpu2())); + MemoryViewT mem_dst(layout.minMemSize()); MatrixT mat_dst = createMatrixFromTile(size, test.block_size, comm_grid, static_cast(mem_dst())); dlaf::matrix::util::set(mat_dst, [](const auto&) { return TypeUtilities::element(13, 26); }); - copy(mat_src_const, mat_dst); + copy(mat_src_const, mat_gpu1); + copy(mat_gpu1, mat_gpu2); + copy(mat_gpu2, mat_dst); CHECK_MATRIX_NEAR(input_matrix, mat_dst, 0, TypeUtilities::error); } } } -#if DLAF_WITH_GPU -TYPED_TEST(MatrixTest, GPUCopy) { +TYPED_TEST(MatrixTest, GPUCopySubPipeline) { using MemoryViewT = dlaf::memory::MemoryView; using MatrixT = dlaf::Matrix; using MatrixConstT = dlaf::Matrix; @@ -1112,9 +1802,16 @@ TYPED_TEST(MatrixTest, GPUCopy) { dlaf::matrix::util::set(mat_dst, [](const auto&) { return TypeUtilities::element(13, 26); }); - copy(mat_src_const, mat_gpu1); - copy(mat_gpu1, mat_gpu2); - copy(mat_gpu2, mat_dst); + { + MatrixConstT mat_sub_src_const = mat_src_const.subPipelineConst(); + GPUMatrixT mat_sub_gpu1 = mat_gpu1.subPipeline(); + GPUMatrixT mat_sub_gpu2 = mat_gpu2.subPipeline(); + MatrixT mat_sub_dst = mat_dst.subPipeline(); + + copy(mat_sub_src_const, mat_sub_gpu1); + copy(mat_sub_gpu1, mat_sub_gpu2); + copy(mat_sub_gpu2, mat_sub_dst); + } CHECK_MATRIX_NEAR(input_matrix, mat_dst, 0, TypeUtilities::error); } @@ -1173,6 +1870,56 @@ TEST_F(MatrixGenericTest, SelectTilesReadonly) { } } +TEST_F(MatrixGenericTest, SelectTilesReadonlySubPipeline) { + using TypeParam = double; + using MemoryViewT = dlaf::memory::MemoryView; + using MatrixT = dlaf::Matrix; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + + Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); + LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size); + + MemoryViewT 
mem(layout.minMemSize()); + MatrixT mat = createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem())); + auto mat_sub = mat.subPipeline(); + + // if this rank has no tiles locally, there's nothing interesting to do... + if (distribution.localNrTiles().isEmpty()) + continue; + + const auto ncols = to_sizet(distribution.localNrTiles().cols()); + const LocalTileSize local_row_size{1, to_SizeType(ncols)}; + auto row0_range = common::iterate_range2d(local_row_size); + + // top left tile is selected in rw (i.e. exclusive access) + auto sender_tl = mat_sub.readwrite(LocalTileIndex{0, 0}); + + // the entire first row is selected in ro + auto senders_row = selectRead(mat_sub, row0_range); + EXPECT_EQ(ncols, senders_row.size()); + + // eagerly start the tile senders, but don't release them + std::vector void_senders_row; + void_senders_row.reserve(senders_row.size()); + for (auto& s : senders_row) { + void_senders_row.emplace_back(std::move(s)); + } + + // Since the top left tile has been selected two times, the group selection + // would have all but the first tile ready... + EXPECT_TRUE(checkSendersStep(1, void_senders_row, true)); + + // ... until the first one will be released. 
+ tt::sync_wait(std::move(sender_tl)); + EXPECT_TRUE(checkSendersStep(ncols, void_senders_row)); + } + } +} + TEST_F(MatrixGenericTest, SelectTilesReadwrite) { using TypeParam = double; using MemoryViewT = dlaf::memory::MemoryView; @@ -1222,6 +1969,56 @@ TEST_F(MatrixGenericTest, SelectTilesReadwrite) { } } +TEST_F(MatrixGenericTest, SelectTilesReadwriteSubPipeline) { + using TypeParam = double; + using MemoryViewT = dlaf::memory::MemoryView; + using MatrixT = dlaf::Matrix; + + for (const auto& comm_grid : this->commGrids()) { + for (const auto& test : sizes_tests) { + GlobalElementSize size = globalTestSize(test.size, comm_grid.size()); + + Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0}); + LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size); + + MemoryViewT mem(layout.minMemSize()); + MatrixT mat = createMatrixFromTile(size, test.block_size, comm_grid, + static_cast(mem())); + auto mat_sub = mat.subPipeline(); + + // if this rank has no tiles locally, there's nothing interesting to do... + if (distribution.localNrTiles().isEmpty()) + continue; + + const auto ncols = to_sizet(distribution.localNrTiles().cols()); + const LocalTileSize local_row_size{1, to_SizeType(ncols)}; + auto row0_range = common::iterate_range2d(local_row_size); + + // top left tile is selected in rw (i.e. exclusive access) + auto sender_tl = mat_sub.readwrite(LocalTileIndex{0, 0}); + + // the entire first row is selected in rw + auto senders_row = select(mat_sub, row0_range); + EXPECT_EQ(ncols, senders_row.size()); + + // eagerly start the tile senders, but don't release them + std::vector void_senders_row; + void_senders_row.reserve(senders_row.size()); + for (auto& s : senders_row) { + void_senders_row.emplace_back(std::move(s)); + } + + // Since the top left tile has been selected two times, the group selection + // would have all but the first tile ready... 
+ EXPECT_TRUE(checkSendersStep(1, void_senders_row, true)); + + // ... until the first one will be released. + tt::sync_wait(std::move(sender_tl)); + EXPECT_TRUE(checkSendersStep(ncols, void_senders_row)); + } + } +} + // MatrixDestructor // // These tests checks that sender management on destruction is performed correctly. The behaviour is @@ -1380,6 +2177,145 @@ TEST(MatrixDestructor, ConstAfterRead_UserMemory) { tt::sync_wait(std::move(last_task)); } +TEST(MatrixDestructor, NonConstAfterReadSubPipeline) { + ex::unique_any_sender<> last_task; + + std::atomic is_exited_from_scope{false}; + { + auto matrix = createMatrix(); + auto matrix_sub = matrix.subPipeline(); + + auto tile_sender = matrix_sub.read(LocalTileIndex(0, 0)); + last_task = std::move(tile_sender) | + dlaf::internal::transform(dlaf::internal::Policy(), + WaitGuardHelper{is_exited_from_scope}) | + ex::ensure_started(); + } + is_exited_from_scope = true; + + tt::sync_wait(std::move(last_task)); +} + +TEST(MatrixDestructor, NonConstAfterReadSubPipelineConst) { + ex::unique_any_sender<> last_task; + + std::atomic is_exited_from_scope{false}; + { + auto matrix = createMatrix(); + auto matrix_sub = matrix.subPipelineConst(); + + auto tile_sender = matrix_sub.read(LocalTileIndex(0, 0)); + last_task = std::move(tile_sender) | + dlaf::internal::transform(dlaf::internal::Policy(), + WaitGuardHelper{is_exited_from_scope}) | + ex::ensure_started(); + } + is_exited_from_scope = true; + + tt::sync_wait(std::move(last_task)); +} + +TEST(MatrixDestructor, NonConstAfterReadWriteSubPipeline) { + namespace ex = pika::execution::experimental; + ex::unique_any_sender<> last_task; + + std::atomic is_exited_from_scope{false}; + { + auto matrix = createMatrix(); + auto matrix_sub = matrix.subPipeline(); + + auto tile_sender = matrix_sub.readwrite(LocalTileIndex(0, 0)); + last_task = std::move(tile_sender) | + dlaf::internal::transform(dlaf::internal::Policy(), + WaitGuardHelper{is_exited_from_scope}) | + 
ex::ensure_started(); + } + is_exited_from_scope = true; + + tt::sync_wait(std::move(last_task)); +} + +TEST(MatrixDestructor, NonConstAfterReadSubPipeline_UserMemory) { + ex::unique_any_sender<> last_task; + + std::atomic is_exited_from_scope{false}; + { + T data; + auto matrix = createMatrix(data); + auto matrix_sub = matrix.subPipeline(); + + auto tile_sender = matrix_sub.read(LocalTileIndex(0, 0)); + last_task = std::move(tile_sender) | + dlaf::internal::transform(dlaf::internal::Policy(), + WaitGuardHelper{is_exited_from_scope}) | + ex::ensure_started(); + } + is_exited_from_scope = true; + + tt::sync_wait(std::move(last_task)); +} + +TEST(MatrixDestructor, NonConstAfterReadSubPipelineConst_UserMemory) { + ex::unique_any_sender<> last_task; + + std::atomic is_exited_from_scope{false}; + { + T data; + auto matrix = createMatrix(data); + auto matrix_sub = matrix.subPipelineConst(); + + auto tile_sender = matrix_sub.read(LocalTileIndex(0, 0)); + last_task = std::move(tile_sender) | + dlaf::internal::transform(dlaf::internal::Policy(), + WaitGuardHelper{is_exited_from_scope}) | + ex::ensure_started(); + } + is_exited_from_scope = true; + + tt::sync_wait(std::move(last_task)); +} + +TEST(MatrixDestructor, NonConstAfterReadWriteSubPipeline_UserMemory) { + namespace ex = pika::execution::experimental; + ex::unique_any_sender<> last_task; + + std::atomic is_exited_from_scope{false}; + { + T data; + auto matrix = createMatrix(data); + auto matrix_sub = matrix.subPipeline(); + + auto tile_sender = matrix_sub.readwrite(LocalTileIndex(0, 0)); + last_task = std::move(tile_sender) | + dlaf::internal::transform(dlaf::internal::Policy(), + WaitGuardHelper{is_exited_from_scope}) | + ex::ensure_started(); + } + is_exited_from_scope = true; + + tt::sync_wait(std::move(last_task)); +} + +TEST(MatrixDestructor, ConstAfterReadSubPipeline_UserMemory) { + ex::unique_any_sender<> last_task; + + std::atomic is_exited_from_scope{false}; + { + T data; + auto matrix = createConstMatrix(data); + 
auto matrix_sub = matrix.subPipelineConst(); + + auto tile_sender = matrix_sub.read(LocalTileIndex(0, 0)); + last_task = std::move(tile_sender) | + dlaf::internal::transform(dlaf::internal::Policy(), + WaitGuardHelper{is_exited_from_scope}) | + ex::ensure_started(); + } + is_exited_from_scope = true; + + tt::sync_wait(std::move(last_task)); +} + TEST_F(MatrixGenericTest, SyncBarrier) { using TypeParam = double; using MemoryViewT = dlaf::memory::MemoryView;