diff --git a/include/dlaf/matrix/create_matrix.h b/include/dlaf/matrix/create_matrix.h new file mode 100644 index 0000000000..95738c1232 --- /dev/null +++ b/include/dlaf/matrix/create_matrix.h @@ -0,0 +1,182 @@ +// +// Distributed Linear Algebra with Future (DLAF) +// +// Copyright (c) 2018-2023, ETH Zurich +// All rights reserved. +// +// Please, refer to the LICENSE file in the root directory. +// SPDX-License-Identifier: BSD-3-Clause +// + +#include +#include +#include + +namespace dlaf::matrix { + +// Note: the templates of the following helper functions are inverted w.r.t. the Matrix templates +// to allow the user to only specify the device and let the compiler deduce the type T. + +// Local versions + +/// Create a non distributed matrix of size @p size and block size @p block_size +/// which references elements +/// that are already allocated in the memory with a column major layout. +/// +/// @param[in] ld the leading dimension of the matrix, +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre ld >= max(1, size.row()), +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. +template +Matrix createMatrixFromColMajor(const LocalElementSize& size, const TileElementSize& block_size, + SizeType ld, T* ptr) { + return Matrix(colMajorLayout(size, block_size, ld), ptr); +} + +/// Create a non distributed matrix of size @p size and block size @p block_size +/// which references elements +/// that are already allocated in the memory with a tile layout. +/// +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. 
+template +Matrix createMatrixFromTile(const LocalElementSize& size, const TileElementSize& block_size, + T* ptr) { + return Matrix(tileLayout(size, block_size), ptr); +} + +/// Create a non distributed matrix of size @p size and block size @p block_size +/// which references elements +/// that are already allocated in the memory with a tile layout. +/// +/// @param[in] ld_tile the leading dimension of the tiles, +/// @param[in] tiles_per_col the number of tiles stored for each column of tiles, +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p ld_tile >= max(1, min(block_size.row(), size.row())), +/// @pre @p tiles_per_col >= ceilDiv(size.row(), block_size.row()), +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. +template +Matrix createMatrixFromTile(const LocalElementSize& size, const TileElementSize& block_size, + SizeType ld_tile, SizeType tiles_per_col, T* ptr) { + return Matrix(tileLayout(size, block_size, ld_tile, tiles_per_col), ptr); +} + +// Distributed versions + +/// Create a distributed matrix of size @p size and block size @p block_size +/// on the given 2D communicator grid @p comm which references elements +/// that are already allocated in the memory with a column major layout. +/// +/// @param[in] ld the leading dimension of the matrix, +/// @param[in] source_rank_index is the rank of the process which contains the top left tile of the matrix, +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p ld >= max(1, size.row()), +/// @pre @p source_rank_index.isIn(grid_size), +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. 
+template +Matrix createMatrixFromColMajor(const GlobalElementSize& size, const TileElementSize& block_size, + SizeType ld, const comm::CommunicatorGrid& comm, + const comm::Index2D& source_rank_index, T* ptr) { + Distribution distribution(size, block_size, comm.size(), comm.rank(), source_rank_index); + auto layout = colMajorLayout(distribution.localSize(), block_size, ld); + + return Matrix(std::move(distribution), layout, ptr); +} + +/// Create a distributed matrix of size @p size and block size @p block_size +/// on the given 2D communicator grid @p comm which references elements +/// that are already allocated in the memory with a column major layout. +/// +/// This method assumes @p source_rank_index to be {0,0}. +/// @param[in] ld the leading dimension of the matrix, +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p ld >= max(1, size.row()), +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. +template +Matrix createMatrixFromColMajor(const GlobalElementSize& size, const TileElementSize& block_size, + SizeType ld, const comm::CommunicatorGrid& comm, T* ptr) { + return createMatrixFromColMajor(size, block_size, ld, comm, {0, 0}, ptr); +} + +/// Create a distributed matrix of size @p size and block size @p block_size +/// on the given 2D communicator grid @p comm which references elements +/// that are already allocated in the memory with a tile layout. +/// +/// @param[in] source_rank_index is the rank of the process which contains the top left tile of the matrix, +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p source_rank_index.isIn(grid_size), +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. 
+template +Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, + const comm::CommunicatorGrid& comm, + const comm::Index2D& source_rank_index, T* ptr) { + Distribution distribution(size, block_size, comm.size(), comm.rank(), source_rank_index); + auto layout = tileLayout(distribution.localSize(), block_size); + + return Matrix(std::move(distribution), layout, ptr); +} + +/// Create a distributed matrix of size @p size and block size @p block_size +/// on the given 2D communicator grid @p comm which references elements +/// that are already allocated in the memory with a tile layout. +/// +/// This method assumes @p source_rank_index to be {0,0}. +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. +template +Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, + const comm::CommunicatorGrid& comm, T* ptr) { + return createMatrixFromTile(size, block_size, comm, {0, 0}, ptr); +} + +/// Create a distributed matrix of size @p size and block size @p block_size +/// on the given 2D communicator grid @p comm which references elements +/// that are already allocated in the memory with a tile layout. 
+/// +/// @param[in] ld_tile the leading dimension of the tiles, +/// @param[in] tiles_per_col the number of tiles stored for each column of tiles, +/// @param[in] source_rank_index is the rank of the process which contains the top left tile of the matrix, +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p ld_tile >= max(1, min(block_size.row(), size.row())), +/// @pre @p tiles_per_col >= ceilDiv(size.row(), block_size.row()), +/// @pre @p source_rank_index.isIn(grid_size), +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. +template +Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, + SizeType ld_tile, SizeType tiles_per_col, + const comm::CommunicatorGrid& comm, + const comm::Index2D& source_rank_index, T* ptr) { + Distribution distribution(size, block_size, comm.size(), comm.rank(), source_rank_index); + auto layout = tileLayout(distribution.localSize(), block_size, ld_tile, tiles_per_col); + + return Matrix(std::move(distribution), layout, ptr); +} + +/// Create a distributed matrix of size @p size and block size @p block_size +/// on the given 2D communicator grid @p comm which references elements +/// that are already allocated in the memory with a tile layout. +/// +/// This method assumes @p source_rank_index to be {0,0}. +/// @param[in] ld_tile the leading dimension of the tiles, +/// @param[in] tiles_per_col the number of tiles stored for each column of tiles, +/// @param[in] ptr is the pointer to the first element of the local part of the matrix, +/// @pre @p ld_tile >= max(1, min(block_size.row(), size.row())), +/// @pre @p tiles_per_col >= ceilDiv(size.row(), block_size.row()), +/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix +/// stored in the given layout. 
+template +Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, + SizeType ld_tile, SizeType tiles_per_col, + const comm::CommunicatorGrid& comm, T* ptr) { + return createMatrixFromTile(size, block_size, ld_tile, tiles_per_col, comm, {0, 0}, ptr); +} + +} diff --git a/include/dlaf/matrix/matrix.h b/include/dlaf/matrix/matrix.h index 71117bb0ac..5441015eb7 100644 --- a/include/dlaf/matrix/matrix.h +++ b/include/dlaf/matrix/matrix.h @@ -68,18 +68,31 @@ class Matrix : public Matrix { /// /// @pre size.isValid(), /// @pre !blockSize.isEmpty(). - Matrix(const LocalElementSize& size, const TileElementSize& block_size); + Matrix(const LocalElementSize& size, const TileElementSize& tile_size) noexcept + : Matrix(Distribution(size, tile_size)) {} - /// Create a distributed matrix of size @p size and block size @p block_size on the given 2D - /// communicator grid @p comm. + /// Create a distributed matrix of size @p size, block size @p tile_size and tile size @p tile_size + /// on the given 2D communicator grid @p comm. /// /// @pre size.isValid(), /// @pre !blockSize.isEmpty(). - Matrix(const GlobalElementSize& size, const TileElementSize& block_size, - const comm::CommunicatorGrid& comm); + Matrix(const GlobalElementSize& size, const TileElementSize& tile_size, + const comm::CommunicatorGrid& comm) noexcept + : Matrix(Distribution(size, tile_size, comm.size(), comm.rank(), {0, 0})) {} /// Create a matrix distributed according to the distribution @p distribution. 
- Matrix(Distribution distribution); + Matrix(Distribution distribution) noexcept : Matrix(std::move(distribution)) { + const SizeType alignment = 64; + const SizeType ld = std::max( + 1, util::ceilDiv(this->distribution().local_size().rows(), alignment) * alignment); + + auto layout = colMajorLayout(this->distribution().local_size(), this->tile_size(), ld); + + SizeType memory_size = layout.minMemSize(); + memory::MemoryView mem(memory_size); + + setUpTiles(mem, layout); + } /// Create a matrix distributed according to the distribution @p distribution, /// specifying the layout. @@ -88,7 +101,18 @@ class Matrix : public Matrix { /// of the local part of the matrix will be stored in memory, /// @pre distribution.localSize() == layout.size(), /// @pre distribution.blockSize() == layout.blockSize(). - Matrix(Distribution distribution, const LayoutInfo& layout) noexcept; + Matrix(Distribution distribution, const LayoutInfo& layout) noexcept + : Matrix(std::move(distribution)) { + DLAF_ASSERT(this->distribution().local_size() == layout.size(), + "Size of distribution does not match layout size!", this->distribution().local_size(), + layout.size()); + DLAF_ASSERT(this->distribution().tile_size() == layout.blockSize(), this->distribution().tile_size(), + layout.blockSize()); + + memory::MemoryView mem(layout.minMemSize()); + + setUpTiles(mem, layout); + } /// Create a non distributed matrix, /// which references elements that are already allocated in the memory. @@ -97,7 +121,7 @@ class Matrix : public Matrix { /// of the local part of the matrix are stored in memory, /// @param[in] ptr is the pointer to the first element of the local part of the matrix, /// @pre @p ptr refers to an allocated memory region of at least @c layout.minMemSize() elements. 
- Matrix(const LayoutInfo& layout, ElementType* ptr); + Matrix(const LayoutInfo& layout, ElementType* ptr) noexcept : Matrix(layout, ptr) {} /// Create a matrix distributed according to the distribution @p distribution, /// which references elements that are already allocated in the memory. @@ -108,7 +132,8 @@ class Matrix : public Matrix { /// @pre @p distribution.localSize() == @p layout.size(), /// @pre @p distribution.blockSize() == @p layout.blockSize(), /// @pre @p ptr refers to an allocated memory region of at least @c layout.minMemSize() elements. - Matrix(Distribution distribution, const LayoutInfo& layout, ElementType* ptr) noexcept; + Matrix(Distribution distribution, const LayoutInfo& layout, ElementType* ptr) noexcept + : Matrix(std::move(distribution), layout, ptr) {} Matrix(const Matrix& rhs) = delete; Matrix(Matrix&& rhs) = default; @@ -118,7 +143,7 @@ class Matrix : public Matrix { /// Returns a sender of the Tile with local index @p index. /// - /// @pre index.isIn(distribution().localNrTiles()). + /// @pre index.isIn(distribution().local_nr_tiles()). ReadWriteSenderType readwrite(const LocalTileIndex& index) noexcept { return tile_managers_[tileLinearIndex(index)].readwrite(); } @@ -128,7 +153,7 @@ class Matrix : public Matrix { /// @pre the global tile is stored in the current process, /// @pre index.isIn(globalNrTiles()). ReadWriteSenderType readwrite(const GlobalTileIndex& index) noexcept { - return readwrite(this->distribution().localTileIndex(index)); + return readwrite(this->distribution().local_tile_index(index)); } public: @@ -138,7 +163,7 @@ class Matrix : public Matrix { /// All accesses to the sub-pipelined matrix are sequenced after previous accesses and before later /// accesses to the original matrix, independently of when tiles are accessed in the sub-pipelined /// matrix. 
- Matrix subPipeline() { + Matrix subPipeline() noexcept { return Matrix(*this, SubPipelineTag{}); } @@ -150,8 +175,8 @@ class Matrix : public Matrix { /// matrix. /// /// @pre blockSize() is divisible by @p tiles_per_block - /// @pre blockSize() == baseTileSize() - Matrix retiledSubPipeline(const LocalTileSize& tiles_per_block) { + /// @pre blockSize() == tile_size() + Matrix retiledSubPipeline(const LocalTileSize& tiles_per_block) noexcept { return Matrix(*this, tiles_per_block); } @@ -160,8 +185,9 @@ class Matrix : public Matrix { private: using typename Matrix::SubPipelineTag; - Matrix(Matrix& mat, const SubPipelineTag); - Matrix(Matrix& mat, const LocalTileSize& tiles_per_block); + Matrix(Matrix& mat, const SubPipelineTag tag) noexcept : Matrix(mat, tag) {} + Matrix(Matrix& mat, const LocalTileSize& tiles_per_block) noexcept + : Matrix(mat, tiles_per_block) {} using Matrix::setUpTiles; using Matrix::tile_managers_; @@ -179,14 +205,27 @@ class Matrix : public internal::MatrixBase { using ReadOnlySenderType = ReadOnlyTileSender; friend Matrix; - Matrix(const LayoutInfo& layout, ElementType* ptr); + Matrix(const LayoutInfo& layout, ElementType* ptr) noexcept + : MatrixBase({layout.size(), layout.blockSize()}) { + memory::MemoryView mem(ptr, layout.minMemSize()); + setUpTiles(mem, layout); + } - Matrix(const LayoutInfo& layout, const ElementType* ptr) + Matrix(const LayoutInfo& layout, const ElementType* ptr) noexcept : Matrix(layout, const_cast(ptr)) {} - Matrix(Distribution distribution, const LayoutInfo& layout, ElementType* ptr) noexcept; + Matrix(Distribution distribution, const LayoutInfo& layout, ElementType* ptr) noexcept + : MatrixBase(std::move(distribution)) { + DLAF_ASSERT(this->distribution().local_size() == layout.size(), this->distribution().local_size(), + layout.size()); + DLAF_ASSERT(this->distribution().tile_size() == layout.blockSize(), this->distribution().tile_size(), + layout.blockSize()); + + memory::MemoryView mem(ptr, layout.minMemSize()); + 
setUpTiles(mem, layout); + } - Matrix(Distribution distribution, const LayoutInfo& layout, const ElementType* ptr) + Matrix(Distribution distribution, const LayoutInfo& layout, const ElementType* ptr) noexcept : Matrix(std::move(distribution), layout, const_cast(ptr)) {} Matrix(const Matrix& rhs) = delete; @@ -197,7 +236,7 @@ class Matrix : public internal::MatrixBase { /// Returns a read-only sender of the Tile with local index @p index. /// - /// @pre index.isIn(distribution().localNrTiles()). + /// @pre index.isIn(distribution().local_nr_tiles()). ReadOnlySenderType read(const LocalTileIndex& index) noexcept { return tile_managers_[tileLinearIndex(index)].read(); } @@ -207,7 +246,7 @@ class Matrix : public internal::MatrixBase { /// @pre the global tile is stored in the current process, /// @pre index.isIn(globalNrTiles()). ReadOnlySenderType read(const GlobalTileIndex& index) { - return read(distribution().localTileIndex(index)); + return read(distribution().local_tile_index(index)); } /// Synchronization barrier for all local tiles in the matrix @@ -234,7 +273,7 @@ class Matrix : public internal::MatrixBase { /// matrix. /// /// @pre blockSize() is divisible by @p tiles_per_block - /// @pre blockSize() == baseTileSize() + /// @pre blockSize() == tile_size() Matrix retiledSubPipelineConst(const LocalTileSize& tiles_per_block) { return Matrix(*this, tiles_per_block); } @@ -253,14 +292,19 @@ class Matrix : public internal::MatrixBase { /// Marking a tile as done means it can no longer be accessed. Marking a tile as done also disallows /// creation of sub pipelines from the full matrix. 
void done(const GlobalTileIndex& index) noexcept { - done(distribution().localTileIndex(index)); + done(distribution().local_tile_index(index)); } protected: Matrix(Distribution distribution) : internal::MatrixBase{std::move(distribution)} {} struct SubPipelineTag {}; - Matrix(Matrix& mat, const SubPipelineTag); - Matrix(Matrix& mat, const LocalTileSize& tiles_per_block); + Matrix(Matrix& mat, const SubPipelineTag) noexcept : MatrixBase(mat.distribution()) { + setUpSubPipelines(mat); + } + Matrix(Matrix& mat, const LocalTileSize& tiles_per_block) noexcept + : MatrixBase(mat.distribution(), tiles_per_block) { + setUpRetiledSubPipelines(mat, tiles_per_block); + } void setUpTiles(const memory::MemoryView& mem, const LayoutInfo& layout) noexcept; void setUpSubPipelines(Matrix&) noexcept; @@ -269,169 +313,117 @@ class Matrix : public internal::MatrixBase { std::vector> tile_managers_; }; -// Note: the templates of the following helper functions are inverted w.r.t. the Matrix templates -// to allow the user to only specify the device and let the compiler deduce the type T. - -// Local versions - -/// Create a non distributed matrix of size @p size and block size @p block_size -/// which references elements -/// that are already allocated in the memory with a column major layout. -/// -/// @param[in] ld the leading dimension of the matrix, -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre ld >= max(1, size.row()), -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. -template -Matrix createMatrixFromColMajor(const LocalElementSize& size, const TileElementSize& block_size, - SizeType ld, T* ptr) { - return Matrix(colMajorLayout(size, block_size, ld), ptr); -} - -/// Create a non distributed matrix of size @p size and block size @p block_size -/// which references elements -/// that are already allocated in the memory with a tile layout. 
-/// -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. -template -Matrix createMatrixFromTile(const LocalElementSize& size, const TileElementSize& block_size, - T* ptr) { - return Matrix(tileLayout(size, block_size), ptr); -} - -/// Create a non distributed matrix of size @p size and block size @p block_size -/// which references elements -/// that are already allocated in the memory with a tile layout. -/// -/// @param[in] ld_tile the leading dimension of the tiles, -/// @param[in] tiles_per_col the number of tiles stored for each column of tiles, -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p ld_tile >= max(1, min(block_size.row(), size.row())), -/// @pre @p tiles_per_col >= ceilDiv(size.row(), block_size.col()), -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. -template -Matrix createMatrixFromTile(const LocalElementSize& size, const TileElementSize& block_size, - SizeType ld_tile, SizeType tiles_per_col, T* ptr) { - return Matrix(tileLayout(size, block_size, ld_tile, tiles_per_col), ptr); +template +void Matrix::waitLocalTiles() noexcept { + // Note: + // Using a readwrite access to the tile ensures that the access is exclusive and not shared + // among multiple tasks. 
+ + const auto range_local = common::iterate_range2d(distribution().local_nr_tiles()); + + auto s = pika::execution::experimental::when_all_vector(internal::selectGeneric( + [this](const LocalTileIndex& index) { + return this->tile_managers_[tileLinearIndex(index)].readwrite(); + }, + range_local)) | + pika::execution::experimental::drop_value(); + pika::this_thread::experimental::sync_wait(std::move(s)); } -// Distributed versions - -/// Create a distributed matrix of size @p size and block size @p block_size -/// on the given 2D communicator grid @p comm which references elements -/// that are already allocated in the memory with a column major layout. -/// -/// @param[in] ld the leading dimension of the matrix, -/// @param[in] source_rank_index is the rank of the process which contains the top left tile of the matrix, -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p ld >= max(1, size.row()), -/// @pre @p source_rank_index.isIn(grid_size), -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. 
-template -Matrix createMatrixFromColMajor(const GlobalElementSize& size, const TileElementSize& block_size, - SizeType ld, const comm::CommunicatorGrid& comm, - const comm::Index2D& source_rank_index, T* ptr) { - Distribution distribution(size, block_size, comm.size(), comm.rank(), source_rank_index); - auto layout = colMajorLayout(distribution.localSize(), block_size, ld); - - return Matrix(std::move(distribution), layout, ptr); +template +void Matrix::setUpTiles(const memory::MemoryView& mem, + const LayoutInfo& layout) noexcept { + const auto& nr_tiles = layout.nrTiles(); + + DLAF_ASSERT(tile_managers_.empty(), ""); + tile_managers_.reserve(to_sizet(nr_tiles.linear_size())); + + using MemView = memory::MemoryView; + + for (SizeType j = 0; j < nr_tiles.cols(); ++j) { + for (SizeType i = 0; i < nr_tiles.rows(); ++i) { + LocalTileIndex ind(i, j); + TileElementSize tile_size = layout.tileSize(ind); + tile_managers_.emplace_back( + TileDataType(tile_size, MemView(mem, layout.tileOffset(ind), layout.minTileMemSize(tile_size)), + layout.ldTile())); + } + } } -/// Create a distributed matrix of size @p size and block size @p block_size -/// on the given 2D communicator grid @p comm which references elements -/// that are already allocated in the memory with a column major layout. -/// -/// This method assumes @p source_rank_index to be {0,0}. -/// @param[in] ld the leading dimension of the matrix, -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p ld >= max(1, size.row()), -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. 
-template -Matrix createMatrixFromColMajor(const GlobalElementSize& size, const TileElementSize& block_size, - SizeType ld, const comm::CommunicatorGrid& comm, T* ptr) { - return createMatrixFromColMajor(size, block_size, ld, comm, {0, 0}, ptr); +template +void Matrix::setUpSubPipelines(Matrix& mat) noexcept { + namespace ex = pika::execution::experimental; + + // TODO: Optimize read-after-read. This is currently forced to access the base + // matrix in readwrite mode so that we can move the tile into the + // sub-pipeline. This is semantically not required and should eventually be + // optimized. + tile_managers_.reserve(mat.tile_managers_.size()); + for (auto& tm : mat.tile_managers_) { + tile_managers_.emplace_back(Tile()); + auto s = ex::when_all(tile_managers_.back().readwrite_with_wrapper(), tm.readwrite()) | + ex::then([](internal::TileAsyncRwMutexReadWriteWrapper empty_tile_wrapper, + Tile tile) { empty_tile_wrapper.get() = std::move(tile); }); + ex::start_detached(std::move(s)); + } } -/// Create a distributed matrix of size @p size and block size @p block_size -/// on the given 2D communicator grid @p comm which references elements -/// that are already allocated in the memory with a tile layout. -/// -/// @param[in] source_rank_index is the rank of the process which contains the top left tile of the matrix, -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p source_rank_index.isIn(grid_size), -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. 
-template -Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, - const comm::CommunicatorGrid& comm, - const comm::Index2D& source_rank_index, T* ptr) { - Distribution distribution(size, block_size, comm.size(), comm.rank(), source_rank_index); - auto layout = tileLayout(distribution.localSize(), block_size); - - return Matrix(std::move(distribution), layout, ptr); -} +template +void Matrix::setUpRetiledSubPipelines(Matrix& mat, + const LocalTileSize& tiles_per_block) noexcept { + DLAF_ASSERT(mat.blockSize() == mat.tile_size(), mat.blockSize(), mat.tile_size()); -/// Create a distributed matrix of size @p size and block size @p block_size -/// on the given 2D communicator grid @p comm which references elements -/// that are already allocated in the memory with a tile layout. -/// -/// This method assumes @p source_rank_index to be {0,0}. -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. -template -Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, - const comm::CommunicatorGrid& comm, T* ptr) { - return createMatrixFromTile(size, block_size, comm, {0, 0}, ptr); -} + using common::internal::vector; + namespace ex = pika::execution::experimental; -/// Create a distributed matrix of size @p size and block size @p block_size -/// on the given 2D communicator grid @p comm which references elements -/// that are already allocated in the memory with a tile layout. 
-/// -/// @param[in] ld_tile the leading dimension of the tiles, -/// @param[in] tiles_per_col the number of tiles stored for each column of tiles, -/// @param[in] source_rank_index is the rank of the process which contains the top left tile of the matrix, -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p ld_tile >= max(1, min(block_size.row(), size.row())), -/// @pre @p tiles_per_col >= ceilDiv(size.row(), block_size.row()), -/// @pre @p source_rank_index.isIn(grid_size), -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. -template -Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, - SizeType ld_tile, SizeType tiles_per_col, - const comm::CommunicatorGrid& comm, - const comm::Index2D& source_rank_index, T* ptr) { - Distribution distribution(size, block_size, comm.size(), comm.rank(), source_rank_index); - auto layout = tileLayout(distribution.localSize(), block_size, ld_tile, tiles_per_col); - - return Matrix(std::move(distribution), layout, ptr); -} + const auto n = to_sizet(distribution().local_nr_tiles().linear_size()); + tile_managers_.reserve(n); + for (std::size_t i = 0; i < n; ++i) { + tile_managers_.emplace_back(Tile()); + } -/// Create a distributed matrix of size @p size and block size @p block_size -/// on the given 2D communicator grid @p comm which references elements -/// that are already allocated in the memory with a tile layout. -/// -/// This method assumes @p source_rank_index to be {0,0}. 
-/// @param[in] ld_tile the leading dimension of the tiles, -/// @param[in] tiles_per_col the number of tiles stored for each column of tiles, -/// @param[in] ptr is the pointer to the first element of the local part of the matrix, -/// @pre @p ld_tile >= max(1, min(block_size.row(), size.row()), -/// @pre @p tiles_per_col >= ceilDiv(size.row(), block_size.col()), -/// @pre @p ptr refers to an allocated memory region which can contain the elements of the local matrix -/// stored in the given layout. -template -Matrix createMatrixFromTile(const GlobalElementSize& size, const TileElementSize& block_size, - SizeType ld_tile, SizeType tiles_per_col, - const comm::CommunicatorGrid& comm, T* ptr) { - return createMatrixFromTile(size, block_size, ld_tile, tiles_per_col, comm, {0, 0}, ptr); + const auto tile_size = distribution().tile_size(); + vector specs; + vector indices; + specs.reserve(tiles_per_block.linear_size()); + indices.reserve(tiles_per_block.linear_size()); + + // TODO: Optimize read-after-read. This is currently forced to access the base matrix in readwrite + // mode so that we can move the tile into the sub-pipeline. This is semantically not required and + // should eventually be optimized. 
+ for (const auto& orig_tile_index : common::iterate_range2d(mat.distribution().local_nr_tiles())) { + const auto original_tile_size = mat.tileSize(mat.distribution().global_tile_index(orig_tile_index)); + + for (SizeType j = 0; j < original_tile_size.cols(); j += tile_size.cols()) + for (SizeType i = 0; i < original_tile_size.rows(); i += tile_size.rows()) { + indices.emplace_back( + LocalTileIndex{orig_tile_index.row() * tiles_per_block.rows() + i / tile_size.rows(), + orig_tile_index.col() * tiles_per_block.cols() + j / tile_size.cols()}); + specs.emplace_back(SubTileSpec{{i, j}, + tileSize(distribution().global_tile_index(indices.back()))}); + } + + auto sub_tiles = + splitTileDisjoint(mat.tile_managers_[mat.tileLinearIndex(orig_tile_index)].readwrite(), specs); + + DLAF_ASSERT_HEAVY(specs.size() == indices.size(), specs.size(), indices.size()); + for (SizeType j = 0; j < specs.size(); ++j) { + const auto i = tileLinearIndex(indices[j]); + + // Move subtile to be managed by the tile manager of RetiledMatrix. We + // use readwrite_with_wrapper to get access to the original tile managed + // by the underlying async_rw_mutex. 
+ auto s = + ex::when_all(tile_managers_[i].readwrite_with_wrapper(), std::move(sub_tiles[to_sizet(j)])) | + ex::then([](internal::TileAsyncRwMutexReadWriteWrapper empty_tile_wrapper, + Tile sub_tile) { empty_tile_wrapper.get() = std::move(sub_tile); }); + ex::start_detached(std::move(s)); + } + + specs.clear(); + indices.clear(); + } } /// Returns a container grouping all the tiles retrieved using Matrix::read @@ -476,6 +468,3 @@ DLAF_MATRIX_ETI(extern, std::complex, Device::GPU) using matrix::Matrix; #endif } - -#include -#include diff --git a/include/dlaf/matrix/matrix.tpp b/include/dlaf/matrix/matrix.tpp deleted file mode 100644 index 08da1a38bc..0000000000 --- a/include/dlaf/matrix/matrix.tpp +++ /dev/null @@ -1,67 +0,0 @@ -// -// Distributed Linear Algebra with Future (DLAF) -// -// Copyright (c) 2018-2023, ETH Zurich -// All rights reserved. -// -// Please, refer to the LICENSE file in the root directory. -// SPDX-License-Identifier: BSD-3-Clause -// - -namespace dlaf { -namespace matrix { - -template -Matrix::Matrix(const LocalElementSize& size, const TileElementSize& block_size) - : Matrix(Distribution(size, block_size)) {} - -template -Matrix::Matrix(const GlobalElementSize& size, const TileElementSize& block_size, - const comm::CommunicatorGrid& comm) - : Matrix(Distribution(size, block_size, comm.size(), comm.rank(), {0, 0})) {} - -template -Matrix::Matrix(Distribution distribution) : Matrix(std::move(distribution)) { - const SizeType alignment = 64; - const SizeType ld = - std::max(1, - util::ceilDiv(this->distribution().localSize().rows(), alignment) * alignment); - - auto layout = colMajorLayout(this->distribution().localSize(), this->baseTileSize(), ld); - - SizeType memory_size = layout.minMemSize(); - memory::MemoryView mem(memory_size); - - setUpTiles(mem, layout); -} - -template -Matrix::Matrix(Distribution distribution, const LayoutInfo& layout) noexcept - : Matrix(std::move(distribution)) { - DLAF_ASSERT(this->distribution().localSize() == 
layout.size(), - "Size of distribution does not match layout size!", distribution.localSize(), - layout.size()); - DLAF_ASSERT(this->distribution().baseTileSize() == layout.blockSize(), distribution.baseTileSize(), - layout.blockSize()); - - memory::MemoryView mem(layout.minMemSize()); - - setUpTiles(mem, layout); -} - -template -Matrix::Matrix(Distribution distribution, const LayoutInfo& layout, ElementType* ptr) noexcept - : Matrix(std::move(distribution), layout, ptr) {} - -template -Matrix::Matrix(const LayoutInfo& layout, ElementType* ptr) : Matrix(layout, ptr) {} - -template -Matrix::Matrix(Matrix& mat, const SubPipelineTag tag) : Matrix(mat, tag) {} - -template -Matrix::Matrix(Matrix& mat, const LocalTileSize& tiles_per_block) - : Matrix(mat, tiles_per_block) {} - -} -} diff --git a/include/dlaf/matrix/matrix_base.h b/include/dlaf/matrix/matrix_base.h index 81c29adf55..636351ed06 100644 --- a/include/dlaf/matrix/matrix_base.h +++ b/include/dlaf/matrix/matrix_base.h @@ -26,16 +26,16 @@ class MatrixBase { MatrixBase(Distribution distribution) : distribution_(std::move(distribution)) {} MatrixBase(const Distribution& distribution, const LocalTileSize& tiles_per_block) - : distribution_(distribution.size(), distribution.blockSize(), - TileElementSize{distribution.blockSize().rows() / tiles_per_block.rows(), - distribution.blockSize().cols() / tiles_per_block.cols()}, - distribution.commGridSize(), distribution.rankIndex(), - distribution.sourceRankIndex()) { - DLAF_ASSERT(distribution.blockSize() == distribution.baseTileSize(), + : distribution_(distribution.size(), distribution.block_size(), + TileElementSize{distribution.block_size().rows() / tiles_per_block.rows(), + distribution.block_size().cols() / tiles_per_block.cols()}, + distribution.grid_size(), distribution.rank_index(), + distribution.source_rank_index()) { + DLAF_ASSERT(distribution.block_size() == distribution.tile_size(), "distribution should be the distribution of the original Matrix.", - 
distribution.blockSize(), distribution.baseTileSize()); - DLAF_ASSERT(distribution.blockSize() == distribution_.blockSize(), distribution.blockSize(), - distribution_.blockSize()); + distribution.block_size(), distribution.tile_size()); + DLAF_ASSERT(distribution.block_size() == distribution_.block_size(), distribution.block_size(), + distribution_.block_size()); } MatrixBase(const MatrixBase& rhs) = default; @@ -46,49 +46,77 @@ class MatrixBase { return distribution_.size(); } - /// Returns the block size of the matrix. + /// Returns the complete block size of the matrix. + const TileElementSize& block_size() const noexcept { + return distribution_.block_size(); + } + + /// Returns the complete tile size of the matrix. + const TileElementSize& tile_size() const noexcept { + return distribution_.tile_size(); + } + + /// Returns the number of tiles of the global matrix (2D size). + const GlobalTileSize& nr_tiles() const noexcept { + return distribution_.nr_tiles(); + } + + /// Returns the id associated to the matrix of this rank. + const comm::Index2D& rank_index() const noexcept { + return distribution_.rank_index(); + } + + /// Returns the size of the communicator grid associated to the matrix. + const comm::Size2D& grid_size() const noexcept { + return distribution_.grid_size(); + } + + /// Returns the 2D rank index of the process that stores the tile with global index @p global_tile. + /// + /// @pre global_tile.isIn(nrTiles()). + comm::Index2D rank_global_tile(const GlobalTileIndex& global_tile) const noexcept { + return distribution_.rank_global_tile(global_tile); + } + + /// Returns the size of the Tile with global index @p index. + TileElementSize tile_size_of(const GlobalTileIndex& index) const noexcept { + return distribution_.tile_size_of(index); + } + + /// Returns the distribution of the matrix. 
+ const matrix::Distribution& distribution() const noexcept { + return distribution_; + } + + // TODO remove deprecated const TileElementSize& blockSize() const noexcept { return distribution_.blockSize(); } - /// Returns the complete tile size of the matrix. const TileElementSize& baseTileSize() const noexcept { return distribution_.baseTileSize(); } - /// Returns the number of tiles of the global matrix (2D size). const GlobalTileSize& nrTiles() const noexcept { return distribution_.nrTiles(); } - /// Returns the id associated to the matrix of this rank. const comm::Index2D& rankIndex() const noexcept { return distribution_.rankIndex(); } - /// const comm::Index2D& sourceRankIndex() const noexcept { return distribution_.sourceRankIndex(); } - /// Returns the size of the communicator grid associated to the matrix. const comm::Size2D& commGridSize() const noexcept { return distribution_.commGridSize(); } - /// Returns the 2D rank index of the process that stores the tile with global index @p global_tile. - /// - /// @pre global_tile.isIn(nrTiles()). comm::Index2D rankGlobalTile(const GlobalTileIndex& global_tile) const noexcept { return distribution_.rankGlobalTile(global_tile); } - /// Returns the distribution of the matrix. - const matrix::Distribution& distribution() const noexcept { - return distribution_; - } - - /// Returns the size of the Tile with global index @p index. TileElementSize tileSize(const GlobalTileIndex& index) const noexcept { return distribution_.tileSize(index); } @@ -105,19 +133,26 @@ class MatrixBase { /// Returns the position in the vector of the index Tile. /// /// @pre index.isIn(localNrTiles()). 
+ std::size_t tile_linear_index(const LocalTileIndex& index) const noexcept { + DLAF_ASSERT_MODERATE(index.isIn(distribution_.local_nr_tiles()), index, + distribution_.local_nr_tiles()); + return to_sizet(index.row() + distribution_.local_nr_tiles().rows() * index.col()); + } + + // TODO DEPRECATE std::size_t tileLinearIndex(const LocalTileIndex& index) const noexcept { - DLAF_ASSERT_MODERATE(index.isIn(distribution_.localNrTiles()), index, distribution_.localNrTiles()); - return to_sizet(index.row() + distribution_.localNrTiles().rows() * index.col()); + return tile_linear_index(index); } /// Prints information about the matrix. friend std::ostream& operator<<(std::ostream& out, const MatrixBase& matrix) { // clang-format off return out << "size=" << matrix.size() - << ", block_size=" << matrix.blockSize() - << ", tiles_grid=" << matrix.nrTiles() - << ", rank_index=" << matrix.rankIndex() - << ", comm_grid=" << matrix.commGridSize(); + << ", block_size=" << matrix.block_size() + << ", tile_size=" << matrix.tile_size() + << ", tiles_grid=" << matrix.nr_tiles() + << ", rank_index=" << matrix.rank_index() + << ", comm_grid=" << matrix.grid_size(); // clang-format on } diff --git a/include/dlaf/matrix/matrix_const.tpp b/include/dlaf/matrix/matrix_const.tpp deleted file mode 100644 index 1c60d6f855..0000000000 --- a/include/dlaf/matrix/matrix_const.tpp +++ /dev/null @@ -1,163 +0,0 @@ -// -// Distributed Linear Algebra with Future (DLAF) -// -// Copyright (c) 2018-2023, ETH Zurich -// All rights reserved. -// -// Please, refer to the LICENSE file in the root directory. 
-// SPDX-License-Identifier: BSD-3-Clause -// - -#include -#include - -namespace dlaf { -namespace matrix { - -template -Matrix::Matrix(const LayoutInfo& layout, ElementType* ptr) - : MatrixBase({layout.size(), layout.blockSize()}) { - memory::MemoryView mem(ptr, layout.minMemSize()); - setUpTiles(mem, layout); -} - -template -Matrix::Matrix(Distribution distribution, const matrix::LayoutInfo& layout, - ElementType* ptr) noexcept - : MatrixBase(std::move(distribution)) { - DLAF_ASSERT(this->distribution().localSize() == layout.size(), distribution.localSize(), - layout.size()); - DLAF_ASSERT(this->distribution().baseTileSize() == layout.blockSize(), distribution.baseTileSize(), - layout.blockSize()); - - memory::MemoryView mem(ptr, layout.minMemSize()); - setUpTiles(mem, layout); -} - -template -void Matrix::waitLocalTiles() noexcept { - // Note: - // Using a readwrite access to the tile ensures that the access is exclusive and not shared - // among multiple tasks. - - const auto range_local = common::iterate_range2d(distribution().localNrTiles()); - - auto s = pika::execution::experimental::when_all_vector(internal::selectGeneric( - [this](const LocalTileIndex& index) { - return this->tile_managers_[tileLinearIndex(index)].readwrite(); - }, - range_local)) | - pika::execution::experimental::drop_value(); - pika::this_thread::experimental::sync_wait(std::move(s)); -} - -template -void Matrix::setUpTiles(const memory::MemoryView& mem, - const LayoutInfo& layout) noexcept { - const auto& nr_tiles = layout.nrTiles(); - - DLAF_ASSERT(tile_managers_.empty(), ""); - tile_managers_.reserve(to_sizet(nr_tiles.linear_size())); - - using MemView = memory::MemoryView; - - for (SizeType j = 0; j < nr_tiles.cols(); ++j) { - for (SizeType i = 0; i < nr_tiles.rows(); ++i) { - LocalTileIndex ind(i, j); - TileElementSize tile_size = layout.tileSize(ind); - tile_managers_.emplace_back( - TileDataType(tile_size, MemView(mem, layout.tileOffset(ind), layout.minTileMemSize(tile_size)), - 
layout.ldTile())); - } - } -} - -template -Matrix::Matrix(Matrix& mat, const SubPipelineTag) - : MatrixBase(mat.distribution()) { - setUpSubPipelines(mat); -} - -template -Matrix::Matrix(Matrix& mat, const LocalTileSize& tiles_per_block) - : MatrixBase(mat.distribution(), tiles_per_block) { - setUpRetiledSubPipelines(mat, tiles_per_block); -} - -template -void Matrix::setUpSubPipelines(Matrix& mat) noexcept { - namespace ex = pika::execution::experimental; - - // TODO: Optimize read-after-read. This is currently forced to access the base - // matrix in readwrite mode so that we can move the tile into the - // sub-pipeline. This is semantically not required and should eventually be - // optimized. - tile_managers_.reserve(mat.tile_managers_.size()); - for (auto& tm : mat.tile_managers_) { - tile_managers_.emplace_back(Tile()); - auto s = ex::when_all(tile_managers_.back().readwrite_with_wrapper(), tm.readwrite()) | - ex::then([](internal::TileAsyncRwMutexReadWriteWrapper empty_tile_wrapper, - Tile tile) { empty_tile_wrapper.get() = std::move(tile); }); - ex::start_detached(std::move(s)); - } -} - -template -void Matrix::setUpRetiledSubPipelines(Matrix& mat, - const LocalTileSize& tiles_per_block) noexcept { - DLAF_ASSERT(mat.blockSize() == mat.baseTileSize(), mat.blockSize(), mat.baseTileSize()); - - using common::internal::vector; - namespace ex = pika::execution::experimental; - - const auto n = to_sizet(distribution().localNrTiles().linear_size()); - tile_managers_.reserve(n); - for (std::size_t i = 0; i < n; ++i) { - tile_managers_.emplace_back(Tile()); - } - - const auto tile_size = distribution().baseTileSize(); - vector specs; - vector indices; - specs.reserve(tiles_per_block.linear_size()); - indices.reserve(tiles_per_block.linear_size()); - - // TODO: Optimize read-after-read. This is currently forced to access the base matrix in readwrite mode - // so that we can move the tile into the sub-pipeline. 
This is semantically not required and should - // eventually be optimized. - for (const auto& orig_tile_index : common::iterate_range2d(mat.distribution().localNrTiles())) { - const auto original_tile_size = mat.tileSize(mat.distribution().globalTileIndex(orig_tile_index)); - - for (SizeType j = 0; j < original_tile_size.cols(); j += tile_size.cols()) - for (SizeType i = 0; i < original_tile_size.rows(); i += tile_size.rows()) { - indices.emplace_back( - LocalTileIndex{orig_tile_index.row() * tiles_per_block.rows() + i / tile_size.rows(), - orig_tile_index.col() * tiles_per_block.cols() + j / tile_size.cols()}); - specs.emplace_back(SubTileSpec{{i, j}, - tileSize(distribution().globalTileIndex(indices.back()))}); - } - - auto sub_tiles = - splitTileDisjoint(mat.tile_managers_[mat.tileLinearIndex(orig_tile_index)].readwrite(), specs); - - DLAF_ASSERT_HEAVY(specs.size() == indices.size(), specs.size(), indices.size()); - for (SizeType j = 0; j < specs.size(); ++j) { - const auto i = tileLinearIndex(indices[j]); - - // Move subtile to be managed by the tile manager of RetiledMatrix. We - // use readwrite_with_wrapper to get access to the original tile managed - // by the underlying async_rw_mutex. - auto s = - ex::when_all(tile_managers_[i].readwrite_with_wrapper(), std::move(sub_tiles[to_sizet(j)])) | - ex::then([](internal::TileAsyncRwMutexReadWriteWrapper empty_tile_wrapper, - Tile sub_tile) { empty_tile_wrapper.get() = std::move(sub_tile); }); - ex::start_detached(std::move(s)); - } - - specs.clear(); - indices.clear(); - } -} - -} -} diff --git a/include/dlaf/util_matrix.h b/include/dlaf/util_matrix.h index 4de51211c5..1f6efb443f 100644 --- a/include/dlaf/util_matrix.h +++ b/include/dlaf/util_matrix.h @@ -46,13 +46,13 @@ bool square_size(const MatrixLike& m) noexcept { /// Returns true if the matrix block size is square. 
template bool square_blocksize(const MatrixLike& m) noexcept { - return m.blockSize().rows() == m.blockSize().cols(); + return m.block_size().rows() == m.block_size().cols(); } /// Returns true if the matrix has a single tile per block. template bool single_tile_per_block(const MatrixLike& m) noexcept { - return m.blockSize() == m.baseTileSize(); + return m.block_size() == m.tile_size(); } /// Returns true if matrices have equal sizes. @@ -64,7 +64,7 @@ bool equal_size(const MatrixLikeA& lhs, const MatrixLikeB& rhs) noexcept { /// Returns true if matrices have equal blocksizes. template bool equal_blocksize(const Matrix& lhs, Matrix& rhs) noexcept { - return lhs.blockSize() == rhs.blockSize(); + return lhs.block_size() == rhs.block_size(); } /// Returns true if the matrix is local to a process. @@ -102,7 +102,7 @@ template bool multipliable(const Matrix& a, const Matrix& b, const Matrix& c, const blas::Op opA, const blas::Op opB) noexcept { return multipliable_sizes(a.size(), b.size(), c.size(), opA, opB) && - multipliable_sizes(a.blockSize(), b.blockSize(), c.blockSize(), opA, opB); + multipliable_sizes(a.block_size(), b.block_size(), c.block_size(), opA, opB); } namespace util { @@ -161,7 +161,7 @@ void set0(pika::execution::thread_priority priority, LocalTileIndex begin, Local /// template void set0(pika::execution::thread_priority priority, Matrix& matrix) { - set0(priority, LocalTileIndex(0, 0), matrix.distribution().localNrTiles(), matrix); + set0(priority, LocalTileIndex(0, 0), matrix.distribution().local_nr_tiles(), matrix); } /// Sets all the elements of all the tiles in the active range to zero @@ -183,9 +183,9 @@ void set0(pika::execution::thread_priority priority, Panel& template void set(Matrix& matrix, ElementGetter el_f) { const Distribution& dist = matrix.distribution(); - for (auto tile_wrt_local : iterate_range2d(dist.localNrTiles())) { - GlobalTileIndex tile_wrt_global = dist.globalTileIndex(tile_wrt_local); - auto tl_index = 
dist.globalElementIndex(tile_wrt_global, {0, 0}); + for (auto tile_wrt_local : iterate_range2d(dist.local_nr_tiles())) { + GlobalTileIndex tile_wrt_global = dist.global_tile_index(tile_wrt_local); + auto tl_index = dist.global_element_index(tile_wrt_global, {0, 0}); using TileType = typename std::decay_t::TileType; auto set_f = [tl_index, el_f = el_f](const TileType& tile) { @@ -242,9 +242,9 @@ void set(Matrix& matrix, ElementGetter el_f, const blas::Op op) template void set_random(Matrix& matrix) { const Distribution& dist = matrix.distribution(); - for (auto tile_wrt_local : iterate_range2d(dist.localNrTiles())) { - GlobalTileIndex tile_wrt_global = dist.globalTileIndex(tile_wrt_local); - auto tl_index = dist.globalElementIndex(tile_wrt_global, {0, 0}); + for (auto tile_wrt_local : iterate_range2d(dist.local_nr_tiles())) { + GlobalTileIndex tile_wrt_global = dist.global_tile_index(tile_wrt_local); + auto tl_index = dist.global_element_index(tile_wrt_global, {0, 0}); auto seed = tl_index.col() + tl_index.row() * matrix.size().cols(); using TileType = typename std::decay_t::TileType; @@ -333,12 +333,12 @@ void set_random_hermitian_with_offset(Matrix& matrix, const Size DLAF_ASSERT(square_size(matrix), matrix); DLAF_ASSERT(square_blocksize(matrix), matrix); - auto full_tile_size = matrix.blockSize(); + auto full_tile_size = matrix.block_size(); - for (auto tile_wrt_local : iterate_range2d(dist.localNrTiles())) { - GlobalTileIndex tile_wrt_global = dist.globalTileIndex(tile_wrt_local); + for (auto tile_wrt_local : iterate_range2d(dist.local_nr_tiles())) { + GlobalTileIndex tile_wrt_global = dist.global_tile_index(tile_wrt_local); - auto tl_index = dist.globalElementIndex(tile_wrt_global, {0, 0}); + auto tl_index = dist.global_element_index(tile_wrt_global, {0, 0}); // compute the same seed for original and "transposed" tiles, so transposed ones will know the // values of the original one without the need of accessing real values (nor communication in case diff --git 
a/src/c_api/eigensolver/eigensolver.h b/src/c_api/eigensolver/eigensolver.h index 8040eb1ce3..4b71e712da 100644 --- a/src/c_api/eigensolver/eigensolver.h +++ b/src/c_api/eigensolver/eigensolver.h @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/test/include/dlaf_test/matrix/matrix_local.h b/test/include/dlaf_test/matrix/matrix_local.h index 011ad2fe0e..ff4fcbb549 100644 --- a/test/include/dlaf_test/matrix/matrix_local.h +++ b/test/include/dlaf_test/matrix/matrix_local.h @@ -97,6 +97,10 @@ struct MatrixLocal { return GlobalElementSize{layout_.size().rows(), layout_.size().cols()}; } + TileElementSize block_size() const noexcept { + return layout_.blockSize(); + } + TileElementSize blockSize() const noexcept { return layout_.blockSize(); } diff --git a/test/unit/matrix/test_matrix.cpp b/test/unit/matrix/test_matrix.cpp index 917ada9614..dc021bad87 100644 --- a/test/unit/matrix/test_matrix.cpp +++ b/test/unit/matrix/test_matrix.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include