Skip to content

Commit

Permalink
Support different tile and block size in Matrix (#909)
Browse files Browse the repository at this point in the history
  • Loading branch information
msimberg authored Jul 28, 2023
1 parent df5ae82 commit 9b7f0e0
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 71 deletions.
4 changes: 2 additions & 2 deletions include/dlaf/matrix/distribution.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class Distribution {
/// @param[in] source_rank_index is the rank of the process which contains the top left tile of the matrix,
/// @param[in] element_offset is the element-wise offset of the top left tile of the matrix,
/// @pre size.isValid(),
/// @pre !tile_size.isEmpty(),
/// @pre !block_size.isEmpty(),
/// @pre !grid_size.isEmpty(),
/// @pre rank_index.isIn(grid_size),
/// @pre source_rank_index.isIn(grid_size).
Expand All @@ -62,7 +62,7 @@ class Distribution {
/// @param[in] element_offset is the element-wise offset of the top left tile
/// of the matrix, used in addition to @p tile_offset,
/// @pre size.isValid(),
/// @pre !tile_size.isEmpty(),
/// @pre !block_size.isEmpty(),
/// @pre !grid_size.isEmpty(),
/// @pre rank_index.isIn(grid_size),
/// @pre source_rank_index.isIn(grid_size).
Expand Down
4 changes: 2 additions & 2 deletions include/dlaf/matrix/matrix.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Matrix<T, D>::Matrix(Distribution distribution) : Matrix<const T, D>(std::move(d
std::max<SizeType>(1,
util::ceilDiv(this->distribution().localSize().rows(), alignment) * alignment);

auto layout = colMajorLayout(this->distribution().localSize(), this->blockSize(), ld);
auto layout = colMajorLayout(this->distribution().localSize(), this->baseTileSize(), ld);

SizeType memory_size = layout.minMemSize();
memory::MemoryView<ElementType, D> mem(memory_size);
Expand All @@ -41,7 +41,7 @@ Matrix<T, D>::Matrix(Distribution distribution, const LayoutInfo& layout) noexce
DLAF_ASSERT(this->distribution().localSize() == layout.size(),
"Size of distribution does not match layout size!", distribution.localSize(),
layout.size());
DLAF_ASSERT(this->distribution().blockSize() == layout.blockSize(), distribution.blockSize(),
DLAF_ASSERT(this->distribution().baseTileSize() == layout.blockSize(), distribution.baseTileSize(),
layout.blockSize());

memory::MemoryView<ElementType, D> mem(layout.minMemSize());
Expand Down
5 changes: 1 addition & 4 deletions include/dlaf/matrix/matrix_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ namespace internal {

class MatrixBase {
public:
MatrixBase(Distribution distribution) : distribution_(std::move(distribution)) {
DLAF_ASSERT(distribution.blockSize() == distribution.baseTileSize(),
"Multi Tile distribution block is not supperted by Matrix yet.");
}
MatrixBase(Distribution distribution) : distribution_(std::move(distribution)) {}

MatrixBase(const Distribution& distribution, const LocalTileSize& tiles_per_block)
: distribution_(distribution.size(), distribution.blockSize(),
Expand Down
2 changes: 1 addition & 1 deletion include/dlaf/matrix/matrix_const.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ Matrix<const T, D>::Matrix(Distribution distribution, const matrix::LayoutInfo&
: MatrixBase(std::move(distribution)) {
DLAF_ASSERT(this->distribution().localSize() == layout.size(), distribution.localSize(),
layout.size());
DLAF_ASSERT(this->distribution().blockSize() == layout.blockSize(), distribution.blockSize(),
DLAF_ASSERT(this->distribution().baseTileSize() == layout.blockSize(), distribution.baseTileSize(),
layout.blockSize());

memory::MemoryView<ElementType, D> mem(ptr, layout.minMemSize());
Expand Down
4 changes: 2 additions & 2 deletions test/include/dlaf_test/matrix/util_matrix_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ template <class T>
MatrixLocal<T> allGather(blas::Uplo uplo, Matrix<const T, Device::CPU>& source) {
DLAF_ASSERT(matrix::local_matrix(source), source);

MatrixLocal<std::remove_const_t<T>> dest(source.size(), source.blockSize());
MatrixLocal<std::remove_const_t<T>> dest(source.size(), source.baseTileSize());

auto targeted_tile = internal::checkerForIndexIn(uplo);

Expand All @@ -103,7 +103,7 @@ MatrixLocal<T> allGather(blas::Uplo uplo, Matrix<const T, Device::CPU>& source,
comm::CommunicatorGrid comm_grid) {
DLAF_ASSERT(matrix::equal_process_grid(source, comm_grid), source, comm_grid);

MatrixLocal<std::remove_const_t<T>> dest(source.size(), source.blockSize());
MatrixLocal<std::remove_const_t<T>> dest(source.size(), source.baseTileSize());

const auto& dist_source = source.distribution();
const auto rank = dist_source.rankIndex();
Expand Down
4 changes: 2 additions & 2 deletions test/unit/eigensolver/test_reduction_to_band.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ std::vector<config_t> configs_subband{

template <class T>
MatrixLocal<T> makeLocal(const Matrix<const T, Device::CPU>& matrix) {
return {matrix.size(), matrix.distribution().blockSize()};
return {matrix.size(), matrix.distribution().baseTileSize()};
}

template <class T>
Expand All @@ -125,7 +125,7 @@ void setupHermitianBand(MatrixLocal<T>& matrix, const SizeType band_size) {
DLAF_ASSERT(matrix.blockSize().rows() % band_size == 0, band_size, matrix.blockSize().rows());

DLAF_ASSERT(square_blocksize(matrix), matrix.blockSize());
DLAF_ASSERT(square_size(matrix), matrix.blockSize());
DLAF_ASSERT(square_size(matrix), matrix.size());

dlaf::common::internal::SingleThreadedBlasScope single;

Expand Down
67 changes: 42 additions & 25 deletions test/unit/matrix/test_matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <dlaf/communication/communicator_grid.h>
#include <dlaf/matrix/copy.h>
#include <dlaf/matrix/distribution.h>
#include <dlaf/matrix/matrix.h>
#include <dlaf/matrix/matrix_mirror.h>
#include <dlaf/util_matrix.h>
Expand Down Expand Up @@ -55,16 +56,17 @@ TYPED_TEST_SUITE(MatrixTest, MatrixElementTypes);
// Parameters of one Matrix test case (one entry of sizes_tests).
// - size: local element size; the tests pass it to globalTestSize() together with the
//   communicator grid size to obtain the global matrix size.
// - block_size, tile_size: forwarded, in this order, to the Distribution constructor.
//   tile_size is also the size the tests hand to tileLayout()/colMajorLayout().
// Entries are brace-initialized positionally, so the member order must not change.
struct TestSizes {
LocalElementSize size;
TileElementSize block_size;
TileElementSize tile_size;
};

const std::vector<TestSizes> sizes_tests({
{{0, 0}, {11, 13}},
{{3, 0}, {1, 2}},
{{0, 1}, {7, 32}},
{{15, 18}, {5, 9}},
{{6, 6}, {2, 2}},
{{3, 4}, {24, 15}},
{{16, 24}, {3, 5}},
{{0, 0}, {11, 13}, {11, 13}},
{{3, 0}, {1, 2}, {1, 1}},
{{0, 1}, {7, 32}, {7, 8}},
{{15, 18}, {5, 9}, {5, 3}},
{{6, 6}, {2, 2}, {2, 2}},
{{3, 4}, {24, 15}, {8, 15}},
{{16, 24}, {3, 5}, {3, 5}},
});

GlobalElementSize globalTestSize(const LocalElementSize& size, const Size2D& grid_size) {
Expand Down Expand Up @@ -195,8 +197,8 @@ TYPED_TEST(MatrixTest, ConstructorFromDistribution) {
GlobalElementSize size = globalTestSize(test.size, comm_grid.size());
comm::Index2D src_rank_index(std::max(0, comm_grid.size().rows() - 1),
std::min(1, comm_grid.size().cols() - 1));
Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(),
src_rank_index);
Distribution distribution(size, test.block_size, test.tile_size, comm_grid.size(),
comm_grid.rank(), src_rank_index);

// Copy distribution for testing purpose.
Distribution distribution_copy(distribution);
Expand Down Expand Up @@ -519,22 +521,34 @@ TYPED_TEST(MatrixTest, LocalGlobalAccessRead) {
// Parameters of one entry of existing_local_tests.
// Entries are brace-initialized positionally, so the member order must not change.
// NOTE(review): the code that reads ld, row_offset and col_offset is not visible here.
// Judging by the annotations on the test table ("with padding (ld)", "(row)", "(col)"),
// ld is presumably the leading dimension and the two offsets the distances in memory
// between consecutive tiles along rows and columns - confirm against the consuming test.
struct ExistingLocalTestSizes {
LocalElementSize size;
TileElementSize block_size;
TileElementSize tile_size;
SizeType ld;
SizeType row_offset;
SizeType col_offset;
};

const std::vector<ExistingLocalTestSizes> existing_local_tests({
{{10, 7}, {3, 4}, 10, 3, 40}, // Column major layout
{{10, 7}, {3, 4}, 11, 3, 44}, // with padding (ld)
{{10, 7}, {3, 4}, 13, 4, 52}, // with padding (row)
{{10, 7}, {3, 4}, 10, 3, 41}, // with padding (col)
{{6, 11}, {4, 3}, 4, 12, 24}, // Tile layout
{{6, 11}, {4, 3}, 5, 15, 30}, // with padding (ld)
{{6, 11}, {4, 3}, 4, 13, 26}, // with padding (row)
{{6, 11}, {4, 3}, 4, 12, 31}, // with padding (col)
{{6, 11}, {4, 3}, 4, 12, 28}, // compressed col_offset
{{0, 0}, {1, 1}, 1, 1, 1},
{{10, 7}, {3, 4}, {3, 4}, 10, 3, 40}, // Column major layout
{{10, 7}, {3, 4}, {3, 4}, 11, 3, 44}, // with padding (ld)
{{10, 7}, {3, 4}, {3, 4}, 13, 4, 52}, // with padding (row)
{{10, 7}, {3, 4}, {3, 4}, 10, 3, 41}, // with padding (col)
{{6, 11}, {4, 3}, {4, 3}, 4, 12, 24}, // Tile layout
{{6, 11}, {4, 3}, {4, 3}, 5, 15, 30}, // with padding (ld)
{{6, 11}, {4, 3}, {4, 3}, 4, 13, 26}, // with padding (row)
{{6, 11}, {4, 3}, {4, 3}, 4, 12, 31}, // with padding (col)
{{6, 11}, {4, 3}, {4, 3}, 4, 12, 28}, // compressed col_offset
{{0, 0}, {1, 1}, {1, 1}, 1, 1, 1},
// Same, but with block_size != tile_size
{{10, 7}, {3, 4}, {3, 2}, 10, 3, 40}, // Column major layout
{{10, 7}, {3, 4}, {3, 2}, 11, 3, 44}, // with padding (ld)
{{10, 7}, {3, 4}, {3, 2}, 13, 4, 52}, // with padding (row)
{{10, 7}, {3, 4}, {3, 1}, 10, 3, 41}, // with padding (col)
{{6, 11}, {4, 3}, {2, 3}, 4, 12, 24}, // Tile layout
{{6, 11}, {4, 3}, {2, 3}, 5, 15, 30}, // with padding (ld)
{{6, 11}, {4, 3}, {2, 3}, 4, 13, 26}, // with padding (row)
{{6, 11}, {4, 3}, {1, 3}, 4, 12, 31}, // with padding (col)
{{6, 11}, {4, 3}, {1, 3}, 4, 12, 28}, // compressed col_offset
{{0, 0}, {1, 1}, {1, 1}, 1, 1, 1},
});

TYPED_TEST(MatrixLocalTest, ConstructorExisting) {
Expand Down Expand Up @@ -585,8 +599,9 @@ TYPED_TEST(MatrixTest, ConstructorExisting) {
for (const auto& comm_grid : this->commGrids()) {
for (const auto& test : sizes_tests) {
GlobalElementSize size = globalTestSize(test.size, comm_grid.size());
Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0});
LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size);
Distribution distribution(size, test.block_size, test.tile_size, comm_grid.size(),
comm_grid.rank(), {0, 0});
LayoutInfo layout = tileLayout(distribution.localSize(), test.tile_size);
memory::MemoryView<Type, Device::CPU> mem(layout.minMemSize());

// Copy distribution for testing purpose.
Expand Down Expand Up @@ -615,8 +630,9 @@ TYPED_TEST(MatrixTest, ConstructorExistingConst) {
for (const auto& comm_grid : this->commGrids()) {
for (const auto& test : sizes_tests) {
GlobalElementSize size = globalTestSize(test.size, comm_grid.size());
Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0});
LayoutInfo layout = colMajorLayout(distribution.localSize(), test.block_size,
Distribution distribution(size, test.block_size, test.tile_size, comm_grid.size(),
comm_grid.rank(), {0, 0});
LayoutInfo layout = colMajorLayout(distribution.localSize(), test.tile_size,
std::max<SizeType>(1, distribution.localSize().rows()));
memory::MemoryView<Type, Device::CPU> mem(layout.minMemSize());

Expand Down Expand Up @@ -916,8 +932,9 @@ TYPED_TEST(MatrixTest, DependenciesConstSubPipelineConst) {
for (const auto& test : sizes_tests) {
GlobalElementSize size = globalTestSize(test.size, comm_grid.size());

Distribution distribution(size, test.block_size, comm_grid.size(), comm_grid.rank(), {0, 0});
LayoutInfo layout = tileLayout(distribution.localSize(), test.block_size);
Distribution distribution(size, test.block_size, test.tile_size, comm_grid.size(),
comm_grid.rank(), {0, 0});
LayoutInfo layout = tileLayout(distribution.localSize(), test.tile_size);
memory::MemoryView<Type, Device::CPU> mem(layout.minMemSize());
const Type* p = mem();
Matrix<const Type, Device::CPU> mat(std::move(distribution), layout, p);
Expand Down
28 changes: 15 additions & 13 deletions test/unit/matrix/test_matrix_local.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <sstream>
#include <vector>

#include <dlaf/matrix/distribution.h>
#include <dlaf/matrix/matrix.h>
#include <dlaf/util_math.h>

Expand Down Expand Up @@ -41,13 +42,14 @@ T value_preset(const GlobalElementIndex& index) {
// Parameters of one MatrixLocal test case (one entry of sizes_tests).
// - size: global element size of the matrix under test.
// - tile_size: the size handed to the MatrixLocal constructor in these tests.
// - block_size, tile_size: forwarded, in this order, to the Distribution constructor
//   in the AllGather test.
// Entries are brace-initialized positionally, so the member order must not change.
struct TestSizes {
GlobalElementSize size;
TileElementSize block_size;
TileElementSize tile_size;
};

const std::vector<TestSizes> sizes_tests({
{{15, 18}, {5, 9}},
{{6, 6}, {2, 2}},
{{3, 4}, {24, 15}},
{{16, 24}, {3, 5}},
{{15, 18}, {5, 9}, {5, 3}},
{{6, 6}, {2, 2}, {2, 2}},
{{3, 4}, {24, 15}, {8, 15}},
{{16, 24}, {3, 5}, {3, 5}},
});

template <typename Type>
Expand All @@ -57,14 +59,14 @@ TYPED_TEST_SUITE(MatrixLocalTest, MatrixElementTypes);

TYPED_TEST(MatrixLocalTest, ConstructorAndShape) {
for (const auto& test : sizes_tests) {
const MatrixLocal<const TypeParam> mat(test.size, test.block_size);
const MatrixLocal<const TypeParam> mat(test.size, test.tile_size);

EXPECT_EQ(test.size, mat.size());
EXPECT_EQ(test.block_size, mat.blockSize());
EXPECT_EQ(test.tile_size, mat.blockSize());

const GlobalTileSize nrTiles{
dlaf::util::ceilDiv(test.size.rows(), test.block_size.rows()),
dlaf::util::ceilDiv(test.size.cols(), test.block_size.cols()),
dlaf::util::ceilDiv(test.size.rows(), test.tile_size.rows()),
dlaf::util::ceilDiv(test.size.cols(), test.tile_size.cols()),
};
EXPECT_EQ(nrTiles, mat.nrTiles());

Expand All @@ -76,7 +78,7 @@ TYPED_TEST(MatrixLocalTest, Set) {
constexpr auto error = TypeUtilities<TypeParam>::error;

for (const auto& test : sizes_tests) {
MatrixLocal<TypeParam> mat(test.size, test.block_size);
MatrixLocal<TypeParam> mat(test.size, test.tile_size);

set(mat, value_preset<TypeParam>);

Expand All @@ -89,12 +91,12 @@ TYPED_TEST(MatrixLocalTest, Copy) {

for (const auto& config : sizes_tests) {
MatrixLocal<const TypeParam> source = [&config]() {
MatrixLocal<TypeParam> source(config.size, config.block_size);
MatrixLocal<TypeParam> source(config.size, config.tile_size);
set(source, value_preset<TypeParam>);
return source;
}();

MatrixLocal<TypeParam> dest(config.size, config.block_size);
MatrixLocal<TypeParam> dest(config.size, config.tile_size);

copy(source, dest);

Expand Down Expand Up @@ -195,8 +197,8 @@ TYPED_TEST(MatrixLocalWithCommTest, AllGather) {
const GlobalElementSize size = globalTestSize(config.size, comm_grid.size());
comm::Index2D src_rank_index(std::max(0, comm_grid.size().rows() - 1),
std::min(1, comm_grid.size().cols() - 1));
Distribution distribution(size, config.block_size, comm_grid.size(), comm_grid.rank(),
src_rank_index);
Distribution distribution(size, config.block_size, config.tile_size, comm_grid.size(),
comm_grid.rank(), src_rank_index);

Matrix<TypeParam, Device::CPU> source(std::move(distribution));

Expand Down
Loading

0 comments on commit 9b7f0e0

Please sign in to comment.