Skip to content

Commit

Permalink
Clean up permutations implementation
Browse files (browse the repository at this point in the history)
  • Loading branch information
msimberg committed Aug 7, 2023
1 parent 5781370 commit 68e3064
Showing 1 changed file with 14 additions and 17 deletions.
31 changes: 14 additions & 17 deletions include/dlaf/permutations/general/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,31 +468,28 @@ void unpackLocalOnCPU(const matrix::Distribution& subm_dist, const matrix::Distr
[offset](const SizeType perm) { return perm + offset; });

constexpr auto OC = orthogonal(C);
// TODO: Always 0?
const SizeType in_offset = 0;
// TODO: Always {0, 0}?
const GlobalElementIndex out_begin{0, 0};

std::vector<SizeType> splits =
dlaf::util::interleaveSplits(sz.get<OC>(), subm_dist.blockSize().get<OC>(),
subm_dist.distanceToAdjacentTile<OC>(in_offset),
subm_dist.distanceToAdjacentTile<OC>(out_begin.get<OC>()));

// TODO: send/recv_counts not really needed in next step?
return std::tuple(a, b, std::move(splits), std::move(perm_offseted), std::move(send_counts),
std::move(recv_counts), std::move(index_tile_futs), std::move(mat_in_tiles),
return std::tuple(a, b, std::move(splits), std::move(perm_offseted), std::move(mat_in_tiles),
std::move(mat_out_tiles));
};

// Per-index callback of the local unpack step: for permutation index `i_perm` it calls
// applyPermutationOnCPU only when perm_arr[i_perm] falls inside the half-open range [a, b).
// `args` is the tuple returned by the preceding setup lambda.
// NOTE(review): this listing is a diff rendering whose +/- markers were lost, so the pre-commit
// and post-commit forms of several statements appear back to back (hence the repeated
// declarations). The labels below are inferred from the hunk header and commit summary - confirm
// against the repository before relying on them.
auto permutations_unpack_local_f = [subm_dist](const auto i_perm, const auto& args) {
// Pre-commit form (presumably the removed lines): the tuple still carried send_counts,
// recv_counts and index_tile_futs, none of which are read in this lambda.
auto& [a, b, splits, perm_offseted, send_counts, recv_counts, index_tile_futs, mat_in_tiles,
mat_out_tiles] = args;
const SizeType* perm_arr = perm_offseted.data(); // index_tile_futs[0].get().ptr();
// Post-commit form (presumably the added lines): only the six values used below are unpacked,
// and the permutation indices are read from the offset copy `perm_offseted`.
auto& [a, b, splits, perm_offseted, mat_in_tiles, mat_out_tiles] = args;
const SizeType* perm_arr = perm_offseted.data();

// [a, b)
if (a <= perm_arr[i_perm] && perm_arr[i_perm] < b) {
// Pre-commit call: the output origin and input offset are passed as the literals {0, 0} and 0.
applyPermutationOnCPU<T, C>(i_perm, splits, {0, 0}, 0, subm_dist, perm_arr, mat_in_tiles,
mat_out_tiles);
// Post-commit call: the same zero values, now given names so the arguments are self-describing.
const SizeType in_offset = 0;
const GlobalElementIndex out_begin{0, 0};
applyPermutationOnCPU<T, C>(i_perm, splits, out_begin, in_offset, subm_dist, perm_arr,
mat_in_tiles, mat_out_tiles);
}
};

Expand Down Expand Up @@ -522,28 +519,28 @@ void unpackOthersOnCPU(const matrix::Distribution& subm_dist, const matrix::Dist

constexpr auto OC = orthogonal(C);
const GlobalElementSize sz = subm_dist.size();
// TODO: Always 0?
const SizeType in_offset = 0;
// TODO: Always {0, 0}?
const GlobalElementIndex out_begin{0, 0};

std::vector<SizeType> splits =
dlaf::util::interleaveSplits(sz.get<OC>(), subm_dist.blockSize().get<OC>(),
subm_dist.distanceToAdjacentTile<OC>(in_offset),
subm_dist.distanceToAdjacentTile<OC>(out_begin.get<OC>()));

return std::tuple(a, b, std::move(splits), std::move(recv_counts), std::move(index_tile_futs),
std::move(mat_in_tiles), std::move(mat_out_tiles));
return std::tuple(a, b, std::move(splits), std::move(index_tile_futs), std::move(mat_in_tiles),
std::move(mat_out_tiles));
};

// Per-index callback of the "others" unpack step: for permutation index `i_perm` it calls
// applyPermutationOnCPU only when perm_arr[i_perm] lies outside [a, b), i.e. in [0, a) or [b, end).
// `args` is the tuple returned by the preceding setup lambda.
// NOTE(review): this listing is a diff rendering whose +/- markers were lost, so the pre-commit
// and post-commit forms of several statements appear back to back (hence the repeated binding of
// `args`). The labels below are inferred from the hunk header and commit summary - confirm
// against the repository before relying on them.
auto permutations_unpack_f = [subm_dist](const auto i_perm, const auto& args) {
// Pre-commit form (presumably the removed line): the tuple still carried recv_counts, which is
// not read in this lambda.
auto& [a, b, splits, recv_counts, index_tile_futs, mat_in_tiles, mat_out_tiles] = args;
// Post-commit form (presumably the added line): recv_counts dropped from the tuple.
auto& [a, b, splits, index_tile_futs, mat_in_tiles, mat_out_tiles] = args;
// The permutation indices are read through the pointer exposed by the first index tile.
const SizeType* perm_arr = index_tile_futs[0].get().ptr();

// [0, a) and [b, end)
if (perm_arr[i_perm] < a || b <= perm_arr[i_perm]) {
// Pre-commit call: the output origin and input offset are passed as the literals {0, 0} and 0.
applyPermutationOnCPU<T, C>(i_perm, splits, {0, 0}, 0, subm_dist, perm_arr, mat_in_tiles,
mat_out_tiles);
// Post-commit call: the same zero values, now given names so the arguments are self-describing.
const SizeType in_offset = 0;
const GlobalElementIndex out_begin{0, 0};
applyPermutationOnCPU<T, C>(i_perm, splits, out_begin, in_offset, subm_dist, perm_arr,
mat_in_tiles, mat_out_tiles);
}
};

Expand Down

0 comments on commit 68e3064

Please sign in to comment.