Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add concepts and concrete types for BlockZipperJoinImpl #1625

Merged
merged 5 commits into from
Nov 29, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 63 additions & 37 deletions src/util/JoinAlgorithms/JoinAlgorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "util/JoinAlgorithms/FindUndefRanges.h"
#include "util/JoinAlgorithms/JoinColumnMapping.h"
#include "util/TransparentFunctors.h"
#include "util/TypeTraits.h"

namespace ad_utility {

Expand Down Expand Up @@ -770,9 +771,10 @@ struct BlockZipperJoinImpl {
// Recompute the `currentEl`. It is the minimum (w.r.t. `lessThan_`) of the
// projected last elements of the first block in `currentBlocks_` of the left
// and of the right join side.
// NOTE(review): `front().back()` is called on both sides without a check, so
// this presumably requires that both sides currently hold a nonempty first
// block -- confirm against the callers.
ProjectedEl getCurrentEl() {
// Diff context: the next three lines are the version removed by this change;
// the helper accepted a `side` argument of any type.
auto getFirst = [](const auto& side) {
return side.projection_(side.currentBlocks_.front().back());
};
// Diff context: the next four lines are the version added by this change;
// the body is the same, but `side` is now constrained with
// `ad_utility::SameAsAny<LeftSide, RightSide>`.
auto getFirst =
[](const ad_utility::SameAsAny<LeftSide, RightSide> auto& side) {
return side.projection_(side.currentBlocks_.front().back());
};
return std::min(getFirst(leftSide_), getFirst(rightSide_), lessThan_);
}

Expand All @@ -784,7 +786,9 @@ struct BlockZipperJoinImpl {
// blocks that contain elements <= `currentEl` have been added, and `false` if
// the function returned because 3 blocks were added without fulfilling the
// condition.
bool fillEqualToCurrentEl(auto& side, const auto& currentEl) {
bool fillEqualToCurrentEl(
ad_utility::SameAsAny<LeftSide, RightSide> auto& side,
const ProjectedEl& currentEl) {
auto& it = side.it_;
auto& end = side.end_;
for (size_t numBlocksRead = 0; it != end && numBlocksRead < 3;
Expand All @@ -808,7 +812,7 @@ struct BlockZipperJoinImpl {
// sides contain all the relevant blocks. Only filling one side is used for
// the optimization for the Cartesian product described in the documentation.
// Status type returned by `fillEqualToCurrentElBothSides`.
// NOTE(review): presumably `leftMissing` / `rightMissing` mean that the
// respective side may still lack relevant blocks, and `allFilled` that neither
// side does -- confirm against the function body, which is collapsed here.
enum struct BlockStatus { leftMissing, rightMissing, allFilled };
BlockStatus fillEqualToCurrentElBothSides(const auto& currentEl) {
BlockStatus fillEqualToCurrentElBothSides(const ProjectedEl& currentEl) {
bool allBlocksFromLeft = false;
bool allBlocksFromRight = false;
while (!(allBlocksFromLeft || allBlocksFromRight)) {
Expand All @@ -828,14 +832,15 @@ struct BlockZipperJoinImpl {
// `rightSide_.currentBlocks`) s.t. only elements `> lastProcessedElement`
// remain. This effectively removes all blocks completely, except maybe the
// last one.
template <typename Blocks, typename ProjectedEl>
void removeEqualToCurrentEl(Blocks& blocks,
ProjectedEl lastProcessedElement) {
void removeEqualToCurrentEl(
ad_utility::SameAsAny<typename LeftSide::CurrentBlocks,
typename RightSide::CurrentBlocks> auto& blocks,
const ProjectedEl& lastProcessedElement) {
// Erase all but the last block.
AD_CORRECTNESS_CHECK(!blocks.empty());
if (blocks.size() > 1 && !blocks.front().empty()) {
AD_CORRECTNESS_CHECK(!lessThan_(lastProcessedElement,
std::as_const(blocks.front()).back()));
AD_CORRECTNESS_CHECK(
!lessThan_(lastProcessedElement, blocks.front().back()));
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, the `std::as_const` call is dropped. This presumably works because of recent fixes to the `IdTable` class.

}
blocks.erase(blocks.begin(), blocks.end() - 1);

Expand All @@ -854,15 +859,19 @@ struct BlockZipperJoinImpl {
// * A reference to the first full block
// * The currently active subrange of that block
// * An iterator pointing to the first element ` >= currentEl` in the block.
// Diff context: the next line is the signature removed by this change; both
// parameters were unconstrained `auto`.
auto getFirstBlock(auto& currentBlocks, const auto& currentEl) {
// Diff context: the next four lines are the signature added by this change.
// `currentBlocks` is constrained with `ad_utility::SameAsAny` over the
// `CurrentBlocks` types of the two sides, and `currentEl` is a concrete
// `const ProjectedEl&`. The body below is shared by both versions.
auto getFirstBlock(ad_utility::SameAsAny<
typename LeftSide::CurrentBlocks,
typename RightSide::CurrentBlocks> auto& currentBlocks,
const ProjectedEl& currentEl) {
// The first block is accessed unconditionally below, so at least one block
// must be present.
AD_CORRECTNESS_CHECK(!currentBlocks.empty());
const auto& first = currentBlocks.at(0);
// Find the first element of the active subrange that is not less than
// `currentEl` w.r.t. `lessThan_`.
auto it = std::ranges::lower_bound(first.subrange(), currentEl, lessThan_);
// `std::ref` makes the tuple store a reference wrapper to the full block
// instead of a copy; the subrange and the iterator are stored by value.
return std::tuple{std::ref(first.fullBlock()), first.subrange(), it};
}

// Check if a side contains undefined values.
static bool hasUndef(const auto& side) {
static bool hasUndef(
const ad_utility::SameAsAny<LeftSide, RightSide> auto& side) {
if constexpr (potentiallyHasUndef) {
return !side.undefBlocks_.empty();
}
Expand All @@ -871,7 +880,8 @@ struct BlockZipperJoinImpl {

// Combine all elements from all blocks on the left with all elements from all
// blocks on the right and add them to the result.
void addCartesianProduct(const auto& blocksLeft, const auto& blocksRight) {
void addCartesianProduct(const LeftSide::CurrentBlocks& blocksLeft,
const RightSide::CurrentBlocks& blocksRight) {
// TODO<C++23> use `std::views::cartesian_product`.
for (const auto& lBlock : blocksLeft) {
for (const auto& rBlock : blocksRight) {
Expand All @@ -888,8 +898,9 @@ struct BlockZipperJoinImpl {
// Handle non-matching rows from the left side for an optional join or a minus
// join.
template <bool DoOptionalJoin>
void addNonMatchingRowsFromLeftForOptionalJoin(const auto& blocksLeft,
const auto& blocksRight) {
void addNonMatchingRowsFromLeftForOptionalJoin(
const LeftSide::CurrentBlocks& blocksLeft,
const RightSide::CurrentBlocks& blocksRight) {
if constexpr (DoOptionalJoin) {
if (!hasUndef(rightSide_) &&
std::ranges::all_of(
Expand All @@ -910,7 +921,8 @@ struct BlockZipperJoinImpl {
// Call `compatibleRowAction` for all pairs of elements in the Cartesian
// product of the blocks in `blocksLeft` and `blocksRight`.
template <bool DoOptionalJoin>
void addAll(const auto& blocksLeft, const auto& blocksRight) {
void addAll(const LeftSide::CurrentBlocks& blocksLeft,
const RightSide::CurrentBlocks& blocksRight) {
addNonMatchingRowsFromLeftForOptionalJoin<DoOptionalJoin>(blocksLeft,
blocksRight);
addCartesianProduct(blocksLeft, blocksRight);
Expand All @@ -921,7 +933,10 @@ struct BlockZipperJoinImpl {
// `currentEl`. Effectively, these subranges cover all the blocks completely
// except maybe the last one, which might contain elements `> currentEl` at
// the end.
auto getEqualToCurrentEl(const auto& blocks, const auto& currentEl) {
auto getEqualToCurrentEl(const ad_utility::SameAsAny<
typename LeftSide::CurrentBlocks,
typename RightSide::CurrentBlocks> auto& blocks,
const ProjectedEl& currentEl) {
auto result = blocks;
if (result.empty()) {
return result;
Expand Down Expand Up @@ -990,9 +1005,10 @@ struct BlockZipperJoinImpl {
// The fully joined parts of the block are then removed from
// `currentBlocksLeft/Right`, as they are not needed anymore.
template <bool DoOptionalJoin>
void joinAndRemoveLessThanCurrentEl(auto& currentBlocksLeft,
auto& currentBlocksRight,
const auto& currentEl) {
void joinAndRemoveLessThanCurrentEl(
LeftSide::CurrentBlocks& currentBlocksLeft,
RightSide::CurrentBlocks& currentBlocksRight,
const ProjectedEl& currentEl) {
// Get the first blocks.
auto [fullBlockLeft, subrangeLeft, currentElItL] =
getFirstBlock(currentBlocksLeft, currentEl);
Expand Down Expand Up @@ -1045,7 +1061,8 @@ struct BlockZipperJoinImpl {

// If the `targetBuffer` is empty, read the next nonempty block from `[it,
// end)` if there is one.
void fillWithAtLeastOne(auto& side) {
void fillWithAtLeastOne(
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) {
auto& targetBuffer = side.currentBlocks_;
auto& it = side.it_;
const auto& end = side.end_;
Expand Down Expand Up @@ -1086,8 +1103,9 @@ struct BlockZipperJoinImpl {
// Based on `blockStatus` add the Cartesian product of the blocks in
// `leftBlocks` and/or `rightBlocks` with their respective counterpart in
// `undefBlocks_`.
void joinWithUndefBlocks(BlockStatus blockStatus, const auto& leftBlocks,
const auto& rightBlocks) {
void joinWithUndefBlocks(BlockStatus blockStatus,
const LeftSide::CurrentBlocks& leftBlocks,
const RightSide::CurrentBlocks& rightBlocks) {
if (blockStatus == BlockStatus::allFilled ||
blockStatus == BlockStatus::leftMissing) {
addCartesianProduct(leftBlocks, rightSide_.undefBlocks_);
Expand Down Expand Up @@ -1117,18 +1135,23 @@ struct BlockZipperJoinImpl {
auto equalToCurrentElRight =
getEqualToCurrentEl(currentBlocksRight, currentEl);

auto getNextBlocks = [&currentEl, self = this, &blockStatus](auto& target,
auto& side) {
self->removeEqualToCurrentEl(side.currentBlocks_, currentEl);
bool allBlocksWereFilled = self->fillEqualToCurrentEl(side, currentEl);
if (side.currentBlocks_.empty()) {
AD_CORRECTNESS_CHECK(allBlocksWereFilled);
}
target = self->getEqualToCurrentEl(side.currentBlocks_, currentEl);
if (allBlocksWereFilled) {
blockStatus = BlockStatus::allFilled;
}
};
auto getNextBlocks =
[this, &currentEl, &blockStatus](
ad_utility::SameAsAny<typename LeftSide::CurrentBlocks,
typename RightSide::CurrentBlocks> auto&
target,
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) {
// Explicit this to avoid false positive warning in clang.
this->removeEqualToCurrentEl(side.currentBlocks_, currentEl);
bool allBlocksWereFilled = fillEqualToCurrentEl(side, currentEl);
if (side.currentBlocks_.empty()) {
AD_CORRECTNESS_CHECK(allBlocksWereFilled);
}
target = getEqualToCurrentEl(side.currentBlocks_, currentEl);
if (allBlocksWereFilled) {
blockStatus = BlockStatus::allFilled;
}
};
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, the `self = this` init-capture is replaced by capturing `this` directly.


// We are only guaranteed to have all relevant blocks from one side, so we
// also need to pass through the remaining blocks from the other side.
Expand Down Expand Up @@ -1185,7 +1208,9 @@ struct BlockZipperJoinImpl {
// those blocks with the undef blocks from the other side.
// `reverse` is used to determine if the left or right side is consumed.
template <bool reversed>
void consumeRemainingBlocks(auto& side, const auto& undefBlocks) {
void consumeRemainingBlocks(
ad_utility::SameAsAny<LeftSide, RightSide> auto& side,
const auto& undefBlocks) {
while (side.it_ != side.end_) {
const auto& lBlock = *side.it_;
for (const auto& rBlock : undefBlocks) {
Expand Down Expand Up @@ -1228,7 +1253,8 @@ struct BlockZipperJoinImpl {
// `side.undefBlocks_` and skipped for subsequent processing. The first block
// containing defined values is split and the defined part is stored in
// `side.currentBlocks_`.
void findFirstBlockWithoutUndef(auto& side) {
void findFirstBlockWithoutUndef(
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only concepts you are using are SameAsAny<LeftSide, RightSide> and SameAsAny<CurrentBlocksLeft, CurrentBlocksRight>.
Can we explore if we can remove some of the redundancy?
My first impression is that this doesn't work with concepts: they can't be declared locally, you can't shorten their names, etc.

So a syntax could be to have a static constexpr bool and then write

template <typename T> requires LeftOrRightSide<T>

But that is not at all shorter....

If you want to....
Bad word Bad word Bad word...

** MACRO **

(don't forget to `#undef` it).

But maybe this all isn't worth it.

// The reference of `it` is there on purpose.
for (auto& it = side.it_; it != side.end_; ++it) {
auto& el = *it;
Expand Down
Loading