-
Notifications
You must be signed in to change notification settings - Fork 56
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add concepts and concrete types for BlockZipperJoinImpl
#1625
Changes from 2 commits
108d5ae
c75d100
37ae277
ec5737a
a9b7af7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
#include "util/JoinAlgorithms/FindUndefRanges.h" | ||
#include "util/JoinAlgorithms/JoinColumnMapping.h" | ||
#include "util/TransparentFunctors.h" | ||
#include "util/TypeTraits.h" | ||
|
||
namespace ad_utility { | ||
|
||
|
@@ -770,9 +771,10 @@ struct BlockZipperJoinImpl { | |
// Recompute the `currentEl`. It is the minimum of the last element in the | ||
// first block of either of the join sides. | ||
ProjectedEl getCurrentEl() { | ||
auto getFirst = [](const auto& side) { | ||
return side.projection_(side.currentBlocks_.front().back()); | ||
}; | ||
auto getFirst = | ||
[](const ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
return side.projection_(side.currentBlocks_.front().back()); | ||
}; | ||
return std::min(getFirst(leftSide_), getFirst(rightSide_), lessThan_); | ||
} | ||
|
||
|
@@ -784,7 +786,9 @@ struct BlockZipperJoinImpl { | |
// blocks that contain elements <= `currentEl` have been added, and `false` if | ||
// the function returned because 3 blocks were added without fulfilling the | ||
// condition. | ||
bool fillEqualToCurrentEl(auto& side, const auto& currentEl) { | ||
bool fillEqualToCurrentEl( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side, | ||
const ProjectedEl& currentEl) { | ||
auto& it = side.it_; | ||
auto& end = side.end_; | ||
for (size_t numBlocksRead = 0; it != end && numBlocksRead < 3; | ||
|
@@ -808,7 +812,7 @@ struct BlockZipperJoinImpl { | |
// sides contain all the relevant blocks. Only filling one side is used for | ||
// the optimization for the Cartesian product described in the documentation. | ||
enum struct BlockStatus { leftMissing, rightMissing, allFilled }; | ||
BlockStatus fillEqualToCurrentElBothSides(const auto& currentEl) { | ||
BlockStatus fillEqualToCurrentElBothSides(const ProjectedEl& currentEl) { | ||
bool allBlocksFromLeft = false; | ||
bool allBlocksFromRight = false; | ||
while (!(allBlocksFromLeft || allBlocksFromRight)) { | ||
|
@@ -828,14 +832,15 @@ struct BlockZipperJoinImpl { | |
// `rightSide_.currentBlocks`) s.t. only elements `> lastProcessedElement` | ||
// remain. This effectively removes all blocks completely, except maybe the | ||
// last one. | ||
template <typename Blocks, typename ProjectedEl> | ||
void removeEqualToCurrentEl(Blocks& blocks, | ||
ProjectedEl lastProcessedElement) { | ||
void removeEqualToCurrentEl( | ||
ad_utility::SameAsAny<typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& blocks, | ||
const ProjectedEl& lastProcessedElement) { | ||
// Erase all but the last block. | ||
AD_CORRECTNESS_CHECK(!blocks.empty()); | ||
if (blocks.size() > 1 && !blocks.front().empty()) { | ||
AD_CORRECTNESS_CHECK(!lessThan_(lastProcessedElement, | ||
std::as_const(blocks.front()).back())); | ||
AD_CORRECTNESS_CHECK( | ||
!lessThan_(lastProcessedElement, blocks.front().back())); | ||
} | ||
blocks.erase(blocks.begin(), blocks.end() - 1); | ||
|
||
|
@@ -854,15 +859,19 @@ struct BlockZipperJoinImpl { | |
// * A reference to the first full block | ||
// * The currently active subrange of that block | ||
// * An iterator pointing to the first element ` >= currentEl` in the block. | ||
auto getFirstBlock(auto& currentBlocks, const auto& currentEl) { | ||
auto getFirstBlock(ad_utility::SameAsAny< | ||
typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& currentBlocks, | ||
const ProjectedEl& currentEl) { | ||
AD_CORRECTNESS_CHECK(!currentBlocks.empty()); | ||
const auto& first = currentBlocks.at(0); | ||
auto it = std::ranges::lower_bound(first.subrange(), currentEl, lessThan_); | ||
return std::tuple{std::ref(first.fullBlock()), first.subrange(), it}; | ||
} | ||
|
||
// Check if a side contains undefined values. | ||
static bool hasUndef(const auto& side) { | ||
static bool hasUndef( | ||
const ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
if constexpr (potentiallyHasUndef) { | ||
return !side.undefBlocks_.empty(); | ||
} | ||
|
@@ -871,7 +880,8 @@ struct BlockZipperJoinImpl { | |
|
||
// Combine all elements from all blocks on the left with all elements from all | ||
// blocks on the right and add them to the result. | ||
void addCartesianProduct(const auto& blocksLeft, const auto& blocksRight) { | ||
void addCartesianProduct(const LeftSide::CurrentBlocks& blocksLeft, | ||
const RightSide::CurrentBlocks& blocksRight) { | ||
// TODO<C++23> use `std::views::cartesian_product`. | ||
for (const auto& lBlock : blocksLeft) { | ||
for (const auto& rBlock : blocksRight) { | ||
|
@@ -888,8 +898,9 @@ struct BlockZipperJoinImpl { | |
// Handle non-matching rows from the left side for an optional join or a minus | ||
// join. | ||
template <bool DoOptionalJoin> | ||
void addNonMatchingRowsFromLeftForOptionalJoin(const auto& blocksLeft, | ||
const auto& blocksRight) { | ||
void addNonMatchingRowsFromLeftForOptionalJoin( | ||
const LeftSide::CurrentBlocks& blocksLeft, | ||
const RightSide::CurrentBlocks& blocksRight) { | ||
if constexpr (DoOptionalJoin) { | ||
if (!hasUndef(rightSide_) && | ||
std::ranges::all_of( | ||
|
@@ -910,7 +921,8 @@ struct BlockZipperJoinImpl { | |
// Call `compatibleRowAction` for all pairs of elements in the Cartesian | ||
// product of the blocks in `blocksLeft` and `blocksRight`. | ||
template <bool DoOptionalJoin> | ||
void addAll(const auto& blocksLeft, const auto& blocksRight) { | ||
void addAll(const LeftSide::CurrentBlocks& blocksLeft, | ||
const RightSide::CurrentBlocks& blocksRight) { | ||
addNonMatchingRowsFromLeftForOptionalJoin<DoOptionalJoin>(blocksLeft, | ||
blocksRight); | ||
addCartesianProduct(blocksLeft, blocksRight); | ||
|
@@ -921,7 +933,10 @@ struct BlockZipperJoinImpl { | |
// `currentEl`. Effectively, these subranges cover all the blocks completely | ||
// except maybe the last one, which might contain elements `> currentEl` at | ||
// the end. | ||
auto getEqualToCurrentEl(const auto& blocks, const auto& currentEl) { | ||
auto getEqualToCurrentEl(const ad_utility::SameAsAny< | ||
typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& blocks, | ||
const ProjectedEl& currentEl) { | ||
auto result = blocks; | ||
if (result.empty()) { | ||
return result; | ||
|
@@ -990,9 +1005,10 @@ struct BlockZipperJoinImpl { | |
// The fully joined parts of the block are then removed from | ||
// `currentBlocksLeft/Right`, as they are not needed anymore. | ||
template <bool DoOptionalJoin> | ||
void joinAndRemoveLessThanCurrentEl(auto& currentBlocksLeft, | ||
auto& currentBlocksRight, | ||
const auto& currentEl) { | ||
void joinAndRemoveLessThanCurrentEl( | ||
LeftSide::CurrentBlocks& currentBlocksLeft, | ||
RightSide::CurrentBlocks& currentBlocksRight, | ||
const ProjectedEl& currentEl) { | ||
// Get the first blocks. | ||
auto [fullBlockLeft, subrangeLeft, currentElItL] = | ||
getFirstBlock(currentBlocksLeft, currentEl); | ||
|
@@ -1045,7 +1061,8 @@ struct BlockZipperJoinImpl { | |
|
||
// If the `targetBuffer` is empty, read the next nonempty block from `[it, | ||
// end)` if there is one. | ||
void fillWithAtLeastOne(auto& side) { | ||
void fillWithAtLeastOne( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
auto& targetBuffer = side.currentBlocks_; | ||
auto& it = side.it_; | ||
const auto& end = side.end_; | ||
|
@@ -1086,8 +1103,9 @@ struct BlockZipperJoinImpl { | |
// Based on `blockStatus` add the Cartesian product of the blocks in | ||
// `leftBlocks` and/or `rightBlocks` with their respective counterpart in | ||
// `undefBlocks_`. | ||
void joinWithUndefBlocks(BlockStatus blockStatus, const auto& leftBlocks, | ||
const auto& rightBlocks) { | ||
void joinWithUndefBlocks(BlockStatus blockStatus, | ||
const LeftSide::CurrentBlocks& leftBlocks, | ||
const RightSide::CurrentBlocks& rightBlocks) { | ||
if (blockStatus == BlockStatus::allFilled || | ||
blockStatus == BlockStatus::leftMissing) { | ||
addCartesianProduct(leftBlocks, rightSide_.undefBlocks_); | ||
|
@@ -1117,18 +1135,23 @@ struct BlockZipperJoinImpl { | |
auto equalToCurrentElRight = | ||
getEqualToCurrentEl(currentBlocksRight, currentEl); | ||
|
||
auto getNextBlocks = [¤tEl, self = this, &blockStatus](auto& target, | ||
auto& side) { | ||
self->removeEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
bool allBlocksWereFilled = self->fillEqualToCurrentEl(side, currentEl); | ||
if (side.currentBlocks_.empty()) { | ||
AD_CORRECTNESS_CHECK(allBlocksWereFilled); | ||
} | ||
target = self->getEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
if (allBlocksWereFilled) { | ||
blockStatus = BlockStatus::allFilled; | ||
} | ||
}; | ||
auto getNextBlocks = | ||
[this, ¤tEl, &blockStatus]( | ||
ad_utility::SameAsAny<typename LeftSide::CurrentBlocks, | ||
typename RightSide::CurrentBlocks> auto& | ||
target, | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
// Explicit this to avoid false positive warning in clang. | ||
this->removeEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
bool allBlocksWereFilled = fillEqualToCurrentEl(side, currentEl); | ||
if (side.currentBlocks_.empty()) { | ||
AD_CORRECTNESS_CHECK(allBlocksWereFilled); | ||
} | ||
target = getEqualToCurrentEl(side.currentBlocks_, currentEl); | ||
if (allBlocksWereFilled) { | ||
blockStatus = BlockStatus::allFilled; | ||
} | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here, |
||
|
||
// We are only guaranteed to have all relevant blocks from one side, so we | ||
// also need to pass through the remaining blocks from the other side. | ||
|
@@ -1185,7 +1208,9 @@ struct BlockZipperJoinImpl { | |
// those blocks with the undef blocks from the other side. | ||
// `reverse` is used to determine if the left or right side is consumed. | ||
template <bool reversed> | ||
void consumeRemainingBlocks(auto& side, const auto& undefBlocks) { | ||
void consumeRemainingBlocks( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side, | ||
const auto& undefBlocks) { | ||
while (side.it_ != side.end_) { | ||
const auto& lBlock = *side.it_; | ||
for (const auto& rBlock : undefBlocks) { | ||
|
@@ -1228,7 +1253,8 @@ struct BlockZipperJoinImpl { | |
// `side.undefBlocks_` and skipped for subsequent processing. The first block | ||
// containing defined values is split and the defined part is stored in | ||
// `side.currentBlocks_`. | ||
void findFirstBlockWithoutUndef(auto& side) { | ||
void findFirstBlockWithoutUndef( | ||
ad_utility::SameAsAny<LeftSide, RightSide> auto& side) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only concepts you are using are So a syntax could be to have a
But that is not at all shorter.... If you want to.... ** MACRO ** (don't forget to Undef it). But maybe this all isn't worth it. |
||
// The reference of `it` is there on purpose. | ||
for (auto& it = side.it_; it != side.end_; ++it) { | ||
auto& el = *it; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here
as_const
is dropped. This presumably works because of recent fixes to theIdTable
class.