Skip to content

Commit

Permalink
Fix lifetime issues and add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
RobinTF committed Nov 18, 2024
1 parent 08be2d8 commit c9a5648
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 17 deletions.
28 changes: 15 additions & 13 deletions src/engine/Join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ ProtoResult Join::computeResult(bool requestLaziness) {
if (rightResIfCached && !leftResIfCached) {
AD_CORRECTNESS_CHECK(rightResIfCached->isFullyMaterialized());
return computeResultForIndexScanAndIdTable<true>(
requestLaziness, rightResIfCached->idTable(), _rightJoinCol,
requestLaziness, std::move(rightResIfCached), _rightJoinCol,
leftIndexScan, _leftJoinCol);

} else if (!leftResIfCached) {
Expand Down Expand Up @@ -172,10 +172,9 @@ ProtoResult Join::computeResult(bool requestLaziness) {
auto rightIndexScan =
std::dynamic_pointer_cast<IndexScan>(_right->getRootOperation());
if (rightIndexScan && !rightResIfCached && leftRes->isFullyMaterialized()) {
const auto& leftIdTable = leftRes->idTable();
return computeResultForIndexScanAndIdTable<false>(
requestLaziness, leftIdTable, _leftJoinCol, rightIndexScan,
_rightJoinCol, leftRes);
requestLaziness, std::move(leftRes), _leftJoinCol, rightIndexScan,
_rightJoinCol);
}

std::shared_ptr<const Result> rightRes =
Expand Down Expand Up @@ -758,15 +757,17 @@ ProtoResult Join::computeResultForTwoIndexScans(bool requestLaziness) const {
// ______________________________________________________________________________________________________
template <bool idTableIsRightInput>
ProtoResult Join::computeResultForIndexScanAndIdTable(
bool requestLaziness, const IdTable& idTable, ColumnIndex joinColTable,
std::shared_ptr<IndexScan> scan, ColumnIndex joinColScan,
const std::shared_ptr<const Result>& subResult) const {
return createResult(requestLaziness, [this, &idTable, joinColTable,
bool requestLaziness, std::shared_ptr<const Result> resultWithIdTable,
ColumnIndex joinColTable, std::shared_ptr<IndexScan> scan,
ColumnIndex joinColScan) const {
return createResult(requestLaziness, [this, joinColTable,
scan = std::move(scan), joinColScan,
subResult = std::move(subResult)](
resultWithIdTable =
std::move(resultWithIdTable)](
std::invocable<
Result::IdTableVocabPair> auto
yieldTable) {
const IdTable& idTable = resultWithIdTable->idTable();
// We first have to permute the columns.
auto [jcLeft, jcRight, numColsLeft, numColsRight] = [&]() {
return idTableIsRightInput
Expand All @@ -781,12 +782,13 @@ ProtoResult Join::computeResultForIndexScanAndIdTable(
ad_utility::AddCombinedRowToIdTable rowAdder{
1, IdTable{getResultWidth(), getExecutionContext()->getAllocator()},
cancellationHandle_, CHUNK_SIZE,
[&yieldTable, &joinColMap](IdTable& idTable, LocalVocab& localVocab) {
if (idTable.size() < CHUNK_SIZE) {
[&yieldTable, &joinColMap](IdTable& partialIdTable,
LocalVocab& localVocab) {
if (partialIdTable.size() < CHUNK_SIZE) {
return;
}
idTable.setColumnSubset(joinColMap.permutationResult());
yieldTable(Result::IdTableVocabPair{std::move(idTable),
partialIdTable.setColumnSubset(joinColMap.permutationResult());
yieldTable(Result::IdTableVocabPair{std::move(partialIdTable),
std::move(localVocab)});
}};

Expand Down
21 changes: 17 additions & 4 deletions src/engine/Join.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,13 +94,26 @@ class Join : public Operation {
void join(const IdTable& a, ColumnIndex jc1, const IdTable& b,
ColumnIndex jc2, IdTable* result) const;

// action is a lambda with signature
// Helper function to compute the result of a join operation and conditionally
// return a lazy or fully materialized result depending on `requestedLaziness`.
// The lazy case is achieved by running the `action` lambda in a separate
// thread and returning a lazy result that reads from the queue of that thread.
// If `requestedLaziness` is false, the result is fully materialized and
// returned directly.
// `action` is a lambda with signature
// Result::IdTableVocabPair(void(Result::IdTableVocabPair)), i.e. it takes a
// callback that consumes a `Result::IdTableVocabPair` and itself returns a
// `Result::IdTableVocabPair`.
ProtoResult createResult(bool requestedLaziness, auto action) const;

// Helper function that cheaply checks if a join could contain undefined
// values. For fully materialized tables it can just look at the first
// element. For lazy tables it has to look at the meta information, which
// could potentially indicate undefinedness even when all values are defined.
static bool couldContainUndef(const auto& blocks, const auto& tree,
ColumnIndex joinColumn);

// Fallback implementation of a join that is used when at least one of the two
// inputs is not fully materialized. This represents the general case where we
// don't have any optimization left to try.
ProtoResult lazyJoin(std::shared_ptr<const Result> a, ColumnIndex jc1,
std::shared_ptr<const Result> b, ColumnIndex jc2,
bool requestLaziness) const;
Expand Down Expand Up @@ -140,9 +153,9 @@ class Join : public Operation {
// determine the correct order of the columns in the result.
template <bool scanIsLeft>
ProtoResult computeResultForIndexScanAndIdTable(
bool requestLaziness, const IdTable& idTable, ColumnIndex joinColTable,
std::shared_ptr<IndexScan> scan, ColumnIndex joinColScan,
const std::shared_ptr<const Result>& subResult = nullptr) const;
bool requestLaziness, std::shared_ptr<const Result> resultWithIdTable,
ColumnIndex joinColTable, std::shared_ptr<IndexScan> scan,
ColumnIndex joinColScan) const;

/*
* @brief Combines 2 rows like in a join and inserts the result in the
Expand Down

0 comments on commit c9a5648

Please sign in to comment.