Skip to content

Commit

Permalink
changes w.r.t. comments from #1573
Browse files Browse the repository at this point in the history
  • Loading branch information
realHannes committed Nov 18, 2024
1 parent 96ded86 commit 45cc1b4
Show file tree
Hide file tree
Showing 11 changed files with 185 additions and 90 deletions.
26 changes: 19 additions & 7 deletions src/engine/Filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Filter::Filter(QueryExecutionContext* qec,
: Operation(qec),
_subtree(std::move(subtree)),
_expression{std::move(expression)} {
setPrefilterExpressionForIndexScanChildren();
setPrefilterExpressionForDirectIndexScanChild();
}

// _____________________________________________________________________________
Expand All @@ -45,14 +45,26 @@ string Filter::getDescriptor() const {
}

//______________________________________________________________________________
void Filter::setPrefilterExpressionForIndexScanChildren() {
const std::vector<PrefilterVariablePair>& prefilterVec =
void Filter::setPrefilterExpressionForDirectIndexScanChild() {
std::vector<PrefilterVariablePair> prefilterPairs =
_expression.getPrefilterExpressionForMetadata();
this->forAllDescendants([&prefilterVec](const QueryExecutionTree* ptr) {
if (ptr) {
ptr->setPrefilterExpression(prefilterVec);
std::vector<PrefilterVariablePair> relevantPairs;
relevantPairs.reserve(prefilterPairs.size());
VariableToColumnMap varToColMap = _subtree->getVariableColumns();
// Keep only those `PrefilterVariablePair`s whose `Variable` is contained in
// the `VariableToColumnMap` of the subtree. This prevents certain subqueries
// from filtering out too much.
for (auto& prefilterPair : prefilterPairs) {
if (varToColMap.find(prefilterPair.second) != varToColMap.end()) {
relevantPairs.emplace_back(std::move(prefilterPair));
}
});
}
auto optNewSubTree =
_subtree->getRootOperation()->setPrefilterExprGetUpdatedQetPtr(
std::move(relevantPairs));
if (optNewSubTree.has_value()) {
_subtree = std::move(optNewSubTree.value());
}

Check warning on line 67 in src/engine/Filter.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/Filter.cpp#L66-L67

Added lines #L66 - L67 were not covered by tests
}

// _____________________________________________________________________________
Expand Down
6 changes: 5 additions & 1 deletion src/engine/Filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ class Filter : public Operation {
return _subtree->getVariableColumns();
}

void setPrefilterExpressionForIndexScanChildren();
// This method is called directly by the constructor.
// It collects the `<PrefilterExpression, Variable>` pairs whose `Variable` is
// visible in `_subtree` and passes them to the root operation of `_subtree`
// via `setPrefilterExprGetUpdatedQetPtr`. If that call returns an updated
// `QueryExecutionTree`, `_subtree` is replaced by it.
void setPrefilterExpressionForDirectIndexScanChild();

ProtoResult computeResult(bool requestLaziness) override;

Expand Down
94 changes: 67 additions & 27 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ using std::string;
// _____________________________________________________________________________
IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
const SparqlTripleSimple& triple, Graphs graphsToFilter,
PrefilterIndexPairs prefilters)
PrefilterIndexPair prefilter)
: Operation(qec),
permutation_(permutation),
subject_(triple.s_),
predicate_(triple.p_),
object_(triple.o_),
graphsToFilter_{std::move(graphsToFilter)},
prefilters_{std::move(prefilters)},
prefilter_{std::move(prefilter)},
numVariables_(static_cast<size_t>(subject_.isVariable()) +
static_cast<size_t>(predicate_.isVariable()) +
static_cast<size_t>(object_.isVariable())) {
Expand Down Expand Up @@ -54,9 +54,26 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
// _____________________________________________________________________________
IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
const SparqlTriple& triple, Graphs graphsToFilter,
PrefilterIndexPairs prefilters)
PrefilterIndexPair prefilter)
: IndexScan(qec, permutation, triple.getSimple(), std::move(graphsToFilter),
std::move(prefilters)) {}
std::move(prefilter)) {}

// _____________________________________________________________________________
// Construct an `IndexScan` directly from its individual components. This
// overload is used to create a copy of an existing `IndexScan` (see
// `makeCopyWithAddedPrefilters`).
//
// `numVariables_` is initialized here with the same expression that the
// triple-based constructor uses. Without it the copy would carry an
// indeterminate value, although `setPrefilterExprGetUpdatedQetPtr` and
// `materializedIndexScan` read it.
//
// NOTE(review): `sizeEstimate_` and `sizeEstimateIsExact_` are not set by this
// constructor. The body of the triple-based constructor is not visible here;
// confirm whether the setup it performs has to be mirrored.
IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
                     const TripleComponent& s, const TripleComponent& p,
                     const TripleComponent& o,
                     const std::vector<ColumnIndex>& additionalColumns,
                     const std::vector<Variable>& additionalVariables,
                     Graphs graphsToFilter, PrefilterIndexPair prefilter)
    : Operation(qec),
      permutation_(permutation),
      subject_(s),
      predicate_(p),
      object_(o),
      graphsToFilter_(std::move(graphsToFilter)),
      prefilter_(std::move(prefilter)),
      // `subject_`, `predicate_` and `object_` are declared before
      // `numVariables_`, so they are already initialized at this point.
      numVariables_(static_cast<size_t>(subject_.isVariable()) +
                    static_cast<size_t>(predicate_.isVariable()) +
                    static_cast<size_t>(object_.isVariable())),
      additionalColumns_(additionalColumns),
      additionalVariables_(additionalVariables) {}

Check warning on line 76 in src/engine/IndexScan.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/IndexScan.cpp#L76

Added line #L76 was not covered by tests

// _____________________________________________________________________________
string IndexScan::getCacheKeyImpl() const {
Expand Down Expand Up @@ -119,21 +136,34 @@ vector<ColumnIndex> IndexScan::resultSortedOn() const {
}

// _____________________________________________________________________________
void IndexScan::setPrefilterExpression(
std::optional<std::shared_ptr<QueryExecutionTree>>
IndexScan::setPrefilterExprGetUpdatedQetPtr(
const std::vector<PrefilterVariablePair>& prefilterVariablePairs) {
const std::vector<ColumnIndex>& sortedColumns = resultSortedOn();
VariableToColumnMap varToColMap = computeVariableToColumnMap();
// The column index of the first sorted column.
const ColumnIndex sortedIdx = 0;
if (numVariables_ < 1) {
return std::nullopt;
}

Check warning on line 146 in src/engine/IndexScan.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/IndexScan.cpp#L145-L146

Added lines #L145 - L146 were not covered by tests

const auto addPrefilterIfSorted = [&](const PrefilterVariablePair& pair) {
const Variable& variable = pair.second;
if (varToColMap.find(variable) != varToColMap.end()) {
const ColumnIndex colIdx = varToColMap[variable].columnIndex_;
if (std::ranges::find(sortedColumns, colIdx) != sortedColumns.end()) {
prefilters_.push_back(std::make_pair(pair.first->clone(), colIdx));
}
VariableToColumnMap varToColMap = computeVariableToColumnMap();
// Find the `Variable` that the `VariableToColumnMap` maps to `sortedIdx`
// (the index of the first sorted column).
auto mapIt =
std::ranges::find_if(varToColMap, [&sortedIdx](const auto& keyValuePair) {
return keyValuePair.second.columnIndex_ == sortedIdx;
});
if (mapIt != varToColMap.end()) {
// Check if the previously found Variable (key-value from varToColMap)
// matches with a <PrefilterExpression, Variable> pair.
auto itPairs = std::ranges::find_if(
prefilterVariablePairs,
[&mapIt](const auto& pair) { return pair.second == mapIt->first; });
if (itPairs != prefilterVariablePairs.end()) {
return makeCopyWithAddedPrefilters(
std::make_pair(itPairs->first->clone(), sortedIdx));
}

Check warning on line 164 in src/engine/IndexScan.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/IndexScan.cpp#L162-L164

Added lines #L162 - L164 were not covered by tests
};
std::ranges::for_each(prefilterVariablePairs, addPrefilterIfSorted);
}
return std::nullopt;
}

// _____________________________________________________________________________
Expand All @@ -155,12 +185,23 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const {
return variableToColumnMap;
}

//______________________________________________________________________________
// Return a new `QueryExecutionTree` whose root is an `IndexScan` built from
// the same permutation, triple components, additional columns/variables and
// graph filter as this one, but carrying the given `prefilter`.
std::shared_ptr<QueryExecutionTree> IndexScan::makeCopyWithAddedPrefilters(
    PrefilterIndexPair prefilter) {
  // `graphsToFilter_` is copied on purpose. Moving it would leave this
  // `IndexScan` with a moved-from graph filter, although it may still be
  // referenced (and executed) elsewhere after the copy has been created.
  return ad_utility::makeExecutionTree<IndexScan>(
      getExecutionContext(), permutation_, subject_, predicate_, object_,
      additionalColumns_, additionalVariables_, graphsToFilter_,
      std::move(prefilter));
}

Check warning on line 195 in src/engine/IndexScan.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/IndexScan.cpp#L190-L195

Added lines #L190 - L195 were not covered by tests

//______________________________________________________________________________
std::vector<CompressedBlockMetadata> IndexScan::applyFilterBlockMetadata(
std::vector<CompressedBlockMetadata>&& blocks) const {
std::ranges::for_each(prefilters_, [&blocks](const PrefilterIndexPair& pair) {
pair.first->evaluate(blocks, pair.second);
});
const std::vector<CompressedBlockMetadata> blocks) const {
if (prefilter_.has_value()) {
auto& prefilterIndexPair = prefilter_.value();
return prefilterExpressions::detail::evaluatePrefilterExpressionOnMetadata(
prefilterIndexPair.first->clone(), blocks, prefilterIndexPair.second);
}

Check warning on line 204 in src/engine/IndexScan.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/IndexScan.cpp#L201-L204

Added lines #L201 - L204 were not covered by tests
return blocks;
};

Expand All @@ -178,7 +219,7 @@ Result::Generator IndexScan::chunkedIndexScan() const {
}

// _____________________________________________________________________________
IdTable IndexScan::completeIndexScan() const {
IdTable IndexScan::materializedIndexScan() const {
// Get the blocks.
auto metadata = getMetadataForScan();
auto blockSpan =
Expand All @@ -189,7 +230,7 @@ IdTable IndexScan::completeIndexScan() const {
? std::optional{applyFilterBlockMetadata(
{blockSpan.begin(), blockSpan.end()})}
: std::nullopt;
// Create IdTable, fill it with content by performing scan().
// Create the IdTable and fill it with content by performing scan().
using enum Permutation::Enum;
IdTable idTable{getExecutionContext()->getAllocator()};
idTable.setNumColumns(numVariables_);
Expand All @@ -211,7 +252,7 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) {
if (requestLaziness) {
return {chunkedIndexScan(), resultSortedOn()};
}
return {completeIndexScan(), getResultSortedOn(), LocalVocab{}};
return {materializedIndexScan(), getResultSortedOn(), LocalVocab{}};
}

// _____________________________________________________________________________
Expand Down Expand Up @@ -287,7 +328,7 @@ ScanSpecificationAsTripleComponent IndexScan::getScanSpecificationTc() const {

// _____________________________________________________________________________
Permutation::IdTableGenerator IndexScan::getLazyScan(
std::vector<CompressedBlockMetadata>&& blocks) const {
std::vector<CompressedBlockMetadata> blocks) const {
// If there is a LIMIT or OFFSET clause that constrains the scan
// (which can happen with an explicit subquery), we cannot use the prefiltered
// blocks, as we currently have no mechanism to include limits and offsets
Expand Down Expand Up @@ -355,8 +396,7 @@ IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) {
auto [blocks1, blocks2] = CompressedRelationReader::getBlocksForJoin(
metaBlocks1.value(), metaBlocks2.value());

std::array result{s1.getLazyScan(std::move(blocks1)),
s2.getLazyScan(std::move(blocks2))};
std::array result{s1.getLazyScan(blocks1), s2.getLazyScan(blocks2)};
result[0].details().numBlocksAll_ = metaBlocks1.value().blockMetadata_.size();
result[1].details().numBlocksAll_ = metaBlocks2.value().blockMetadata_.size();
return result;
Expand All @@ -377,7 +417,7 @@ Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan(
auto blocks = CompressedRelationReader::getBlocksForJoin(joinColumn,
metaBlocks1.value());

auto result = getLazyScan(std::move(blocks));
auto result = getLazyScan(blocks);
result.details().numBlocksAll_ = metaBlocks1.value().blockMetadata_.size();
return result;
}
40 changes: 25 additions & 15 deletions src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,18 @@ class SparqlTripleSimple;

class IndexScan final : public Operation {
using Graphs = ScanSpecificationAsTripleComponent::Graphs;
// Pair containing a `PrefilterExpression` with `ColumnIndex` (eval. index)
using PrefilterIndexPair =
std::pair<std::unique_ptr<prefilterExpressions::PrefilterExpression>,
ColumnIndex>;
// Vector with `PrefilterIndexPair` values.
using PrefilterIndexPairs = std::vector<PrefilterIndexPair>;
// Optional pair containing a `PrefilterExpression` with `ColumnIndex` (eval.
// index)
using PrefilterIndexPair = std::optional<std::pair<
std::unique_ptr<prefilterExpressions::PrefilterExpression>, ColumnIndex>>;

private:
Permutation::Enum permutation_;
TripleComponent subject_;
TripleComponent predicate_;
TripleComponent object_;
Graphs graphsToFilter_;
PrefilterIndexPairs prefilters_;
PrefilterIndexPair prefilter_;
size_t numVariables_;
size_t sizeEstimate_;
bool sizeEstimateIsExact_;
Expand All @@ -40,11 +38,18 @@ class IndexScan final : public Operation {
public:
IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
const SparqlTriple& triple, Graphs graphsToFilter = std::nullopt,
PrefilterIndexPairs prefilters = {});
PrefilterIndexPair prefilter = std::nullopt);
IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
const SparqlTripleSimple& triple,
Graphs graphsToFilter = std::nullopt,
PrefilterIndexPairs prefilters = {});
PrefilterIndexPair prefilter = std::nullopt);
// Constructor to simplify copy creation of an `IndexScan`.
IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
const TripleComponent& s, const TripleComponent& p,
const TripleComponent& o,
const std::vector<ColumnIndex>& additionalColumns,
const std::vector<Variable>& additionalVariables,
Graphs graphsToFilter, PrefilterIndexPair prefilter);

~IndexScan() override = default;

Expand All @@ -66,9 +71,11 @@ class IndexScan final : public Operation {

vector<ColumnIndex> resultSortedOn() const override;

// Set `PrefilterExpression`s.
void setPrefilterExpression(const std::vector<PrefilterVariablePair>&
prefilterVariablePairs) override;
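// If one of the given `<PrefilterExpression, Variable>` pairs refers to the
// variable of the first sorted column of this `IndexScan`, return a new
// `QueryExecutionTree` containing a copy of this scan with that
// `PrefilterExpression` added. Otherwise return `std::nullopt`.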
std::optional<std::shared_ptr<QueryExecutionTree>>
setPrefilterExprGetUpdatedQetPtr(const std::vector<PrefilterVariablePair>&
prefilterVariablePairs) override;

size_t numVariables() const { return numVariables_; }

Expand Down Expand Up @@ -151,19 +158,22 @@ class IndexScan final : public Operation {

VariableToColumnMap computeVariableToColumnMap() const override;

std::shared_ptr<QueryExecutionTree> makeCopyWithAddedPrefilters(
PrefilterIndexPair prefilter);

// Filter the relevant `CompressedBlockMetadata` blocks by applying the
// `PrefilterExpression` stored in `prefilter_`. If no prefilter is set, the
// blocks are returned unchanged.
std::vector<CompressedBlockMetadata> applyFilterBlockMetadata(
std::vector<CompressedBlockMetadata>&& blocks) const;
const std::vector<CompressedBlockMetadata> blocks) const;

// Return the (lazy) `IdTable` for this `IndexScan` in chunks.
Result::Generator chunkedIndexScan() const;
// Get the `IdTable` for this `IndexScan` in one piece.
IdTable completeIndexScan() const;
IdTable materializedIndexScan() const;

// Helper functions for the public `getLazyScanFor...` methods and
// `chunkedIndexScan` (see above).
Permutation::IdTableGenerator getLazyScan(
std::vector<CompressedBlockMetadata>&& blocks) const;
std::vector<CompressedBlockMetadata> blocks) const;
std::optional<Permutation::MetadataAndBlocks> getMetadataForScan() const;
};
18 changes: 6 additions & 12 deletions src/engine/Operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,36 +12,30 @@ using namespace std::chrono_literals;

//______________________________________________________________________________
template <typename F>
void Operation::forAllDescendantsImpl(F&& f) {
void Operation::forAllDescendantsImpl(F f) {
static_assert(
std::is_same_v<void, std::invoke_result_t<F, QueryExecutionTree*>>);
for (auto ptr : getChildren()) {
if (ptr) {
std::forward<F>(f)(ptr);
ptr->forAllDescendants(std::forward<F>(f));
f(ptr);
ptr->forAllDescendants(f);
}
}
}

//______________________________________________________________________________
template <typename F>
void Operation::forAllDescendantsImpl(F&& f) const {
void Operation::forAllDescendantsImpl(F f) const {
static_assert(
std::is_same_v<void, std::invoke_result_t<F, const QueryExecutionTree*>>);
for (auto ptr : getChildren()) {
if (ptr) {
std::forward<F>(f)(ptr);
ptr->forAllDescendants(std::forward<F>(f));
f(ptr);
ptr->forAllDescendants(f);
}
}
}

// _____________________________________________________________________________
void Operation::forAllDescendants(
std::function<void(const QueryExecutionTree*)>&& func) {
forAllDescendantsImpl(std::move(func));
}

// _____________________________________________________________________________
vector<string> Operation::collectWarnings() const {
vector<string> res = getWarnings();
Expand Down
Loading

0 comments on commit 45cc1b4

Please sign in to comment.