Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge located triples when performing index scans #1597

Merged
merged 19 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/engine/IndexScan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ IndexScan::IndexScan(QueryExecutionContext* qec, Permutation::Enum permutation,
additionalColumns_.push_back(idx);
additionalVariables_.push_back(variable);
}
sizeEstimate_ = computeSizeEstimate();
std::tie(sizeEstimateIsExact_, sizeEstimate_) = computeSizeEstimate();

// Check the following invariant: All the variables must be at the end of the
// permuted triple. For example in the PSO permutation, either only the O, or
Expand Down Expand Up @@ -171,7 +171,18 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) {
}

// _____________________________________________________________________________
size_t IndexScan::computeSizeEstimate() const {
std::pair<bool, size_t> IndexScan::computeSizeEstimate() const {
AD_CORRECTNESS_CHECK(_executionContext);
auto [lower, upper] = getIndex()
.getImpl()
.getPermutation(permutation())
.getSizeEstimateForScan(getScanSpecification(),
locatedTriplesSnapshot());
return {lower == upper, std::midpoint(lower, upper)};
}

// _____________________________________________________________________________
size_t IndexScan::getExactSize() const {
AD_CORRECTNESS_CHECK(_executionContext);
return getIndex().getResultSizeOfScan(getScanSpecification(), permutation_,
locatedTriplesSnapshot());
Expand Down
17 changes: 12 additions & 5 deletions src/engine/IndexScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class IndexScan final : public Operation {
Graphs graphsToFilter_;
size_t numVariables_;
size_t sizeEstimate_;
bool sizeEstimateIsExact_;
vector<float> multiplicity_;

// Additional columns (e.g. patterns) that are being retrieved in addition to
Expand Down Expand Up @@ -59,7 +60,7 @@ class IndexScan final : public Operation {

// Return the exact result size of the index scan. This is always known as it
// can be read from the Metadata.
size_t getExactSize() const { return sizeEstimate_; }
size_t getExactSize() const;

// Return two generators that lazily yield the results of `s1` and `s2` in
// blocks, but only the blocks that can theoretically contain matching rows
Expand All @@ -78,7 +79,7 @@ class IndexScan final : public Operation {
private:
// TODO<joka921> Make the `getSizeEstimateBeforeLimit()` function `const` for
// ALL the `Operations`.
uint64_t getSizeEstimateBeforeLimit() override { return getExactSize(); }
uint64_t getSizeEstimateBeforeLimit() override {
  // Report the stored estimate; it is exact only if `sizeEstimateIsExact_`.
  return sizeEstimate_;
}

public:
size_t getCostEstimate() override;
Expand All @@ -93,7 +94,9 @@ class IndexScan final : public Operation {
return multiplicity_[col];
}

bool knownEmptyResult() override { return getExactSize() == 0; }
bool knownEmptyResult() override {
  // An estimate of zero only proves emptiness if the estimate is exact.
  if (!sizeEstimateIsExact_) {
    return false;
  }
  return sizeEstimate_ == 0;
}

bool isIndexScanWithNumVariables(size_t target) const override {
return numVariables() == target;
Expand All @@ -103,7 +106,7 @@ class IndexScan final : public Operation {
// size of wikidata, so we don't even need to try and waste performance.
bool unlikelyToFitInCache(
ad_utility::MemorySize maxCacheableSize) const override {
// Return true iff the estimated footprint of the result in bytes (number of
// rows * result width * `sizeof(Id)`) exceeds `maxCacheableSize`.
return ad_utility::MemorySize::bytes(getExactSize() * getResultWidth() *
return ad_utility::MemorySize::bytes(sizeEstimate_ * getResultWidth() *
sizeof(Id)) > maxCacheableSize;
}

Expand All @@ -124,7 +127,11 @@ class IndexScan final : public Operation {

vector<QueryExecutionTree*> getChildren() override { return {}; }

size_t computeSizeEstimate() const;
// Compute the size estimate of the index scan, taking delta triples (via
// `locatedTriplesSnapshot()`) into account. Returns `{isExact, estimate}`: the
// `bool` is true iff the estimate is exact, i.e. the lower and upper bound
// coincide. If not, the estimate is the midpoint of the lower and upper bound.
std::pair<bool, size_t> computeSizeEstimate() const;

std::string getCacheKeyImpl() const override;

Expand Down
Loading
Loading