Skip to content

Commit

Permalink
Fix a bug in the cache key of DISTINCT (#1403)
Browse files Browse the repository at this point in the history
The cache key of a DISTINCT operation previously did not change when the subtree stayed the same, but the variables on which the DISTINCT was performed changed. For example, when first executing `SELECT DISTINCT ?x ?y WHERE { ?x wdt:P30 ?y }` and then `SELECT DISTINCT ?x WHERE { ?x wdt:P30 ? }` on Wikidata, the result for the second query would be mistakenly taken from the cache, although those queries have different results. This is now fixed by adding the column indices of the variables to the cache key.
  • Loading branch information
joka921 authored Jul 18, 2024
1 parent ee0e2ac commit 49e4134
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 18 deletions.
13 changes: 6 additions & 7 deletions src/engine/Distinct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,33 @@ using std::endl;
using std::string;

// _____________________________________________________________________________
size_t Distinct::getResultWidth() const { return _subtree->getResultWidth(); }
size_t Distinct::getResultWidth() const { return subtree_->getResultWidth(); }

// _____________________________________________________________________________
Distinct::Distinct(QueryExecutionContext* qec,
std::shared_ptr<QueryExecutionTree> subtree,
const vector<ColumnIndex>& keepIndices)
: Operation(qec), _subtree(subtree), _keepIndices(keepIndices) {}
: Operation(qec), subtree_(std::move(subtree)), _keepIndices(keepIndices) {}

// _____________________________________________________________________________
string Distinct::getCacheKeyImpl() const {
std::ostringstream os;
os << "Distinct " << _subtree->getCacheKey();
return std::move(os).str();
return absl::StrCat("DISTINCT (", subtree_->getCacheKey(), ") (",
absl::StrJoin(_keepIndices, ","), ")");
}

// _____________________________________________________________________________
string Distinct::getDescriptor() const { return "Distinct"; }

// _____________________________________________________________________________
VariableToColumnMap Distinct::computeVariableToColumnMap() const {
return _subtree->getVariableColumns();
return subtree_->getVariableColumns();
}

// _____________________________________________________________________________
Result Distinct::computeResult([[maybe_unused]] bool requestLaziness) {
IdTable idTable{getExecutionContext()->getAllocator()};
LOG(DEBUG) << "Getting sub-result for distinct result computation..." << endl;
std::shared_ptr<const Result> subRes = _subtree->getResult();
std::shared_ptr<const Result> subRes = subtree_->getResult();

LOG(DEBUG) << "Distinct result computation..." << endl;
idTable.setNumColumns(subRes->idTable().numColumns());
Expand Down
21 changes: 10 additions & 11 deletions src/engine/Distinct.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using std::vector;

class Distinct : public Operation {
private:
std::shared_ptr<QueryExecutionTree> _subtree;
std::shared_ptr<QueryExecutionTree> subtree_;
vector<ColumnIndex> _keepIndices;

public:
Expand All @@ -24,38 +24,37 @@ class Distinct : public Operation {

[[nodiscard]] size_t getResultWidth() const override;

public:
[[nodiscard]] string getDescriptor() const override;

[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override {
return _subtree->resultSortedOn();
return subtree_->resultSortedOn();
}

private:
uint64_t getSizeEstimateBeforeLimit() override {
return _subtree->getSizeEstimate();
return subtree_->getSizeEstimate();
}

public:
virtual size_t getCostEstimate() override {
return getSizeEstimateBeforeLimit() + _subtree->getCostEstimate();
size_t getCostEstimate() override {
return getSizeEstimateBeforeLimit() + subtree_->getCostEstimate();
}

virtual float getMultiplicity(size_t col) override {
return _subtree->getMultiplicity(col);
float getMultiplicity(size_t col) override {
return subtree_->getMultiplicity(col);
}

bool knownEmptyResult() override { return _subtree->knownEmptyResult(); }
bool knownEmptyResult() override { return subtree_->knownEmptyResult(); }

vector<QueryExecutionTree*> getChildren() override {
return {_subtree.get()};
return {subtree_.get()};
}

protected:
[[nodiscard]] string getCacheKeyImpl() const override;

private:
virtual Result computeResult([[maybe_unused]] bool requestLaziness) override;
Result computeResult([[maybe_unused]] bool requestLaziness) override;

VariableToColumnMap computeVariableToColumnMap() const override;
};
1 change: 1 addition & 0 deletions test/engine/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ addLinkAndDiscoverTest(IndexScanTest engine)
addLinkAndDiscoverTest(CartesianProductJoinTest engine)
addLinkAndDiscoverTest(TextIndexScanForWordTest engine)
addLinkAndDiscoverTest(TextIndexScanForEntityTest engine)
addLinkAndDiscoverTest(DistinctTest engine)
26 changes: 26 additions & 0 deletions test/engine/DistinctTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright 2022, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Johannes Kalmbach <[email protected]>

#include <gmock/gmock.h>

#include "../util/IndexTestHelpers.h"
#include "engine/Distinct.h"
#include "engine/NeutralElementOperation.h"

TEST(Distinct, CacheKey) {
// The cache key has to change when the subtree changes or when the
// `keepIndices` (the distinct variables) change.
auto qec = ad_utility::testing::getQec();
auto d = ad_utility::makeExecutionTree<Distinct>(
ad_utility::testing::getQec(),
ad_utility::makeExecutionTree<NeutralElementOperation>(qec),
std::vector<ColumnIndex>{0, 1});
Distinct d2(ad_utility::testing::getQec(),
ad_utility::makeExecutionTree<NeutralElementOperation>(qec), {0});
Distinct d3(ad_utility::testing::getQec(), d, {0});

EXPECT_NE(d->getCacheKey(), d2.getCacheKey());
EXPECT_NE(d->getCacheKey(), d3.getCacheKey());
EXPECT_NE(d2.getCacheKey(), d3.getCacheKey());
}

0 comments on commit 49e4134

Please sign in to comment.