Allow operations to not store their result in the cache #1665

Merged
39 changes: 33 additions & 6 deletions src/engine/Operation.cpp
@@ -232,7 +232,8 @@
auto maxSize =
std::min(RuntimeParameters().get<"lazy-result-max-cache-size">(),
cache.getMaxSizeSingleEntry());
if (!result.isFullyMaterialized() && !unlikelyToFitInCache(maxSize)) {
if (canResultBeCached() && !result.isFullyMaterialized() &&
!unlikelyToFitInCache(maxSize)) {
AD_CONTRACT_CHECK(!pinned);
result.cacheDuringConsumption(
[maxSize](
@@ -316,11 +317,16 @@

bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED;

auto result =
pinResult ? cache.computeOncePinned(cacheKey, cacheSetup,
onlyReadFromCache, suitedForCache)
: cache.computeOnce(cacheKey, cacheSetup, onlyReadFromCache,
suitedForCache);
auto result = [&]() {
auto compute = [&](auto&&... args) {
if (!canResultBeCached()) {
return cache.computeButDontStore(AD_FWD(args)...);
}
return pinResult ? cache.computeOncePinned(AD_FWD(args)...)
: cache.computeOnce(AD_FWD(args)...);
};
return compute(cacheKey, cacheSetup, onlyReadFromCache, suitedForCache);
}();

if (result._resultPointer == nullptr) {
AD_CORRECTNESS_CHECK(onlyReadFromCache);
@@ -596,3 +602,24 @@
_executionContext->signalQueryUpdate(*_rootRuntimeInfo);
}
}

// _____________________________________________________________________________
std::string Operation::getCacheKey() const {
auto result = getCacheKeyImpl();
if (_limit._limit.has_value()) {
absl::StrAppend(&result, " LIMIT ", _limit._limit.value());
}
if (_limit._offset != 0) {
absl::StrAppend(&result, " OFFSET ", _limit._offset);
}
return result;
}

// _____________________________________________________________________________
uint64_t Operation::getSizeEstimate() {
if (_limit._limit.has_value()) {
return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit());

[Codecov annotation: added line src/engine/Operation.cpp#L621 was not covered by tests]
} else {
return getSizeEstimateBeforeLimit();
}
}
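A note on the dispatch idiom in the hunk above: an immediately-invoked lambda wraps a generic `compute` helper so that all three cache entry points (`computeButDontStore`, `computeOncePinned`, `computeOnce`) share one argument list; `AD_FWD` is QLever's forwarding macro (roughly `std::forward<decltype(x)>(x)`). The following is a minimal, self-contained sketch of the same pattern with hypothetical stand-in functions; it is not code from the PR.

```cpp
#include <iostream>
#include <string>
#include <utility>

// Hypothetical stand-ins for the three cache entry points (not the real API).
std::string computeButDontStore(const std::string& key) { return "computed, not stored: " + key; }
std::string computeOncePinned(const std::string& key) { return "computed or read, pinned: " + key; }
std::string computeOnce(const std::string& key) { return "computed or read: " + key; }

std::string getCachedResult(const std::string& cacheKey, bool canResultBeCached, bool pinResult) {
  // Immediately-invoked lambda: choose the entry point once, then forward the
  // shared argument list unchanged (the role AD_FWD plays in the PR).
  return [&]() {
    auto compute = [&](auto&&... args) {
      if (!canResultBeCached) {
        return computeButDontStore(std::forward<decltype(args)>(args)...);
      }
      return pinResult ? computeOncePinned(std::forward<decltype(args)>(args)...)
                       : computeOnce(std::forward<decltype(args)>(args)...);
    };
    return compute(cacheKey);
  }();
}

int main() {
  std::cout << getCachedResult("someKey", /*canResultBeCached=*/false, /*pinResult=*/false) << '\n';
  std::cout << getCachedResult("someKey", /*canResultBeCached=*/true, /*pinResult=*/true) << '\n';
}
```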
38 changes: 19 additions & 19 deletions src/engine/Operation.h
@@ -90,6 +90,9 @@ class Operation {
// limit/offset is applied post computation.
bool externalLimitApplied_ = false;

// See the documentation of the getter function below.
bool canResultBeCached_ = true;

public:
// Holds a `PrefilterExpression` with its corresponding `Variable`.
using PrefilterVariablePair = sparqlExpression::PrefilterExprVariablePair;
@@ -162,20 +165,23 @@
// Get a unique, not ambiguous string representation for a subtree.
// This should act like an ID for each subtree.
// Calls `getCacheKeyImpl` and adds the information about the `LIMIT` clause.
virtual string getCacheKey() const final {
auto result = getCacheKeyImpl();
if (_limit._limit.has_value()) {
absl::StrAppend(&result, " LIMIT ", _limit._limit.value());
}
if (_limit._offset != 0) {
absl::StrAppend(&result, " OFFSET ", _limit._offset);
}
return result;
}
virtual std::string getCacheKey() const final;

// If this function returns `false`, then the result of this `Operation` will
// never be stored in the cache. It might, however, be read from the cache.
// This is useful if the operation only computes a subset of the complete
// result because it has been constrained by a parent operation (e.g. an
// IndexScan that has been prefiltered by another operation it is joined
// with).
virtual bool canResultBeCached() const { return canResultBeCached_; }

// After calling this function, `canResultBeCached()` will return `false` (see
// above for details).
virtual void disableStoringInCache() final { canResultBeCached_ = false; }

private:
// The individual implementation of `getCacheKey` (see above) that has to be
// customized by every child class.
// The individual implementation of `getCacheKey` (see above) that has to
// be customized by every child class.
virtual string getCacheKeyImpl() const = 0;

public:
@@ -186,13 +192,7 @@

virtual size_t getCostEstimate() = 0;

virtual uint64_t getSizeEstimate() final {
if (_limit._limit.has_value()) {
return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit());
} else {
return getSizeEstimateBeforeLimit();
}
}
virtual uint64_t getSizeEstimate() final;

private:
virtual uint64_t getSizeEstimateBeforeLimit() = 0;
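To make the intended use of the new flag concrete, here is a small, self-contained toy model; the `ToyOperation` class and the plain `std::map` used as a cache are invented for illustration and are not QLever's `Operation` or query cache. A result is stored only while `canResultBeCached()` still returns `true`, e.g. until a parent operation that prefilters this one calls `disableStoringInCache()`.

```cpp
#include <iostream>
#include <map>
#include <string>

// Toy model of the new flag: the result is stored in the cache only while
// canResultBeCached() returns true.
class ToyOperation {
  bool canResultBeCached_ = true;

 public:
  bool canResultBeCached() const { return canResultBeCached_; }
  void disableStoringInCache() { canResultBeCached_ = false; }
  std::string compute() const { return "result"; }
};

int main() {
  std::map<std::string, std::string> cache;
  ToyOperation op;

  auto run = [&](const std::string& key) {
    std::string result = op.compute();
    if (op.canResultBeCached()) cache[key] = result;  // store only if allowed
    return result;
  };

  run("q");
  std::cout << "cached after 1st run: " << cache.count("q") << '\n';  // 1

  cache.clear();
  op.disableStoringInCache();  // e.g. called by a parent that prefilters `op`
  run("q");
  std::cout << "cached after 2nd run: " << cache.count("q") << '\n';  // 0
}
```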
22 changes: 22 additions & 0 deletions src/util/ConcurrentCache.h
@@ -208,6 +208,28 @@
suitedForCache);
}

// If the result is contained in the cache, read and return it. Otherwise,
// compute it, but do not store it in the cache. The interface is the same as
// for the two functions above, which is why some of the arguments are unused.
ResultAndCacheStatus computeButDontStore(
const Key& key,
const InvocableWithConvertibleReturnType<Value> auto& computeFunction,
bool onlyReadFromCache,
[[maybe_unused]] const InvocableWithConvertibleReturnType<
bool, const Value&> auto& suitedForCache) {
{
auto resultPtr = _cacheAndInProgressMap.wlock()->_cache[key];
if (resultPtr != nullptr) {
return {std::move(resultPtr), CacheStatus::cachedNotPinned};
}
}
if (onlyReadFromCache) {
return {nullptr, CacheStatus::notInCacheAndNotComputed};
}
auto value = std::make_shared<Value>(computeFunction());
return {std::move(value), CacheStatus::computed};
}

// Insert `value` into the cache, if the `key` is not already present. In case
// `pinned` is true and the key is already present, the existing value is
// pinned in case it is not pinned yet.
36 changes: 36 additions & 0 deletions test/ConcurrentCacheTest.cpp
@@ -530,3 +530,39 @@ TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) {

expectContainsSingleElementAtKey0(true, "jkl");
}

TEST(ConcurrentCache, computeButDontStore) {
SimpleConcurrentLruCache cache{};

// The last argument of `computeOnce...`: For the sake of this test, all
// results are suitable for the cache. Note: In the `computeButDontStore`
// function this argument is ignored, because the results are never stored in
// the cache.
auto alwaysSuitable = [](auto&&) { return true; };
// Store the element in the cache.
cache.computeOnce(
42, []() { return "42"; }, false, alwaysSuitable);

// The result is read from the cache, so we get "42", not "blubb".
auto res = cache.computeButDontStore(
42, []() { return "blubb"; }, false, alwaysSuitable);
EXPECT_EQ(*res._resultPointer, "42");

// The same with `onlyReadFromCache == true`.
res = cache.computeButDontStore(
42, []() { return "blubb"; }, true, alwaysSuitable);
EXPECT_EQ(*res._resultPointer, "42");

cache.clearAll();

// Compute, but don't store.
res = cache.computeButDontStore(
42, []() { return "blubb"; }, false, alwaysSuitable);
EXPECT_EQ(*res._resultPointer, "blubb");

// Nothing is stored in the cache, so we cannot read it.
EXPECT_FALSE(cache.getIfContained(42).has_value());
res = cache.computeButDontStore(
42, []() { return "blubb"; }, true, alwaysSuitable);
EXPECT_EQ(res._resultPointer, nullptr);
}
37 changes: 37 additions & 0 deletions test/OperationTest.cpp
@@ -653,3 +653,40 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) {
EXPECT_FALSE(
qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test")));
}

TEST(OperationTest, disableCaching) {
auto qec = getQec();
qec->getQueryTreeCache().clearAll();
std::vector<IdTable> idTablesVector{};
idTablesVector.push_back(makeIdTableFromVector({{3, 4}}));
idTablesVector.push_back(makeIdTableFromVector({{7, 8}, {9, 123}}));
ValuesForTesting valuesForTesting{
qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}, true};

QueryCacheKey cacheKey{valuesForTesting.getCacheKey(),
qec->locatedTriplesSnapshot().index_};

// By default, the result of `valuesForTesting` is cached because it is
// sufficiently small, no matter if it was computed lazily or fully
// materialized.
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(true);
EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey));
qec->getQueryTreeCache().clearAll();
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(false);
EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey));

// We now disable caching for the `valuesForTesting`. Then the result is never
// cached, no matter if it is computed lazily or fully materialized.
valuesForTesting.disableStoringInCache();
qec->getQueryTreeCache().clearAll();

EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(true);
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
qec->getQueryTreeCache().clearAll();
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(false);
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
}