Allow operations to not store their result in the cache (#1665)
Each operation now has a `bool` that determines whether its result may be stored in the cache (whether it actually is stored also depends on other circumstances, such as the available cache size). That `bool` does not have to be fixed when the operation is created, but can be changed later.

For example, this is useful for index scans that only return a subset of their full result (because of another constraining operation, like a join or a filter).
joka921 authored Dec 12, 2024
1 parent 4a15994 commit 70964d6
Showing 5 changed files with 147 additions and 25 deletions.
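
For example (a minimal sketch, not part of this diff: `markPrefilteredChild` is hypothetical; only `disableStoringInCache()`, `canResultBeCached()`, and `AD_CORRECTNESS_CHECK` come from the code base), a join that prefilters one of its children could mark the corresponding scan as not cacheable:

```cpp
// Sketch: `scan` is a child operation (e.g. an IndexScan) that a join has
// prefiltered, so it will only yield a subset of its full result.
void markPrefilteredChild(Operation& scan) {
  // Never write the partial result to the query cache ...
  scan.disableStoringInCache();
  // ... although, per the new `canResultBeCached()` documentation, a result
  // for the same cache key may still be *read* from the cache.
  AD_CORRECTNESS_CHECK(!scan.canResultBeCached());
}
```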
39 changes: 33 additions & 6 deletions src/engine/Operation.cpp
@@ -232,7 +232,8 @@ CacheValue Operation::runComputationAndPrepareForCache(
auto maxSize =
std::min(RuntimeParameters().get<"lazy-result-max-cache-size">(),
cache.getMaxSizeSingleEntry());
if (!result.isFullyMaterialized() && !unlikelyToFitInCache(maxSize)) {
if (canResultBeCached() && !result.isFullyMaterialized() &&
!unlikelyToFitInCache(maxSize)) {
AD_CONTRACT_CHECK(!pinned);
result.cacheDuringConsumption(
[maxSize](
@@ -316,11 +317,16 @@ std::shared_ptr<const Result> Operation::getResult(

bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED;

auto result =
pinResult ? cache.computeOncePinned(cacheKey, cacheSetup,
onlyReadFromCache, suitedForCache)
: cache.computeOnce(cacheKey, cacheSetup, onlyReadFromCache,
suitedForCache);
auto result = [&]() {
auto compute = [&](auto&&... args) {
if (!canResultBeCached()) {
return cache.computeButDontStore(AD_FWD(args)...);
}
return pinResult ? cache.computeOncePinned(AD_FWD(args)...)
: cache.computeOnce(AD_FWD(args)...);
};
return compute(cacheKey, cacheSetup, onlyReadFromCache, suitedForCache);
}();

if (result._resultPointer == nullptr) {
AD_CORRECTNESS_CHECK(onlyReadFromCache);
@@ -596,3 +602,24 @@ void Operation::signalQueryUpdate() const {
_executionContext->signalQueryUpdate(*_rootRuntimeInfo);
}
}

// _____________________________________________________________________________
std::string Operation::getCacheKey() const {
auto result = getCacheKeyImpl();
if (_limit._limit.has_value()) {
absl::StrAppend(&result, " LIMIT ", _limit._limit.value());
}
if (_limit._offset != 0) {
absl::StrAppend(&result, " OFFSET ", _limit._offset);
}
return result;
}

// _____________________________________________________________________________
uint64_t Operation::getSizeEstimate() {
if (_limit._limit.has_value()) {
return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit());
} else {
return getSizeEstimateBeforeLimit();
}
}
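
As a small illustration of the two definitions that were moved here from the header (the operation `op`, its cache key, and all numbers are made up for this sketch):

```cpp
// Hypothetical values, chosen only to show the suffixing and the clamping:
//   op.getCacheKeyImpl()            == "SCAN ?x <p> ?y"
//   op.getSizeEstimateBeforeLimit() == 1000
//   op._limit                       == LIMIT 10 OFFSET 5
assert(op.getCacheKey() == "SCAN ?x <p> ?y LIMIT 10 OFFSET 5");
assert(op.getSizeEstimate() == 10);  // min(LIMIT, size estimate before the limit)
```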
38 changes: 19 additions & 19 deletions src/engine/Operation.h
@@ -90,6 +90,9 @@ class Operation {
// limit/offset is applied post computation.
bool externalLimitApplied_ = false;

// See the documentation of the getter function below.
bool canResultBeCached_ = true;

public:
// Holds a `PrefilterExpression` with its corresponding `Variable`.
using PrefilterVariablePair = sparqlExpression::PrefilterExprVariablePair;
@@ -162,20 +165,23 @@
// Get a unique, not ambiguous string representation for a subtree.
// This should act like an ID for each subtree.
// Calls `getCacheKeyImpl` and adds the information about the `LIMIT` clause.
virtual string getCacheKey() const final {
auto result = getCacheKeyImpl();
if (_limit._limit.has_value()) {
absl::StrAppend(&result, " LIMIT ", _limit._limit.value());
}
if (_limit._offset != 0) {
absl::StrAppend(&result, " OFFSET ", _limit._offset);
}
return result;
}
virtual std::string getCacheKey() const final;

// If this function returns `false`, then the result of this `Operation` will
// never be stored in the cache. It might, however, be read from the cache.
// This can be used if the operation only returns a subset of the full result
// because it has been constrained by a parent operation (e.g. an IndexScan
// that has been prefiltered by another operation it is joined with).
virtual bool canResultBeCached() const { return canResultBeCached_; }

// After calling this function, `canResultBeCached()` will return `false` (see
// above for details).
virtual void disableStoringInCache() final { canResultBeCached_ = false; }

private:
// The individual implementation of `getCacheKey` (see above) that has to be
// customized by every child class.
// The individual implementation of `getCacheKey` (see above) that has to
// be customized by every child class.
virtual string getCacheKeyImpl() const = 0;

public:
@@ -186,13 +192,7 @@

virtual size_t getCostEstimate() = 0;

virtual uint64_t getSizeEstimate() final {
if (_limit._limit.has_value()) {
return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit());
} else {
return getSizeEstimateBeforeLimit();
}
}
virtual uint64_t getSizeEstimate() final;

private:
virtual uint64_t getSizeEstimateBeforeLimit() = 0;
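
A sketch of how a derived operation could combine the new flag with a condition of its own (the `PrefilteredScan` class and its `isPrefiltered_` member are hypothetical; only `canResultBeCached()` is part of this commit, and the remaining pure-virtual members of `Operation` are omitted):

```cpp
// Hypothetical subclass: besides the flag stored in the base class, it also
// refuses caching whenever it has been prefiltered and would therefore only
// return part of its full result.
class PrefilteredScan : public Operation {
  bool isPrefiltered_ = false;

 public:
  bool canResultBeCached() const override {
    return Operation::canResultBeCached() && !isPrefiltered_;
  }
  // getCacheKeyImpl(), getSizeEstimateBeforeLimit(), etc. omitted in this sketch.
};
```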
22 changes: 22 additions & 0 deletions src/util/ConcurrentCache.h
@@ -208,6 +208,28 @@ class ConcurrentCache {
suitedForCache);
}

// If the result is contained in the cache, read and return it. Otherwise,
// compute it, but do not store it in the cache. The interface is the same as
// for the two functions above; therefore, some of the arguments are unused.
ResultAndCacheStatus computeButDontStore(
const Key& key,
const InvocableWithConvertibleReturnType<Value> auto& computeFunction,
bool onlyReadFromCache,
[[maybe_unused]] const InvocableWithConvertibleReturnType<
bool, const Value&> auto& suitedForCache) {
{
auto resultPtr = _cacheAndInProgressMap.wlock()->_cache[key];
if (resultPtr != nullptr) {
return {std::move(resultPtr), CacheStatus::cachedNotPinned};
}
}
if (onlyReadFromCache) {
return {nullptr, CacheStatus::notInCacheAndNotComputed};
}
auto value = std::make_shared<Value>(computeFunction());
return {std::move(value), CacheStatus::computed};
}

// Insert `value` into the cache, if the `key` is not already present. In case
// `pinned` is true and the key is already present, the existing value is
// pinned in case it is not pinned yet.
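
A condensed usage sketch (assuming the same `SimpleConcurrentLruCache` over string values that the unit tests use; the full demonstration is the new test added to `test/ConcurrentCacheTest.cpp` below):

```cpp
SimpleConcurrentLruCache cache{};
// Key 42 is not in the cache, so the value is computed and returned ...
auto res = cache.computeButDontStore(
    42, []() { return "blubb"; }, /*onlyReadFromCache=*/false,
    [](const auto&) { return true; });
assert(*res._resultPointer == "blubb");
// ... but nothing was written to the cache.
assert(!cache.getIfContained(42).has_value());
```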
36 changes: 36 additions & 0 deletions test/ConcurrentCacheTest.cpp
@@ -530,3 +530,39 @@ TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) {

expectContainsSingleElementAtKey0(true, "jkl");
}

TEST(ConcurrentCache, computeButDontStore) {
SimpleConcurrentLruCache cache{};

// The last argument of `computeOnce...`: For the sake of this test, all
// results are suitable for the cache. Note: In the `computeButDontStore`
// function this argument is ignored, because the results are never stored in
// the cache.
auto alwaysSuitable = [](auto&&) { return true; };
// Store the element in the cache.
cache.computeOnce(
42, []() { return "42"; }, false, alwaysSuitable);

// The result is read from the cache, so we get "42", not "blubb".
auto res = cache.computeButDontStore(
42, []() { return "blubb"; }, false, alwaysSuitable);
EXPECT_EQ(*res._resultPointer, "42");

// The same with `onlyReadFromCache` == true.
res = cache.computeButDontStore(
42, []() { return "blubb"; }, true, alwaysSuitable);
EXPECT_EQ(*res._resultPointer, "42");

cache.clearAll();

// Compute, but don't store.
res = cache.computeButDontStore(
42, []() { return "blubb"; }, false, alwaysSuitable);
EXPECT_EQ(*res._resultPointer, "blubb");

// Nothing is stored in the cache, so we cannot read it.
EXPECT_FALSE(cache.getIfContained(42).has_value());
res = cache.computeButDontStore(
42, []() { return "blubb"; }, true, alwaysSuitable);
EXPECT_EQ(res._resultPointer, nullptr);
}
37 changes: 37 additions & 0 deletions test/OperationTest.cpp
@@ -653,3 +653,40 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) {
EXPECT_FALSE(
qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test")));
}

TEST(OperationTest, disableCaching) {
auto qec = getQec();
qec->getQueryTreeCache().clearAll();
std::vector<IdTable> idTablesVector{};
idTablesVector.push_back(makeIdTableFromVector({{3, 4}}));
idTablesVector.push_back(makeIdTableFromVector({{7, 8}, {9, 123}}));
ValuesForTesting valuesForTesting{
qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}, true};

QueryCacheKey cacheKey{valuesForTesting.getCacheKey(),
qec->locatedTriplesSnapshot().index_};

// By default, the result of `valuesForTesting` is cached because it is
// sufficiently small, no matter if it was computed lazily or fully
// materialized.
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(true);
EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey));
qec->getQueryTreeCache().clearAll();
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(false);
EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey));

// We now disable caching for the `valuesForTesting`. Then the result is never
// cached, no matter if it is computed lazily or fully materialized.
valuesForTesting.disableStoringInCache();
qec->getQueryTreeCache().clearAll();

EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(true);
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
qec->getQueryTreeCache().clearAll();
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
valuesForTesting.getResult(false);
EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey));
}
