From 70964d6e5aaaf0d5f507c384c78933327865342a Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 12 Dec 2024 18:54:24 +0100 Subject: [PATCH] Allow operations to not store their result in the cache (#1665) Each operation now has a `bool` that determines whether the results can be stored in the cache or not (whether it is actually stored depends on other circumstances, like the available cache size). That `bool` does not have to be fixed when the operation is created, but can be changed. For example, this is useful for index scans that only return a subset of their full result (because of another constraining operation, like a join or a filter). --- src/engine/Operation.cpp | 39 ++++++++++++++++++++++++++++++------ src/engine/Operation.h | 38 +++++++++++++++++------------------ src/util/ConcurrentCache.h | 22 ++++++++++++++++++++ test/ConcurrentCacheTest.cpp | 36 +++++++++++++++++++++++++++++++++ test/OperationTest.cpp | 37 ++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+), 25 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index e4d7136527..1a9f53fa76 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -232,7 +232,8 @@ CacheValue Operation::runComputationAndPrepareForCache( auto maxSize = std::min(RuntimeParameters().get<"lazy-result-max-cache-size">(), cache.getMaxSizeSingleEntry()); - if (!result.isFullyMaterialized() && !unlikelyToFitInCache(maxSize)) { + if (canResultBeCached() && !result.isFullyMaterialized() && + !unlikelyToFitInCache(maxSize)) { AD_CONTRACT_CHECK(!pinned); result.cacheDuringConsumption( [maxSize]( @@ -316,11 +317,16 @@ std::shared_ptr Operation::getResult( bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; - auto result = - pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, - onlyReadFromCache, suitedForCache) - : cache.computeOnce(cacheKey, cacheSetup, onlyReadFromCache, - suitedForCache); + auto result = [&]() { + auto compute = [&](auto&&... args) { + if (!canResultBeCached()) { + return cache.computeButDontStore(AD_FWD(args)...); + } + return pinResult ? cache.computeOncePinned(AD_FWD(args)...) + : cache.computeOnce(AD_FWD(args)...); + }; + return compute(cacheKey, cacheSetup, onlyReadFromCache, suitedForCache); + }(); if (result._resultPointer == nullptr) { AD_CORRECTNESS_CHECK(onlyReadFromCache); @@ -596,3 +602,24 @@ void Operation::signalQueryUpdate() const { _executionContext->signalQueryUpdate(*_rootRuntimeInfo); } } + +// _____________________________________________________________________________ +std::string Operation::getCacheKey() const { + auto result = getCacheKeyImpl(); + if (_limit._limit.has_value()) { + absl::StrAppend(&result, " LIMIT ", _limit._limit.value()); + } + if (_limit._offset != 0) { + absl::StrAppend(&result, " OFFSET ", _limit._offset); + } + return result; +} + +// _____________________________________________________________________________ +uint64_t Operation::getSizeEstimate() { + if (_limit._limit.has_value()) { + return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit()); + } else { + return getSizeEstimateBeforeLimit(); + } +} diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 3e06a9498e..9e649cb0a4 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -90,6 +90,9 @@ class Operation { // limit/offset is applied post computation. bool externalLimitApplied_ = false; + // See the documentation of the getter function below. + bool canResultBeCached_ = true; + public: // Holds a `PrefilterExpression` with its corresponding `Variable`. using PrefilterVariablePair = sparqlExpression::PrefilterExprVariablePair; @@ -162,20 +165,23 @@ class Operation { // Get a unique, not ambiguous string representation for a subtree. // This should act like an ID for each subtree. // Calls `getCacheKeyImpl` and adds the information about the `LIMIT` clause. - virtual string getCacheKey() const final { - auto result = getCacheKeyImpl(); - if (_limit._limit.has_value()) { - absl::StrAppend(&result, " LIMIT ", _limit._limit.value()); - } - if (_limit._offset != 0) { - absl::StrAppend(&result, " OFFSET ", _limit._offset); - } - return result; - } + virtual std::string getCacheKey() const final; + + // If this function returns `false`, then the result of this `Operation` will + // never be stored in the cache. It might however be read from the cache. + // This can be used, if the operation actually only returns a subset of the + // actual result because it has been constrained by a parent operation (e.g. + // an IndexScan that has been prefiltered by another operation which it is + // joined with). + virtual bool canResultBeCached() const { return canResultBeCached_; } + + // After calling this function, `canResultBeCached()` will return `false` (see + // above for details). + virtual void disableStoringInCache() final { canResultBeCached_ = false; } private: - // The individual implementation of `getCacheKey` (see above) that has to be - // customized by every child class. + // The individual implementation of `getCacheKey` (see above) that has to + // be customized by every child class. virtual string getCacheKeyImpl() const = 0; public: @@ -186,13 +192,7 @@ class Operation { virtual size_t getCostEstimate() = 0; - virtual uint64_t getSizeEstimate() final { - if (_limit._limit.has_value()) { - return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit()); - } else { - return getSizeEstimateBeforeLimit(); - } - } + virtual uint64_t getSizeEstimate() final; private: virtual uint64_t getSizeEstimateBeforeLimit() = 0; diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 2f22efde8c..21262da12b 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -208,6 +208,28 @@ class ConcurrentCache { suitedForCache); } + // If the result is contained in the cache, read and return it. Otherwise, + // compute it, but do not store it in the cache. The interface is the same as + // for the above two functions, therefore some of the arguments are unused. + ResultAndCacheStatus computeButDontStore( + const Key& key, + const InvocableWithConvertibleReturnType auto& computeFunction, + bool onlyReadFromCache, + [[maybe_unused]] const InvocableWithConvertibleReturnType< + bool, const Value&> auto& suitedForCache) { + { + auto resultPtr = _cacheAndInProgressMap.wlock()->_cache[key]; + if (resultPtr != nullptr) { + return {std::move(resultPtr), CacheStatus::cachedNotPinned}; + } + } + if (onlyReadFromCache) { + return {nullptr, CacheStatus::notInCacheAndNotComputed}; + } + auto value = std::make_shared(computeFunction()); + return {std::move(value), CacheStatus::computed}; + } + // Insert `value` into the cache, if the `key` is not already present. In case // `pinned` is true and the key is already present, the existing value is // pinned in case it is not pinned yet. diff --git a/test/ConcurrentCacheTest.cpp b/test/ConcurrentCacheTest.cpp index f52eca0561..9dbfbde509 100644 --- a/test/ConcurrentCacheTest.cpp +++ b/test/ConcurrentCacheTest.cpp @@ -530,3 +530,39 @@ TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) { expectContainsSingleElementAtKey0(true, "jkl"); } + +TEST(ConcurrentCache, computeButDontStore) { + SimpleConcurrentLruCache cache{}; + + // The last argument of `computeOnce...`: For the sake of this test, all + // results are suitable for the cache. Note: In the `computeButDontStore` + // function this argument is ignored, because the results are never stored in + // the cache. + auto alwaysSuitable = [](auto&&) { return true; }; + // Store the element in the cache. + cache.computeOnce( + 42, []() { return "42"; }, false, alwaysSuitable); + + // The result is read from the cache, so we get "42", not "blubb". + auto res = cache.computeButDontStore( + 42, []() { return "blubb"; }, false, alwaysSuitable); + EXPECT_EQ(*res._resultPointer, "42"); + + // The same with `onlyReadFromCache` == true; + res = cache.computeButDontStore( + 42, []() { return "blubb"; }, true, alwaysSuitable); + EXPECT_EQ(*res._resultPointer, "42"); + + cache.clearAll(); + + // Compute, but don't store. + res = cache.computeButDontStore( + 42, []() { return "blubb"; }, false, alwaysSuitable); + EXPECT_EQ(*res._resultPointer, "blubb"); + + // Nothing is stored in the cache, so we cannot read it. + EXPECT_FALSE(cache.getIfContained(42).has_value()); + res = cache.computeButDontStore( + 42, []() { return "blubb"; }, true, alwaysSuitable); + EXPECT_EQ(res._resultPointer, nullptr); +} diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index c1ad709a4f..4ad1f1313c 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -653,3 +653,40 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) { EXPECT_FALSE( qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test"))); } + +TEST(OperationTest, disableCaching) { + auto qec = getQec(); + qec->getQueryTreeCache().clearAll(); + std::vector idTablesVector{}; + idTablesVector.push_back(makeIdTableFromVector({{3, 4}})); + idTablesVector.push_back(makeIdTableFromVector({{7, 8}, {9, 123}})); + ValuesForTesting valuesForTesting{ + qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}, true}; + + QueryCacheKey cacheKey{valuesForTesting.getCacheKey(), + qec->locatedTriplesSnapshot().index_}; + + // By default, the result of `valuesForTesting` is cached because it is + // sufficiently small, no matter if it was computed lazily or fully + // materialized. + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(true); + EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey)); + qec->getQueryTreeCache().clearAll(); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(false); + EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey)); + + // We now disable caching for the `valuesForTesting`. Then the result is never + // cached, no matter if it is computed lazily or fully materialized. + valuesForTesting.disableStoringInCache(); + qec->getQueryTreeCache().clearAll(); + + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(true); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + qec->getQueryTreeCache().clearAll(); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(false); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); +}