From b969cccad274b12277dfb2ff465cfef30849e84f Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 25 Nov 2024 00:43:42 +0100 Subject: [PATCH 1/4] Introduce extra runtime parameter for lazy results --- src/engine/Operation.cpp | 8 ++++---- src/global/RuntimeParameters.h | 3 ++- test/OperationTest.cpp | 7 ++++--- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 9c6ae814aa..0e09841fa0 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -5,6 +5,7 @@ #include "engine/Operation.h" #include "engine/QueryExecutionTree.h" +#include "global/RuntimeParameters.h" #include "util/OnDestructionDontThrowDuringStackUnwinding.h" #include "util/TransparentFunctors.h" @@ -179,14 +180,13 @@ CacheValue Operation::runComputationAndPrepareForCache( !unlikelyToFitInCache(cache.getMaxSizeSingleEntry())) { AD_CONTRACT_CHECK(!pinned); result.cacheDuringConsumption( - [maxSize = cache.getMaxSizeSingleEntry()]( - const std::optional& currentIdTablePair, - const Result::IdTableVocabPair& newIdTable) { + [](const std::optional& currentIdTablePair, + const Result::IdTableVocabPair& newIdTable) { auto currentSize = currentIdTablePair.has_value() ? CacheValue::getSize(currentIdTablePair.value().idTable_) : 0_B; - return maxSize >= + return RuntimeParameters().get<"lazy-result-max-cache-size">() >= currentSize + CacheValue::getSize(newIdTable.idTable_); }, [runtimeInfo = getRuntimeInfoPointer(), &cache, diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 67c3381c2b..6c00382947 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -50,7 +50,8 @@ inline auto& RuntimeParameters() { Bool<"group-by-disable-index-scan-optimizations">{false}, SizeT<"service-max-value-rows">{10'000}, SizeT<"query-planning-budget">{1500}, - Bool<"throw-on-unbound-variables">{false}}; + Bool<"throw-on-unbound-variables">{false}, + MemorySizeParameter<"lazy-result-max-cache-size">{500_MB}}; }(); return params; } diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index 0e93b8e9fc..ea1dd7a78f 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -8,6 +8,7 @@ #include "engine/NeutralElementOperation.h" #include "engine/ValuesForTesting.h" +#include "global/RuntimeParameters.h" #include "util/IdTableHelpers.h" #include "util/IndexTestHelpers.h" #include "util/OperationTestHelpers.h" @@ -582,21 +583,21 @@ TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge) { ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; - auto originalSize = qec->getQueryTreeCache().getMaxSizeSingleEntry(); + auto originalSize = RuntimeParameters().get<"lazy-result-max-cache-size">(); // Too small for storage - qec->getQueryTreeCache().setMaxSizeSingleEntry(1_B); + RuntimeParameters().set<"lazy-result-max-cache-size">(1_B); auto cacheValue = valuesForTesting.runComputationAndPrepareForCache( timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); - qec->getQueryTreeCache().setMaxSizeSingleEntry(originalSize); for ([[maybe_unused]] Result::IdTableVocabPair& _ : cacheValue.resultTable().idTables()) { } EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); + RuntimeParameters().set<"lazy-result-max-cache-size">(originalSize); } // _____________________________________________________________________________ From 19ac8abb62475cd5e2f9c6e06d255dc3824cf0b4 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 25 Nov 2024 17:51:22 +0100 Subject: [PATCH 2/4] Address PR comments --- src/engine/Operation.cpp | 8 +++++--- src/global/RuntimeParameters.h | 2 ++ test/OperationTest.cpp | 24 ++++++++++++++---------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 431a78f155..fb27329884 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -183,14 +183,16 @@ CacheValue Operation::runComputationAndPrepareForCache( if (!result.isFullyMaterialized() && !unlikelyToFitInCache(cache.getMaxSizeSingleEntry())) { AD_CONTRACT_CHECK(!pinned); + auto maxSize = RuntimeParameters().get<"lazy-result-max-cache-size">(); result.cacheDuringConsumption( - [](const std::optional& currentIdTablePair, - const Result::IdTableVocabPair& newIdTable) { + [maxSize]( + const std::optional& currentIdTablePair, + const Result::IdTableVocabPair& newIdTable) { auto currentSize = currentIdTablePair.has_value() ? CacheValue::getSize(currentIdTablePair.value().idTable_) : 0_B; - return RuntimeParameters().get<"lazy-result-max-cache-size">() >= + return maxSize >= currentSize + CacheValue::getSize(newIdTable.idTable_); }, [runtimeInfo = getRuntimeInfoPointer(), &cache, diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 6c00382947..76f1a7eee7 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -51,6 +51,8 @@ inline auto& RuntimeParameters() { SizeT<"service-max-value-rows">{10'000}, SizeT<"query-planning-budget">{1500}, Bool<"throw-on-unbound-variables">{false}, + // Control up until which size lazy results should be cached. Caching + // does cause significant overhead for this case. MemorySizeParameter<"lazy-result-max-cache-size">{500_MB}}; }(); return params; diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index ea1dd7a78f..16ecabf307 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -12,6 +12,7 @@ #include "util/IdTableHelpers.h" #include "util/IndexTestHelpers.h" #include "util/OperationTestHelpers.h" +#include "util/RuntimeParametersTestHelpers.h" using namespace ad_utility::testing; using namespace ::testing; @@ -583,21 +584,24 @@ TEST(Operation, checkLazyOperationIsNotCachedIfTooLarge) { ad_utility::Timer timer{ad_utility::Timer::InitialStatus::Started}; - auto originalSize = RuntimeParameters().get<"lazy-result-max-cache-size">(); - - // Too small for storage - RuntimeParameters().set<"lazy-result-max-cache-size">(1_B); - - auto cacheValue = valuesForTesting.runComputationAndPrepareForCache( - timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); - EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); + std::optional cacheValue = std::nullopt; + { + // Too small for storage, make sure to change back before consuming + // generator to additionally assert sure it is not re-read on every + // iteration. + auto cleanup = + setRuntimeParameterForTest<"lazy-result-max-cache-size">(1_B); + + cacheValue = valuesForTesting.runComputationAndPrepareForCache( + timer, ComputationMode::LAZY_IF_SUPPORTED, "test", false); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); + } for ([[maybe_unused]] Result::IdTableVocabPair& _ : - cacheValue.resultTable().idTables()) { + cacheValue->resultTable().idTables()) { } EXPECT_FALSE(qec->getQueryTreeCache().cacheContains("test")); - RuntimeParameters().set<"lazy-result-max-cache-size">(originalSize); } // _____________________________________________________________________________ From 68909b322d14c4d95d10cf33c9bed2c251431aa4 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Mon, 25 Nov 2024 19:07:17 +0100 Subject: [PATCH 3/4] Use minimum of max cache entry size and dedicated lazy cache size --- src/engine/Operation.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index fb27329884..0b221f0c62 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -180,10 +180,11 @@ CacheValue Operation::runComputationAndPrepareForCache( const std::string& cacheKey, bool pinned) { auto& cache = _executionContext->getQueryTreeCache(); auto result = runComputation(timer, computationMode); - if (!result.isFullyMaterialized() && - !unlikelyToFitInCache(cache.getMaxSizeSingleEntry())) { + auto maxSize = + std::min(RuntimeParameters().get<"lazy-result-max-cache-size">(), + cache.getMaxSizeSingleEntry()); + if (!result.isFullyMaterialized() && !unlikelyToFitInCache(maxSize)) { AD_CONTRACT_CHECK(!pinned); - auto maxSize = RuntimeParameters().get<"lazy-result-max-cache-size">(); result.cacheDuringConsumption( [maxSize]( const std::optional& currentIdTablePair, From 2556f917757cad3a45ff02b29f23768bb7a44142 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:26:40 +0100 Subject: [PATCH 4/4] Reduce caching size and make cli parameter --- src/ServerMain.cpp | 5 +++++ src/global/RuntimeParameters.h | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/ServerMain.cpp b/src/ServerMain.cpp index 64406f08cf..f50d12cba2 100644 --- a/src/ServerMain.cpp +++ b/src/ServerMain.cpp @@ -82,6 +82,11 @@ int main(int argc, char** argv) { optionFactory.getProgramOption<"cache-max-size-single-entry">(), "Maximum size for a single cache entry. That is, " "results larger than this will not be cached unless pinned."); + add("lazy-result-max-cache-size,E", + optionFactory.getProgramOption<"lazy-result-max-cache-size">(), + "Maximum size up to which lazy results will be cached by aggregating " + "partial results. Caching does cause significant overhead for this " + "case."); add("cache-max-num-entries,k", optionFactory.getProgramOption<"cache-max-num-entries">(), "Maximum number of entries in the cache. If exceeded, remove " diff --git a/src/global/RuntimeParameters.h b/src/global/RuntimeParameters.h index 76f1a7eee7..8e60725ffe 100644 --- a/src/global/RuntimeParameters.h +++ b/src/global/RuntimeParameters.h @@ -53,7 +53,7 @@ inline auto& RuntimeParameters() { Bool<"throw-on-unbound-variables">{false}, // Control up until which size lazy results should be cached. Caching // does cause significant overhead for this case. - MemorySizeParameter<"lazy-result-max-cache-size">{500_MB}}; + MemorySizeParameter<"lazy-result-max-cache-size">{5_MB}}; }(); return params; }