diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index a21afc093c..fbf6cad996 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -16,5 +16,5 @@ add_library(engine qlever_target_link_libraries(engine util index parser sparqlExpressions http SortPerformanceEstimator Boost::iostreams) add_library(scanSpecification ScanSpecification.cpp) -qlever_target_link_libraries(scanSpecification index) +qlever_target_link_libraries(scanSpecification index engine) qlever_target_link_libraries(engine util index parser sparqlExpressions scanSpecification http SortPerformanceEstimator Boost::iostreams) diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index ed4541d19a..c73361e001 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -157,7 +157,7 @@ ProtoResult CartesianProductJoin::computeResult( const auto& table = subResults.back()->idTable(); // Early stopping: If one of the results is empty, we can stop early. - if (table.size() == 0) { + if (table.empty()) { break; } @@ -165,6 +165,7 @@ ProtoResult CartesianProductJoin::computeResult( // zero variables), we can simply ignore it here. if (table.numRows() == 1 && table.numColumns() == 0) { subResults.pop_back(); + continue; } // Example for the following calculation: If we have a LIMIT of 1000 and // the first child already has a result of size 100, then the second child diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 96047d62fa..779adf1dba 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -2,9 +2,7 @@ // Chair of Algorithms and Data Structures. // Author: Johannes Kalmbach -#ifndef QLEVER_CARTESIANPRODUCTJOIN_H -#define QLEVER_CARTESIANPRODUCTJOIN_H - +#pragma once #include "engine/Operation.h" #include "engine/QueryExecutionTree.h" @@ -92,5 +90,3 @@ class CartesianProductJoin : public Operation { std::span inputColumn, size_t groupSize, size_t offset); }; - -#endif // QLEVER_CARTESIANPRODUCTJOIN_H diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index b29e2c6098..4293aa2c9c 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -251,7 +251,7 @@ void Operation::updateRuntimeInformationOnSuccess( // ____________________________________________________________________________________________________________________ void Operation::updateRuntimeInformationOnSuccess( - const ConcurrentLruCache ::ResultAndCacheStatus& resultAndCacheStatus, + const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) { updateRuntimeInformationOnSuccess( *resultAndCacheStatus._resultPointer->resultTable(), diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 920b09594f..9e23ee3fcf 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -263,7 +263,7 @@ class Operation { // Create and store the complete runtime information for this operation after // it has either been successfully computed or read from the cache. virtual void updateRuntimeInformationOnSuccess( - const ConcurrentLruCache::ResultAndCacheStatus& resultAndCacheStatus, + const QueryResultCache::ResultAndCacheStatus& resultAndCacheStatus, Milliseconds duration) final; // Similar to the function above, but the components are specified manually. diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 8311c32eb7..e7c1ff37f6 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -53,15 +53,8 @@ class CacheValue { // Threadsafe LRU cache for (partial) query results, that // checks on insertion, if the result is currently being computed // by another query. -using ConcurrentLruCache = ad_utility::ConcurrentCache< +using QueryResultCache = ad_utility::ConcurrentCache< ad_utility::LRUCache>; -class QueryResultCache : public ConcurrentLruCache { - public: - virtual ~QueryResultCache() = default; - void clearAll() override { ConcurrentLruCache::clearAll(); } - // Inherit the constructor. - using ConcurrentLruCache::ConcurrentLruCache; -}; // Execution context for queries. // Holds references to index and engine, implements caching. diff --git a/src/engine/ScanSpecification.cpp b/src/engine/ScanSpecification.cpp index 4ede911a10..ee8a0543d3 100644 --- a/src/engine/ScanSpecification.cpp +++ b/src/engine/ScanSpecification.cpp @@ -4,8 +4,16 @@ #include "engine/ScanSpecification.h" +#include "index/Index.h" #include "index/IndexImpl.h" +// ____________________________________________________________________________ +std::optional +ScanSpecificationAsTripleComponent::toScanSpecification( + const Index& index) const { + return toScanSpecification(index.getImpl()); +} + // ____________________________________________________________________________ std::optional ScanSpecificationAsTripleComponent::toScanSpecification( @@ -51,10 +59,31 @@ ScanSpecificationAsTripleComponent::ScanSpecificationAsTripleComponent(T col0, col1_ = toNulloptIfVariable(col1); col2_ = toNulloptIfVariable(col2); - if (!col0.has_value()) { - AD_CONTRACT_CHECK(!col1.has_value()); + if (!col0_.has_value()) { + AD_CONTRACT_CHECK(!col1_.has_value()); + } + if (!col1_.has_value()) { + AD_CONTRACT_CHECK(!col2_.has_value()); + } +} + +// ____________________________________________________________________________ +size_t ScanSpecificationAsTripleComponent::numColumns() const { + auto i = [](const auto& x) -> size_t { + return static_cast(x.has_value()); + }; + return 3 - i(col0_) - i(col1_) - i(col2_); +} + +// _____________________________________________________________________________ +void ScanSpecification::validate() const { + bool c0 = col0Id_.has_value(); + bool c1 = col1Id_.has_value(); + bool c2 = col2Id_.has_value(); + if (!c0) { + AD_CORRECTNESS_CHECK(!c1 && !c2); } - if (!col1.has_value()) { - AD_CONTRACT_CHECK(!col2.has_value()); + if (!c1) { + AD_CORRECTNESS_CHECK(!c2); } } diff --git a/src/engine/ScanSpecification.h b/src/engine/ScanSpecification.h index dfdf931a56..de8e915cee 100644 --- a/src/engine/ScanSpecification.h +++ b/src/engine/ScanSpecification.h @@ -11,6 +11,7 @@ // Forward declaration class IndexImpl; +class Index; // The specification of a scan operation for a given permutation. // Can either be a full scan (all three elements are `std::nullopt`), @@ -27,17 +28,7 @@ class ScanSpecification { T col2Id_; friend class ScanSpecificationAsTripleComponent; - void validate() const { - bool c0 = col0Id_.has_value(); - bool c1 = col1Id_.has_value(); - bool c2 = col2Id_.has_value(); - if (!c0) { - AD_CORRECTNESS_CHECK(!c1 && !c2); - } - if (!c1) { - AD_CORRECTNESS_CHECK(!c2); - } - } + void validate() const; public: ScanSpecification(T col0Id, T col1Id, T col2Id) @@ -48,6 +39,8 @@ class ScanSpecification { const T& col1Id() const { return col1Id_; } const T& col2Id() const { return col2Id_; } + bool operator==(const ScanSpecification&) const = default; + // Only used in tests. void setCol1Id(T col1Id) { col1Id_ = col1Id; @@ -80,7 +73,9 @@ class ScanSpecificationAsTripleComponent { // `LocalVocab` of the UPDATE triples here. std::optional toScanSpecification( const IndexImpl& index) const; - size_t numColumns() const { - return 3 - col0_.has_value() - col1_.has_value() - col2_.has_value(); - } + std::optional toScanSpecification( + const Index& index) const; + + // The number of columns that the corresponding index scan will have. + size_t numColumns() const; }; diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h index 4377b02445..c8fb92d84e 100644 --- a/src/engine/idTable/IdTable.h +++ b/src/engine/idTable/IdTable.h @@ -1,4 +1,4 @@ -// Copyright 2021, University of Freiburg, Chair of Algorithms and Data +/// Copyright 2021, University of Freiburg, Chair of Algorithms and Data // Structures. Author: Johannes Kalmbach #pragma once diff --git a/src/parser/CMakeLists.txt b/src/parser/CMakeLists.txt index 29ca836c71..8b1efdcb1e 100644 --- a/src/parser/CMakeLists.txt +++ b/src/parser/CMakeLists.txt @@ -25,5 +25,5 @@ add_library(parser Iri.cpp Literal.cpp LiteralOrIri.cpp) -qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine) +qlever_target_link_libraries(parser sparqlParser parserData sparqlExpressions rdfEscaping re2::re2 util engine index) diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index e76e2980b7..de8d0f270e 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -206,7 +206,7 @@ class ConcurrentCache { } /// Clear the cache, including the pinned entries. - virtual void clearAll() { _cacheAndInProgressMap.wlock()->_cache.clearAll(); } + void clearAll() { _cacheAndInProgressMap.wlock()->_cache.clearAll(); } /// Delete elements from the unpinned part of the cache of total size /// at least `size`; diff --git a/test/ValuesForTestingTest.cpp b/test/ValuesForTestingTest.cpp index 95ce76ce46..8c4b86d019 100644 --- a/test/ValuesForTestingTest.cpp +++ b/test/ValuesForTestingTest.cpp @@ -25,9 +25,9 @@ TEST(ValuesForTesting, valuesForTesting) { ASSERT_EQ(v.getMultiplicity(0), 42.0); ASSERT_EQ(v.getMultiplicity(1), 84.0); - ASSERT_THAT( - v.getCacheKey(), - ::testing::StartsWith("Values for testing with 2 columns. V:3 V:12")); + ASSERT_THAT(v.getCacheKey(), + ::testing::StartsWith( + "Values for testing with 2 columns and 3 rows. V:3 V:12")); ASSERT_THAT(v.getCacheKey(), ::testing::EndsWith("Supports limit: 0")); ASSERT_EQ(v.getDescriptor(), "explicit values for testing"); ASSERT_TRUE(v.resultSortedOn().empty()); @@ -36,3 +36,13 @@ TEST(ValuesForTesting, valuesForTesting) { auto result = v.getResult(); ASSERT_EQ(result->idTable(), table); } + +// ____________________________________________________________________________ +TEST(ValuesForTesting, cornerCasesCacheKey) { + auto empty = makeIdTableFromVector({}); + auto neutral = makeIdTableFromVector({{}}); + + ValuesForTesting vEmpty{getQec(), empty.clone(), {}}; + ValuesForTesting vNeutral{getQec(), neutral.clone(), {}}; + EXPECT_NE(vEmpty.getCacheKey(), vNeutral.getCacheKey()); +} diff --git a/test/engine/CMakeLists.txt b/test/engine/CMakeLists.txt index 4f6e09ff5f..d64860a72d 100644 --- a/test/engine/CMakeLists.txt +++ b/test/engine/CMakeLists.txt @@ -4,3 +4,4 @@ addLinkAndDiscoverTest(CartesianProductJoinTest engine) addLinkAndDiscoverTest(TextIndexScanForWordTest engine) addLinkAndDiscoverTest(TextIndexScanForEntityTest engine) addLinkAndDiscoverTest(DistinctTest engine) +addLinkAndDiscoverTestSerial(ScanSpecificationTest engine index parser scanSpecification) diff --git a/test/engine/CartesianProductJoinTest.cpp b/test/engine/CartesianProductJoinTest.cpp index f07bb11218..cd05e31b9b 100644 --- a/test/engine/CartesianProductJoinTest.cpp +++ b/test/engine/CartesianProductJoinTest.cpp @@ -90,9 +90,21 @@ void testCartesianProduct(VectorTable expected, std::vector inputs, TEST(CartesianProductJoin, computeResult) { // Simple base cases. VectorTable v{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}; + VectorTable empty{}; testCartesianProduct(v, {v}); - testCartesianProduct({}, {{}, v, {}}); - testCartesianProduct({}, {{}, {}}); + testCartesianProduct(empty, {empty, v, empty}); + testCartesianProduct(empty, {empty, empty}); + + // Test cases where some or all of the inputs are Neutral elements (1 row, + // zero columns) that are automatically filtered out by the + // `CartesianProductJoin`. + VectorTable neutral{{}}; + testCartesianProduct(neutral, {neutral}); + testCartesianProduct(v, {v, neutral}); + testCartesianProduct(v, {neutral, v, neutral}); + testCartesianProduct(neutral, {neutral, neutral, neutral}); + testCartesianProduct(empty, {neutral, empty, neutral}); + testCartesianProduct(empty, {neutral, empty, v}); // Fails because of an empty input. EXPECT_ANY_THROW(makeJoin({})); diff --git a/test/engine/ScanSpecificationTest.cpp b/test/engine/ScanSpecificationTest.cpp new file mode 100644 index 0000000000..63c4386d67 --- /dev/null +++ b/test/engine/ScanSpecificationTest.cpp @@ -0,0 +1,81 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#include + +#include "../util/GTestHelpers.h" +#include "../util/IndexTestHelpers.h" +#include "engine/ScanSpecification.h" + +// _____________________________________________________________________________ +TEST(ScanSpecification, validate) { + Id i = Id::makeFromInt(42); + auto n = std::nullopt; + using S = ScanSpecification; + EXPECT_NO_THROW(S(i, i, i)); + EXPECT_NO_THROW(S(i, i, n)); + EXPECT_NO_THROW(S(i, n, n)); + EXPECT_NO_THROW(S(n, n, n)); + + EXPECT_ANY_THROW(S(n, i, i)); + EXPECT_ANY_THROW(S(n, n, i)); + EXPECT_ANY_THROW(S(n, i, n)); + EXPECT_ANY_THROW(S(i, n, i)); +} + +// _____________________________________________________________________________ +TEST(ScanSpecification, ScanSpecificationAsTripleComponent) { + Id i = Id::makeFromInt(42); + TripleComponent iTc{42}; + auto n = std::nullopt; + using S = ScanSpecification; + using STc = ScanSpecificationAsTripleComponent; + + EXPECT_ANY_THROW(STc(n, iTc, iTc)); + EXPECT_ANY_THROW(STc(n, n, iTc)); + EXPECT_ANY_THROW(STc(n, iTc, n)); + EXPECT_ANY_THROW(STc(iTc, n, iTc)); + + const auto& index = ad_utility::testing::getQec()->getIndex(); + auto toScanSpec = [&index](const STc& s) { + return s.toScanSpecification(index); + }; + + // Match that a `ScanSpecificationAsTripleComponent` has the expected number + // of columns, and yields the expected `ScanSpecification` when + // `toScanSpecification` is called on it. + auto matchScanSpec = + [&toScanSpec](const std::optional spec, + size_t numColumns = 0) -> ::testing::Matcher { + auto innerMatcher = [&toScanSpec, &spec] { + return ::testing::ResultOf(toScanSpec, ::testing::Eq(spec)); + }; + if (!spec.has_value()) { + return innerMatcher(); + } else { + return ::testing::AllOf( + innerMatcher(), + AD_PROPERTY(STc, numColumns, ::testing::Eq(numColumns))); + } + }; + EXPECT_THAT(STc(iTc, iTc, iTc), matchScanSpec(S(i, i, i), 0)); + EXPECT_THAT(STc(iTc, iTc, n), matchScanSpec(S(i, i, n), 1)); + EXPECT_THAT(STc(iTc, n, n), matchScanSpec(S(i, n, n), 2)); + EXPECT_THAT(STc(n, n, n), matchScanSpec(S(n, n, n), 3)); + + // Test the resolution of vocab entries. + auto getId = ad_utility::testing::makeGetId(index); + auto x = getId(""); + TripleComponent xIri = TripleComponent::Iri::fromIriref(""); + + EXPECT_THAT(STc(xIri, xIri, xIri), matchScanSpec(S(x, x, x), 0)); + + // For an entry that is not in the vocabulary, the complete result of + // `toScanSpecification` is `nullopt`. + TripleComponent notInVocab = + TripleComponent::Iri::fromIriref(""); + EXPECT_THAT(STc(notInVocab, xIri, xIri), matchScanSpec(std::nullopt)); + EXPECT_THAT(STc(xIri, notInVocab, xIri), matchScanSpec(std::nullopt)); + EXPECT_THAT(STc(xIri, xIri, notInVocab), matchScanSpec(std::nullopt)); +} diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index dcbf130da8..ac7e363a95 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -65,7 +65,8 @@ class ValuesForTesting : public Operation { // ___________________________________________________________________________ string getCacheKeyImpl() const override { std::stringstream str; - str << "Values for testing with " << table_.numColumns() << " columns. "; + str << "Values for testing with " << table_.numColumns() << " columns and " + << table_.numRows() << " rows. "; if (table_.numRows() > 1000) { str << ad_utility::FastRandomIntGenerator{}(); } else {