From 05089b7b4a715bc4883ea259acd0d60775c0399e Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Fri, 5 Jul 2024 17:44:33 +0200 Subject: [PATCH] Everything compiles. TODO: * Binary search expressions (at least partially). * Fix unit tests * Clean everything up. --- benchmark/CMakeLists.txt | 2 +- src/engine/LocalVocab.cpp | 21 ++++-- .../RelationalExpressionHelpers.h | 3 - src/global/IndexTypes.h | 10 ++- src/global/ValueId.h | 64 +++++++++++++++---- src/global/ValueIdComparators.h | 12 +++- src/index/Vocabulary.h | 3 +- test/RandomExpressionTest.cpp | 16 ++--- test/RelationalExpressionTest.cpp | 9 ++- 9 files changed, 101 insertions(+), 39 deletions(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index ed86be5554..2ea7e202f6 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -31,6 +31,6 @@ addAndLinkBenchmark(JoinAlgorithmBenchmark testUtil memorySize) addAndLinkBenchmark(IdTableCompressedWriterBenchmark engine testUtil) -addAndLinkBenchmark(ParallelMergeBenchmark) +addAndLinkBenchmark(ParallelMergeBenchmark testUtil) addAndLinkBenchmark(GroupByHashMapBenchmark engine testUtil gtest gmock) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index ff452ec2a7..8acde5f3a9 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -81,15 +81,22 @@ std::vector LocalVocab::getAllWordsForTesting() // TODO Consider moving the cheap case (if precomputed) into the // header. 
-std::pair LocalVocabEntry::lowerBoundInIndex() const { +auto LocalVocabEntry::lowerBoundInIndex() const -> BoundsInIndex { if (indexStatus != IndexStatus::NOT_LOOKED_UP) { - return {lowerBoundInIndex_, indexStatus == EQUAL}; + return {lowerBoundInIndex_, upperBoundInIndex_, exactMatchInIndex_, + indexStatus == EQUAL}; } const IndexImpl& index = IndexImpl::staticGlobalSingletonIndex(); - std::pair result; - auto& [vocabIndex, isContained] = result; - isContained = index.getVocab().getId(toStringRepresentation(), &vocabIndex); - indexStatus = isContained ? IndexStatus::EQUAL : IndexStatus::GREATER; - lowerBoundInIndex_ = vocabIndex; + BoundsInIndex result; + const auto& vocab = index.getVocab(); + result.lowerBound_ = vocab.lower_bound(toStringRepresentation()); + result.upperBound_ = vocab.upper_bound(toStringRepresentation()); + result.isContained_ = + index.getVocab().getId(toStringRepresentation(), &result.exactMatch_); + indexStatus = result.isContained_ ? IndexStatus::EQUAL : IndexStatus::GREATER; + lowerBoundInIndex_ = result.lowerBound_; + upperBoundInIndex_ = result.upperBound_; + // Also cache the exact match: the early return above reads it. + exactMatchInIndex_ = result.exactMatch_; return result; } diff --git a/src/engine/sparqlExpressions/RelationalExpressionHelpers.h b/src/engine/sparqlExpressions/RelationalExpressionHelpers.h index 34601ea8d8..5daa2c7c4e 100644 --- a/src/engine/sparqlExpressions/RelationalExpressionHelpers.h +++ b/src/engine/sparqlExpressions/RelationalExpressionHelpers.h @@ -160,10 +160,7 @@ auto makeValueId(const S& value, const EvaluationContext* context) { } else { static_assert(ad_utility::isSimilar); - // TODO We have to reinstate the correct handling of the equal - // ranges. 
return Id::makeFromLocalVocabIndex(&value); - // return getRangeFromVocab(value, context); } }; diff --git a/src/global/IndexTypes.h b/src/global/IndexTypes.h index abf56abe46..a7cf8fc342 100644 --- a/src/global/IndexTypes.h +++ b/src/global/IndexTypes.h @@ -34,12 +34,20 @@ class alignas(16) LocalVocabEntry private: using Base = ad_utility::triple_component::LiteralOrIri; mutable CopyableAtomic lowerBoundInIndex_; + mutable CopyableAtomic upperBoundInIndex_; + mutable CopyableAtomic exactMatchInIndex_; enum IndexStatus { NOT_LOOKED_UP, GREATER, EQUAL }; mutable CopyableAtomic indexStatus = NOT_LOOKED_UP; public: + struct BoundsInIndex { + VocabIndex lowerBound_; + VocabIndex upperBound_; + VocabIndex exactMatch_; + bool isContained_; + }; using Base::Base; - std::pair lowerBoundInIndex() const; + BoundsInIndex lowerBoundInIndex() const; LocalVocabEntry(const Base& base) : Base{base} {} LocalVocabEntry(Base&& base) noexcept : Base{std::move(base)} {} diff --git a/src/global/ValueId.h b/src/global/ValueId.h index 3e18543db6..eb2ae84cfa 100644 --- a/src/global/ValueId.h +++ b/src/global/ValueId.h @@ -137,14 +137,12 @@ class ValueId { if (type == LocalVocabIndex && otherType == LocalVocabIndex) [[unlikely]] { return *getLocalVocabIndex() == *other.getLocalVocabIndex(); } else if (type == VocabIndex) { - auto [lowerBound, isContained] = - (other.getLocalVocabIndex())->lowerBoundInIndex(); - return isContained && lowerBound == getVocabIndex(); + auto x = (other.getLocalVocabIndex())->lowerBoundInIndex(); + return x.isContained_ && x.exactMatch_ == getVocabIndex(); } else if (otherType == VocabIndex) { // TODO Code duplication. 
- auto [lowerBound, isContained] = - (getLocalVocabIndex())->lowerBoundInIndex(); - return isContained && lowerBound == other.getVocabIndex(); + auto x = (getLocalVocabIndex())->lowerBoundInIndex(); + return x.isContained_ && x.exactMatch_ == other.getVocabIndex(); } return _bits == other._bits; } @@ -167,21 +165,21 @@ class ValueId { if (type == LocalVocabIndex && otherType == LocalVocabIndex) [[unlikely]] { return *getLocalVocabIndex() <=> *other.getLocalVocabIndex(); } else if (type == VocabIndex) { - auto [lowerBound, isContained] = - (other.getLocalVocabIndex())->lowerBoundInIndex(); + auto x = (other.getLocalVocabIndex())->lowerBoundInIndex(); + auto lowerBound = x.exactMatch_; if (lowerBound == getVocabIndex()) { - return isContained ? std::strong_ordering::equal - : std::strong_ordering::less; + return x.isContained_ ? std::strong_ordering::equal + : std::strong_ordering::less; } else { return getVocabIndex() <=> lowerBound; } } else if (otherType == VocabIndex) { // TODO Code duplication. - auto [lowerBound, isContained] = - (getLocalVocabIndex())->lowerBoundInIndex(); + auto x = (getLocalVocabIndex())->lowerBoundInIndex(); + auto lowerBound = x.exactMatch_; if (lowerBound == other.getVocabIndex()) { - return isContained ? std::strong_ordering::equal - : std::strong_ordering::greater; + return x.isContained_ ? 
std::strong_ordering::equal + : std::strong_ordering::greater; } else { return lowerBound <=> other.getVocabIndex(); } @@ -189,6 +187,44 @@ class ValueId { return _bits <=> other._bits; } + constexpr std::strong_ordering compareQuarternary( + const ValueId& other) const { + using enum Datatype; + auto type = getDatatype(); + auto otherType = other.getDatatype(); + if (type != LocalVocabIndex && otherType != LocalVocabIndex) { + return _bits <=> other._bits; + } + if (type == LocalVocabIndex && otherType == LocalVocabIndex) [[unlikely]] { + return *getLocalVocabIndex() <=> *other.getLocalVocabIndex(); + } else if (type == VocabIndex) { + auto x = (other.getLocalVocabIndex())->lowerBoundInIndex(); + auto lowerBound = x.lowerBound_; + auto upperBound = x.upperBound_; + auto idx = getVocabIndex(); + if (idx < lowerBound) { + return std::strong_ordering::less; + } else if (idx >= upperBound) { + return std::strong_ordering::greater; + } else { + return std::strong_ordering::equal; + } + } else if (otherType == VocabIndex) { + // TODO Code duplication. + auto x = (getLocalVocabIndex())->lowerBoundInIndex(); + auto lowerBound = x.lowerBound_; + auto upperBound = x.upperBound_; + if (upperBound <= other.getVocabIndex()) { + return std::strong_ordering::less; + } else if (lowerBound > other.getVocabIndex()) { + return std::strong_ordering::greater; + } else { + return std::strong_ordering::equal; + } + } + return _bits <=> other._bits; + } + /// Get the underlying bit representation, e.g. for compression etc. [[nodiscard]] constexpr T getBits() const noexcept { return _bits; } /// Construct from the underlying bit representation. `bits` must have been diff --git a/src/global/ValueIdComparators.h b/src/global/ValueIdComparators.h index 5cfb98d731..9ae8ac2ed1 100644 --- a/src/global/ValueIdComparators.h +++ b/src/global/ValueIdComparators.h @@ -16,6 +16,15 @@ namespace valueIdComparators { // Equal, NotEqual, GreaterEqual, GreaterThan. 
enum struct Comparison { LT, LE, EQ, NE, GE, GT }; +inline int orderingToInt(std::strong_ordering o) { + if (o == std::strong_ordering::less) { + return -1; + } else if (o == std::strong_ordering::greater) { + return 1; + } + return 0; +} + // This enum can be used to configure the behavior of the `compareIds` method // below in the case when two `Id`s have incompatible datatypes (e.g. // `VocabIndex` and a numeric type, or `Undefined` and any other type). @@ -470,7 +479,8 @@ ComparisonResult compareIdsImpl(ValueId a, ValueId b, auto comparator) { // on ValueIds already does the right thing. if (a.getDatatype() == Datatype::LocalVocabIndex || b.getDatatype() == Datatype::LocalVocabIndex) { - return fromBool(std::invoke(comparator, a, b)); + return fromBool( + std::invoke(comparator, orderingToInt(a.compareQuarternary(b)), 0)); } auto visitor = [comparator]( diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h index 96940ea265..5b052fcf33 100644 --- a/src/index/Vocabulary.h +++ b/src/index/Vocabulary.h @@ -244,7 +244,8 @@ class Vocabulary { const SortLevel level = SortLevel::QUARTERNARY) const; // _______________________________________________________________ - IndexType upper_bound(const string& word, const SortLevel level) const; + IndexType upper_bound(const string& word, + const SortLevel level = SortLevel::QUARTERNARY) const; const ExternalVocabulary& getExternalVocab() const { return externalVocabulary_; diff --git a/test/RandomExpressionTest.cpp b/test/RandomExpressionTest.cpp index cbfe4aa580..7430aba778 100644 --- a/test/RandomExpressionTest.cpp +++ b/test/RandomExpressionTest.cpp @@ -100,8 +100,8 @@ TEST(UuidExpression, evaluateStrUuidExpression) { // check that none of the results equals all previous results std::unordered_set strUuids; for (auto uuid : resultVector) { - ASSERT_TRUE(std::holds_alternative(uuid)); - LiteralOrIri litUuid = std::get(uuid); + ASSERT_TRUE(std::holds_alternative(uuid)); + LiteralOrIri litUuid = std::get(uuid); 
ASSERT_TRUE(litUuid.isLiteral()); std::string_view strUuid = asStringViewUnsafe(litUuid.getLiteral().getContent()); @@ -113,8 +113,8 @@ TEST(UuidExpression, evaluateStrUuidExpression) { auto resultAsVariant2 = StrUuidExpression{}.evaluate(&evaluationContext); ASSERT_TRUE(std::holds_alternative(resultAsVariant2)); IdOrLiteralOrIri litOrIriUuid = std::get(resultAsVariant2); - ASSERT_TRUE(std::holds_alternative(litOrIriUuid)); - ASSERT_TRUE(std::get(litOrIriUuid).isLiteral()); + ASSERT_TRUE(std::holds_alternative(litOrIriUuid)); + ASSERT_TRUE(std::get(litOrIriUuid).isLiteral()); } TEST(UuidExpression, evaluateUuidExpression) { @@ -132,8 +132,8 @@ TEST(UuidExpression, evaluateUuidExpression) { // check that none of the results equals all of the other results std::unordered_set strUuids; for (auto uuid : resultVector) { - ASSERT_TRUE(std::holds_alternative(uuid)); - LiteralOrIri litUuid = std::get(uuid); + ASSERT_TRUE(std::holds_alternative(uuid)); + LiteralOrIri litUuid = std::get(uuid); ASSERT_TRUE(litUuid.isIri()); std::string_view iriUuid = asStringViewUnsafe(litUuid.getIri().getContent()); @@ -145,6 +145,6 @@ TEST(UuidExpression, evaluateUuidExpression) { auto resultAsVariant2 = UuidExpression{}.evaluate(&evaluationContext); ASSERT_TRUE(std::holds_alternative(resultAsVariant2)); IdOrLiteralOrIri litOrIriUuid = std::get(resultAsVariant2); - ASSERT_TRUE(std::holds_alternative(litOrIriUuid)); - ASSERT_TRUE(std::get(litOrIriUuid).isIri()); + ASSERT_TRUE(std::holds_alternative(litOrIriUuid)); + ASSERT_TRUE(std::get(litOrIriUuid).isIri()); } diff --git a/test/RelationalExpressionTest.cpp b/test/RelationalExpressionTest.cpp index a930722137..222df701c0 100644 --- a/test/RelationalExpressionTest.cpp +++ b/test/RelationalExpressionTest.cpp @@ -472,9 +472,12 @@ auto testNotComparableHelper(T leftValue, U rightValue, LocalVocab localVocab; IdTable table{alloc}; sparqlExpression::EvaluationContext context{ - *getQec(), map, - table, alloc, - localVocab, std::make_shared>()}; + 
*TestContext{}.qec, + map, + table, + alloc, + localVocab, + std::make_shared>()}; AD_CONTRACT_CHECK(rightValue.size() == 5); context._beginIndex = 0; context._endIndex = 5;