From 026d3faebccd8b4c561dbdbdb582eb19a7b1c828 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Thu, 19 Sep 2024 12:20:00 +0200 Subject: [PATCH 01/11] add BlankNode support for the service-clause --- src/engine/QueryExecutionContext.h | 13 ++++ src/engine/Service.cpp | 18 +++-- src/engine/Service.h | 5 +- .../SparqlExpressionValueGetters.cpp | 5 ++ src/global/Constants.h | 4 ++ src/global/IndexTypes.h | 1 + src/global/ValueId.h | 14 +++- src/global/ValueIdComparators.h | 2 + test/ServiceTest.cpp | 66 +++++++++++-------- 9 files changed, 93 insertions(+), 35 deletions(-) diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 0c17ea684b..d27c14c2c0 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -14,6 +15,7 @@ #include "engine/Result.h" #include "engine/RuntimeInformation.h" #include "engine/SortPerformanceEstimator.h" +#include "global/Constants.h" #include "global/Id.h" #include "index/Index.h" #include "util/Cache.h" @@ -116,6 +118,17 @@ class QueryExecutionContext { updateCallback_(nlohmann::ordered_json(runtimeInformation).dump()); } + // Getter for the global `NEXT_NEWBLANKNODEINDEX` counter. + [[nodiscard]] static int getNextNewBlankNodeIndex() { + if (NEXT_NEWBLANKNODEINDEX == std::numeric_limits::max()) + [[unlikely]] { + throw std::runtime_error( + "Counter for blank nodes overflowed. Tell the developers to reset " + "it."); + } + return NEXT_NEWBLANKNODEINDEX++; + } + bool _pinSubtrees; bool _pinResult; diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 291fa6ae7f..6f4b79fc9a 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -15,6 +15,7 @@ #include "parser/RdfParser.h" #include "parser/TokenizerCtre.h" #include "util/Exception.h" +#include "util/HashMap.h" #include "util/HashSet.h" #include "util/StringUtils.h" #include "util/http/HttpUtils.h" @@ -201,6 +202,7 @@ void Service::writeJsonResult(const std::vector& vars, IdTableStatic idTable = std::move(*idTablePtr).toStatic(); checkCancellation(); std::vector numLocalVocabPerColumn(idTable.numColumns()); + ad_utility::HashMap blankNodeMap; auto writeBindings = [&](const nlohmann::json& bindings, size_t& rowIdx) { for (const auto& binding : bindings) { @@ -208,7 +210,7 @@ void Service::writeJsonResult(const std::vector& vars, for (size_t colIdx = 0; colIdx < vars.size(); ++colIdx) { TripleComponent tc = binding.contains(vars[colIdx]) - ? bindingToTripleComponent(binding[vars[colIdx]]) + ? bindingToTripleComponent(binding[vars[colIdx]], blankNodeMap) : TripleComponent::UNDEF(); Id id = std::move(tc).toValueId(getIndex().getVocab(), *localVocab); @@ -332,7 +334,8 @@ std::optional Service::getSiblingValuesClause() const { // ____________________________________________________________________________ TripleComponent Service::bindingToTripleComponent( - const nlohmann::json& binding) { + const nlohmann::json& binding, + ad_utility::HashMap& blankNodeMap) const { if (!binding.contains("type") || !binding.contains("value")) { throw std::runtime_error(absl::StrCat( "Missing type or value field in binding. The binding is: '", @@ -359,10 +362,13 @@ TripleComponent Service::bindingToTripleComponent( } else if (type == "uri") { tc = TripleComponent::Iri::fromIrirefWithoutBrackets(value); } else if (type == "bnode") { - throw std::runtime_error( - "Blank nodes in the result of a SERVICE are currently not supported. " - "For now, consider filtering them out using the ISBLANK function or " - "converting them via the STR function."); + if (!blankNodeMap.contains(value)) { + // create a new blankNodeIndex + blankNodeMap[value] = + Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make( + getExecutionContext()->getNextNewBlankNodeIndex())); + } + tc = blankNodeMap[value]; } else { throw std::runtime_error(absl::StrCat("Type ", type, " is undefined. The binding is: '", diff --git a/src/engine/Service.h b/src/engine/Service.h index 865d20dd22..9572bd079f 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -99,8 +99,9 @@ class Service : public Operation { vector getChildren() override { return {}; } // Convert the given binding to TripleComponent. - static TripleComponent bindingToTripleComponent( - const nlohmann::json& binding); + TripleComponent bindingToTripleComponent( + const nlohmann::json& binding, + ad_utility::HashMap& blankNodeMap) const; private: // The string returned by this function is used as cache key. diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 0dc9b96279..43c07be186 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -30,6 +30,7 @@ NumericValue NumericValueGetter::operator()( case Datatype::WordVocabIndex: case Datatype::Date: case Datatype::BlankNodeIndex: + case Datatype::NewBlankNodeIndex: return NotNumeric{}; } AD_FAIL(); @@ -50,6 +51,7 @@ auto EffectiveBooleanValueGetter::operator()( return id.getBool() ? True : False; case Datatype::Undefined: case Datatype::BlankNodeIndex: + case Datatype::NewBlankNodeIndex: return Undef; case Datatype::VocabIndex: { auto index = id.getVocabIndex(); @@ -150,6 +152,7 @@ IntDoubleStr ToNumericValueGetter::operator()( case Datatype::WordVocabIndex: case Datatype::Date: case Datatype::BlankNodeIndex: + case Datatype::NewBlankNodeIndex: auto optString = LiteralFromIdGetter{}(id, context); if (optString.has_value()) { return std::move(optString.value()); @@ -192,6 +195,7 @@ OptIri DatatypeValueGetter::operator()(ValueId id, context); case Undefined: case BlankNodeIndex: + case NewBlankNodeIndex: case TextRecordIndex: case WordVocabIndex: return std::nullopt; @@ -246,6 +250,7 @@ T getValue(ValueId id, const sparqlExpression::EvaluationContext* context, case TextRecordIndex: case WordVocabIndex: case BlankNodeIndex: + case NewBlankNodeIndex: case Bool: case Int: case Double: diff --git a/src/global/Constants.h b/src/global/Constants.h index 9d1043571c..b8f76576be 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -7,6 +7,7 @@ #pragma once +#include #include #include #include @@ -245,3 +246,6 @@ constexpr inline size_t NUM_SORT_THREADS = 4; constexpr inline std::string_view EMPH_ON = "\033[1m"; /// ANSI escape sequence to print "normal" text again in the console. constexpr inline std::string_view EMPH_OFF = "\033[22m"; + +// Counter for internal Blank Node Ids. +inline std::atomic_uint64_t NEXT_NEWBLANKNODEINDEX{0}; diff --git a/src/global/IndexTypes.h b/src/global/IndexTypes.h index 372e9e57c7..dd47d2ad70 100644 --- a/src/global/IndexTypes.h +++ b/src/global/IndexTypes.h @@ -16,3 +16,4 @@ using LocalVocabIndex = const LocalVocabEntry*; using TextRecordIndex = ad_utility::TypedIndex; using WordVocabIndex = ad_utility::TypedIndex; using BlankNodeIndex = ad_utility::TypedIndex; +using NewBlankNodeIndex = ad_utility::TypedIndex; diff --git a/src/global/ValueId.h b/src/global/ValueId.h index fb122e413c..ef318078e6 100644 --- a/src/global/ValueId.h +++ b/src/global/ValueId.h @@ -30,7 +30,8 @@ enum struct Datatype { Date, WordVocabIndex, BlankNodeIndex, - MaxValue = BlankNodeIndex + NewBlankNodeIndex, + MaxValue = NewBlankNodeIndex // Note: Unfortunately we cannot easily get the size of an enum. // If members are added to this enum, then the `MaxValue` // alias must always be equal to the last member, @@ -60,6 +61,8 @@ constexpr std::string_view toString(Datatype type) { return "Date"; case Datatype::BlankNodeIndex: return "BlankNodeIndex"; + case Datatype::NewBlankNodeIndex: + return "NewBlankNodeIndex"; } // This line is reachable if we cast an arbitrary invalid int to this enum AD_FAIL(); @@ -278,6 +281,9 @@ class ValueId { static ValueId makeFromBlankNodeIndex(BlankNodeIndex index) { return makeFromIndex(index.get(), Datatype::BlankNodeIndex); } + static ValueId makeFromNewBlankNodeIndex(NewBlankNodeIndex index) { + return makeFromIndex(index.get(), Datatype::NewBlankNodeIndex); + } /// Obtain the unsigned index that this `ValueId` encodes. If `getDatatype() /// != [VocabIndex|TextRecordIndex|LocalVocabIndex]` then the result is @@ -298,6 +304,10 @@ class ValueId { [[nodiscard]] constexpr BlankNodeIndex getBlankNodeIndex() const noexcept { return BlankNodeIndex::make(removeDatatypeBits(_bits)); } + [[nodiscard]] constexpr NewBlankNodeIndex getNewBlankNodeIndex() + const noexcept { + return NewBlankNodeIndex::make(removeDatatypeBits(_bits)); + } // Store or load a `Date` object. static ValueId makeFromDate(DateYearOrDuration d) noexcept { @@ -362,6 +372,8 @@ class ValueId { return std::invoke(visitor, getDate()); case Datatype::BlankNodeIndex: return std::invoke(visitor, getBlankNodeIndex()); + case Datatype::NewBlankNodeIndex: + return std::invoke(visitor, getNewBlankNodeIndex()); } AD_FAIL(); } diff --git a/src/global/ValueIdComparators.h b/src/global/ValueIdComparators.h index 9f46aab872..372889f2ca 100644 --- a/src/global/ValueIdComparators.h +++ b/src/global/ValueIdComparators.h @@ -402,6 +402,7 @@ inline std::vector> getRangesForId( case Datatype::Bool: case Datatype::Date: case Datatype::BlankNodeIndex: + case Datatype::NewBlankNodeIndex: // For `Date` the trivial comparison via bits is also correct. return detail::simplifyRanges( detail::getRangesForIndexTypes(begin, end, valueId, comparison)); @@ -435,6 +436,7 @@ inline std::vector> getRangesForEqualIds( case Datatype::Undefined: case Datatype::Date: case Datatype::BlankNodeIndex: + case Datatype::NewBlankNodeIndex: AD_FAIL(); case Datatype::VocabIndex: case Datatype::LocalVocabIndex: diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index 0476d8ab0a..4ef9fce34c 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -10,6 +10,8 @@ #include #include "engine/Service.h" +#include "global/Constants.h" +#include "global/IndexTypes.h" #include "global/RuntimeParameters.h" #include "gmock/gmock.h" #include "parser/GraphPatternOperation.h" @@ -469,55 +471,67 @@ TEST_F(ServiceTest, getCacheKey) { EXPECT_NE(baseCacheKey, silentService.getCacheKey()); } -// Test that bindingToValueId behaves as expected. +// Test that bindingToTripleComponent behaves as expected. TEST_F(ServiceTest, bindingToTripleComponent) { - Index::Vocab vocabulary; - nlohmann::json binding; + ad_utility::HashMap blankNodeMap; + parsedQuery::Service parsedServiceClause{ + {Variable{"?x"}, Variable{"?y"}}, + TripleComponent::Iri::fromIriref(""), + "PREFIX doof: ", + "{ }", + false}; + Service service{testQec, parsedServiceClause}; + + auto bTTC = [&service, &blankNodeMap]( + const nlohmann::json& binding) -> TripleComponent { + return service.bindingToTripleComponent(binding, blankNodeMap); + }; // Missing type or value. - AD_EXPECT_THROW_WITH_MESSAGE( - Service::bindingToTripleComponent({{"type", "literal"}}), - ::testing::HasSubstr("Missing type or value")); - AD_EXPECT_THROW_WITH_MESSAGE( - Service::bindingToTripleComponent({{"value", "v"}}), - ::testing::HasSubstr("Missing type or value")); + AD_EXPECT_THROW_WITH_MESSAGE(bTTC({{"type", "literal"}}), + ::testing::HasSubstr("Missing type or value")); + AD_EXPECT_THROW_WITH_MESSAGE(bTTC({{"value", "v"}}), + ::testing::HasSubstr("Missing type or value")); EXPECT_EQ( - Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "42"}, {"datatype", XSD_INT_TYPE}}), + bTTC({{"type", "literal"}, {"value", "42"}, {"datatype", XSD_INT_TYPE}}), 42); EXPECT_EQ( - Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "Hallo Welt"}, {"xml:lang", "de"}}), + bTTC({{"type", "literal"}, {"value", "Hallo Welt"}, {"xml:lang", "de"}}), TripleComponent::Literal::literalWithoutQuotes("Hallo Welt", "@de")); - EXPECT_EQ(Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "Hello World"}}), + EXPECT_EQ(bTTC({{"type", "literal"}, {"value", "Hello World"}}), TripleComponent::Literal::literalWithoutQuotes("Hello World")); // Test literals with escape characters (there used to be a bug for those) EXPECT_EQ( - Service::bindingToTripleComponent( - {{"type", "literal"}, {"value", "Hello \\World"}}), + bTTC({{"type", "literal"}, {"value", "Hello \\World"}}), TripleComponent::Literal::fromEscapedRdfLiteral("\"Hello \\\\World\"")); EXPECT_EQ( - Service::bindingToTripleComponent( + bTTC( {{"type", "literal"}, {"value", "Hallo \\Welt"}, {"xml:lang", "de"}}), TripleComponent::Literal::fromEscapedRdfLiteral("\"Hallo \\\\Welt\"", "@de")); - EXPECT_EQ(Service::bindingToTripleComponent( - {{"type", "uri"}, {"value", "http://doof.org"}}), + EXPECT_EQ(bTTC({{"type", "uri"}, {"value", "http://doof.org"}}), TripleComponent::Iri::fromIrirefWithoutBrackets("http://doof.org")); - // Blank Node not supported yet. - EXPECT_ANY_THROW( - Service::bindingToTripleComponent({{"type", "bnode"}, {"value", "b"}})); + // Blank Nodes. + EXPECT_EQ(blankNodeMap.size(), 0); + EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "A"}}), + Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make(0))); + EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "B"}}), + Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make(1))); + EXPECT_EQ(blankNodeMap.size(), 2); + + // BlankNode exists already, known Id will be used. + EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "A"}}), + Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make(0))); + // Invalid type -> throw. AD_EXPECT_THROW_WITH_MESSAGE( - Service::bindingToTripleComponent( - {{"type", "INVALID_TYPE"}, {"value", "v"}}), - ::testing::HasSubstr("Type INVALID_TYPE is undefined")); + bTTC({{"type", "INVALID_TYPE"}, {"value", "v"}}), + ::testing::HasSubstr("Type INVALID_TYPE is undefined.")); } From 961f8e15a47abecb4141e92723a2ae39656da3e0 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Fri, 20 Sep 2024 13:24:47 +0200 Subject: [PATCH 02/11] various improvements --- src/engine/ExportQueryExecutionTrees.cpp | 3 +++ src/engine/QueryExecutionContext.h | 26 ++++++++++++------- src/engine/Service.cpp | 14 +++++----- .../SparqlExpressionValueGetters.cpp | 10 +++---- .../SparqlExpressionValueGetters.h | 3 ++- src/global/Constants.h | 5 ++-- src/global/IndexTypes.h | 3 ++- src/global/ValueId.h | 20 +++++++------- src/global/ValueIdComparators.h | 4 +-- test/ServiceTest.cpp | 6 ++--- test/ValueIdTest.cpp | 1 + test/ValueIdTestHelpers.h | 3 +++ 12 files changed, 59 insertions(+), 39 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 5a74a34488..6e5035011f 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -215,6 +215,9 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { case BlankNodeIndex: return std::pair{absl::StrCat("_:bn", id.getBlankNodeIndex().get()), nullptr}; + case LocalBlankNodeIndex: + return std::pair{absl::StrCat("_:bn", id.getLocalBlankNodeIndex().get()), + nullptr}; default: AD_FAIL(); } diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index d27c14c2c0..1382e4fca6 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -118,15 +119,22 @@ class QueryExecutionContext { updateCallback_(nlohmann::ordered_json(runtimeInformation).dump()); } - // Getter for the global `NEXT_NEWBLANKNODEINDEX` counter. - [[nodiscard]] static int getNextNewBlankNodeIndex() { - if (NEXT_NEWBLANKNODEINDEX == std::numeric_limits::max()) - [[unlikely]] { - throw std::runtime_error( - "Counter for blank nodes overflowed. Tell the developers to reset " - "it."); - } - return NEXT_NEWBLANKNODEINDEX++; + // Getter for the global `NEXT_LOCALBLANKNODEINDEX` counter. + // Use n to set the number of indices to be reserved for the caller. + [[nodiscard]] uint64_t getNextLocalBlankNodeIndex(uint64_t n = 1) { + uint64_t oldIdx = NEXT_LOCALBLANKNODEINDEX.load(); + uint64_t newIdx; + + do { + newIdx = oldIdx + n; + + if (newIdx == std::numeric_limits::max()) [[unlikely]] { + throw std::runtime_error( + "Counter for local blank nodes overflowed. Tell the developers to " + "reset it."); + } + } while (!NEXT_LOCALBLANKNODEINDEX.compare_exchange_weak(oldIdx, newIdx)); + return oldIdx; } bool _pinSubtrees; diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 6f4b79fc9a..8685b047c1 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -202,6 +202,8 @@ void Service::writeJsonResult(const std::vector& vars, IdTableStatic idTable = std::move(*idTablePtr).toStatic(); checkCancellation(); std::vector numLocalVocabPerColumn(idTable.numColumns()); + // TODO We should include a memory limit, as soon as we can do proper + // memory-limited HashMaps. ad_utility::HashMap blankNodeMap; auto writeBindings = [&](const nlohmann::json& bindings, size_t& rowIdx) { @@ -362,13 +364,13 @@ TripleComponent Service::bindingToTripleComponent( } else if (type == "uri") { tc = TripleComponent::Iri::fromIrirefWithoutBrackets(value); } else if (type == "bnode") { - if (!blankNodeMap.contains(value)) { - // create a new blankNodeIndex - blankNodeMap[value] = - Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make( - getExecutionContext()->getNextNewBlankNodeIndex())); + auto optEmplace = blankNodeMap.try_emplace(value, Id()); + if (optEmplace.second) { + optEmplace.first->second = + Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make( + getExecutionContext()->getNextLocalBlankNodeIndex())); } - tc = blankNodeMap[value]; + tc = optEmplace.first->second; } else { throw std::runtime_error(absl::StrCat("Type ", type, " is undefined. The binding is: '", diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 43c07be186..d15a76931a 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -30,7 +30,7 @@ NumericValue NumericValueGetter::operator()( case Datatype::WordVocabIndex: case Datatype::Date: case Datatype::BlankNodeIndex: - case Datatype::NewBlankNodeIndex: + case Datatype::LocalBlankNodeIndex: return NotNumeric{}; } AD_FAIL(); @@ -51,7 +51,7 @@ auto EffectiveBooleanValueGetter::operator()( return id.getBool() ? True : False; case Datatype::Undefined: case Datatype::BlankNodeIndex: - case Datatype::NewBlankNodeIndex: + case Datatype::LocalBlankNodeIndex: return Undef; case Datatype::VocabIndex: { auto index = id.getVocabIndex(); @@ -152,7 +152,7 @@ IntDoubleStr ToNumericValueGetter::operator()( case Datatype::WordVocabIndex: case Datatype::Date: case Datatype::BlankNodeIndex: - case Datatype::NewBlankNodeIndex: + case Datatype::LocalBlankNodeIndex: auto optString = LiteralFromIdGetter{}(id, context); if (optString.has_value()) { return std::move(optString.value()); @@ -195,7 +195,7 @@ OptIri DatatypeValueGetter::operator()(ValueId id, context); case Undefined: case BlankNodeIndex: - case NewBlankNodeIndex: + case LocalBlankNodeIndex: case TextRecordIndex: case WordVocabIndex: return std::nullopt; @@ -250,7 +250,7 @@ T getValue(ValueId id, const sparqlExpression::EvaluationContext* context, case TextRecordIndex: case WordVocabIndex: case BlankNodeIndex: - case NewBlankNodeIndex: + case LocalBlankNodeIndex: case Bool: case Int: case Double: diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index aecdf64564..f0ee001033 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -144,7 +144,8 @@ struct StringValueGetter : Mixin { struct IsBlankNodeValueGetter : Mixin { using Mixin::operator(); Id operator()(ValueId id, const EvaluationContext*) const { - return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex); + return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex || + id.getDatatype() == Datatype::LocalBlankNodeIndex); } Id operator()(const LiteralOrIri&, const EvaluationContext*) const { diff --git a/src/global/Constants.h b/src/global/Constants.h index b8f76576be..fa1fc17fbe 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -247,5 +247,6 @@ constexpr inline std::string_view EMPH_ON = "\033[1m"; /// ANSI escape sequence to print "normal" text again in the console. constexpr inline std::string_view EMPH_OFF = "\033[22m"; -// Counter for internal Blank Node Ids. -inline std::atomic_uint64_t NEXT_NEWBLANKNODEINDEX{0}; +// Counter for internal Blank Node Ids that are not part of the index, but were +// locally added (by SERVICE clauses or expressions). +inline std::atomic_uint64_t NEXT_LOCALBLANKNODEINDEX{0}; diff --git a/src/global/IndexTypes.h b/src/global/IndexTypes.h index dd47d2ad70..149d3d411c 100644 --- a/src/global/IndexTypes.h +++ b/src/global/IndexTypes.h @@ -16,4 +16,5 @@ using LocalVocabIndex = const LocalVocabEntry*; using TextRecordIndex = ad_utility::TypedIndex; using WordVocabIndex = ad_utility::TypedIndex; using BlankNodeIndex = ad_utility::TypedIndex; -using NewBlankNodeIndex = ad_utility::TypedIndex; +using LocalBlankNodeIndex = + ad_utility::TypedIndex; diff --git a/src/global/ValueId.h b/src/global/ValueId.h index ef318078e6..af2599ee07 100644 --- a/src/global/ValueId.h +++ b/src/global/ValueId.h @@ -30,8 +30,8 @@ enum struct Datatype { Date, WordVocabIndex, BlankNodeIndex, - NewBlankNodeIndex, - MaxValue = NewBlankNodeIndex + LocalBlankNodeIndex, + MaxValue = LocalBlankNodeIndex // Note: Unfortunately we cannot easily get the size of an enum. // If members are added to this enum, then the `MaxValue` // alias must always be equal to the last member, @@ -61,8 +61,8 @@ constexpr std::string_view toString(Datatype type) { return "Date"; case Datatype::BlankNodeIndex: return "BlankNodeIndex"; - case Datatype::NewBlankNodeIndex: - return "NewBlankNodeIndex"; + case Datatype::LocalBlankNodeIndex: + return "LocalBlankNodeIndex"; } // This line is reachable if we cast an arbitrary invalid int to this enum AD_FAIL(); @@ -281,8 +281,8 @@ class ValueId { static ValueId makeFromBlankNodeIndex(BlankNodeIndex index) { return makeFromIndex(index.get(), Datatype::BlankNodeIndex); } - static ValueId makeFromNewBlankNodeIndex(NewBlankNodeIndex index) { - return makeFromIndex(index.get(), Datatype::NewBlankNodeIndex); + static ValueId makeFromLocalBlankNodeIndex(LocalBlankNodeIndex index) { + return makeFromIndex(index.get(), Datatype::LocalBlankNodeIndex); } /// Obtain the unsigned index that this `ValueId` encodes. If `getDatatype() @@ -304,9 +304,9 @@ class ValueId { [[nodiscard]] constexpr BlankNodeIndex getBlankNodeIndex() const noexcept { return BlankNodeIndex::make(removeDatatypeBits(_bits)); } - [[nodiscard]] constexpr NewBlankNodeIndex getNewBlankNodeIndex() + [[nodiscard]] constexpr LocalBlankNodeIndex getLocalBlankNodeIndex() const noexcept { - return NewBlankNodeIndex::make(removeDatatypeBits(_bits)); + return LocalBlankNodeIndex::make(removeDatatypeBits(_bits)); } // Store or load a `Date` object. @@ -372,8 +372,8 @@ class ValueId { return std::invoke(visitor, getDate()); case Datatype::BlankNodeIndex: return std::invoke(visitor, getBlankNodeIndex()); - case Datatype::NewBlankNodeIndex: - return std::invoke(visitor, getNewBlankNodeIndex()); + case Datatype::LocalBlankNodeIndex: + return std::invoke(visitor, getLocalBlankNodeIndex()); } AD_FAIL(); } diff --git a/src/global/ValueIdComparators.h b/src/global/ValueIdComparators.h index 372889f2ca..46e891f308 100644 --- a/src/global/ValueIdComparators.h +++ b/src/global/ValueIdComparators.h @@ -402,7 +402,7 @@ inline std::vector> getRangesForId( case Datatype::Bool: case Datatype::Date: case Datatype::BlankNodeIndex: - case Datatype::NewBlankNodeIndex: + case Datatype::LocalBlankNodeIndex: // For `Date` the trivial comparison via bits is also correct. return detail::simplifyRanges( detail::getRangesForIndexTypes(begin, end, valueId, comparison)); @@ -436,7 +436,7 @@ inline std::vector> getRangesForEqualIds( case Datatype::Undefined: case Datatype::Date: case Datatype::BlankNodeIndex: - case Datatype::NewBlankNodeIndex: + case Datatype::LocalBlankNodeIndex: AD_FAIL(); case Datatype::VocabIndex: case Datatype::LocalVocabIndex: diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index 4ef9fce34c..139fb489ce 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -521,14 +521,14 @@ TEST_F(ServiceTest, bindingToTripleComponent) { // Blank Nodes. EXPECT_EQ(blankNodeMap.size(), 0); EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "A"}}), - Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make(0))); + Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(0))); EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "B"}}), - Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make(1))); + Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(1))); EXPECT_EQ(blankNodeMap.size(), 2); // BlankNode exists already, known Id will be used. EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "A"}}), - Id::makeFromNewBlankNodeIndex(NewBlankNodeIndex::make(0))); + Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(0))); // Invalid type -> throw. AD_EXPECT_THROW_WITH_MESSAGE( diff --git a/test/ValueIdTest.cpp b/test/ValueIdTest.cpp index ee27792a40..90238b64a1 100644 --- a/test/ValueIdTest.cpp +++ b/test/ValueIdTest.cpp @@ -323,6 +323,7 @@ TEST_F(ValueIdTest, toDebugString) { test(makeTextRecordId(37), "T:37"); test(makeWordVocabId(42), "W:42"); test(makeBlankNodeId(27), "B:27"); + test(makeLocalBlankNodeId(28), "L:28"); test(ValueId::makeFromDate( DateYearOrDuration{123456, DateYearOrDuration::Type::Year}), "D:123456"); diff --git a/test/ValueIdTestHelpers.h b/test/ValueIdTestHelpers.h index 5644caefee..958f3a4765 100644 --- a/test/ValueIdTestHelpers.h +++ b/test/ValueIdTestHelpers.h @@ -57,6 +57,9 @@ inline ValueId makeWordVocabId(uint64_t value) { inline ValueId makeBlankNodeId(uint64_t value) { return ValueId::makeFromBlankNodeIndex(BlankNodeIndex::make(value)); } +inline ValueId makeLocalBlankNodeId(uint64_t value) { + return ValueId::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(value)); +} inline uint64_t getVocabIndex(ValueId id) { return id.getVocabIndex().get(); } // TODO Make the tests more precise for the localVocabIndices. From 521d4cae74086a187bedbb92723cc4ac5895e1cc Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:50:53 +0200 Subject: [PATCH 03/11] added BlankNodeManager --- src/engine/LocalVocab.cpp | 5 +++ src/engine/LocalVocab.h | 6 +++ src/engine/Service.cpp | 9 +++-- src/engine/Service.h | 3 +- src/util/BlankNodeManager.cpp | 56 ++++++++++++++++++++++++++++ src/util/BlankNodeManager.h | 70 +++++++++++++++++++++++++++++++++++ src/util/CMakeLists.txt | 2 +- test/BlankNodeManagerTest.cpp | 55 +++++++++++++++++++++++++++ test/CMakeLists.txt | 2 + test/ServiceTest.cpp | 23 +++++++----- 10 files changed, 216 insertions(+), 15 deletions(-) create mode 100644 src/util/BlankNodeManager.cpp create mode 100644 src/util/BlankNodeManager.h create mode 100644 test/BlankNodeManagerTest.cpp diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 3c87ff5bdc..f3d32702e1 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -77,3 +77,8 @@ std::vector LocalVocab::getAllWordsForTesting() } return result; } + +// _____________________________________________________________________________ +LocalBlankNodeIndex LocalVocab::getBlankNodeIndex() { + return LocalBlankNodeIndex::make(blankNodeManager_.getId()); +} diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index d0ec7ccfaa..04a3b561e5 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -13,6 +13,7 @@ #include "absl/container/node_hash_set.h" #include "global/Id.h" #include "parser/LiteralOrIri.h" +#include "util/BlankNodeManager.h" // A class for maintaining a local vocabulary with contiguous (local) IDs. This // is meant for words that are not part of the normal vocabulary (constructed @@ -38,6 +39,8 @@ class LocalVocab { auto& primaryWordSet() { return *primaryWordSet_; } const auto& primaryWordSet() const { return *primaryWordSet_; } + ad_utility::BlankNodeManager::LocalBlankNodeManager blankNodeManager_; + public: // Create a new, empty local vocabulary. LocalVocab() = default; @@ -90,6 +93,9 @@ class LocalVocab { // Return all the words from all the word sets as a vector. std::vector getAllWordsForTesting() const; + // Get a new LocalBlankNodeIndex using the LocalBlankNodeManager. + LocalBlankNodeIndex getBlankNodeIndex(); + private: // Common implementation for the two variants of // `getIndexAndAddIfNotContainedImpl` above. diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 8685b047c1..f97e02c159 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -212,7 +212,8 @@ void Service::writeJsonResult(const std::vector& vars, for (size_t colIdx = 0; colIdx < vars.size(); ++colIdx) { TripleComponent tc = binding.contains(vars[colIdx]) - ? bindingToTripleComponent(binding[vars[colIdx]], blankNodeMap) + ? bindingToTripleComponent(binding[vars[colIdx]], blankNodeMap, + localVocab) : TripleComponent::UNDEF(); Id id = std::move(tc).toValueId(getIndex().getVocab(), *localVocab); @@ -337,7 +338,8 @@ std::optional Service::getSiblingValuesClause() const { // ____________________________________________________________________________ TripleComponent Service::bindingToTripleComponent( const nlohmann::json& binding, - ad_utility::HashMap& blankNodeMap) const { + ad_utility::HashMap& blankNodeMap, + LocalVocab* localVocab) const { if (!binding.contains("type") || !binding.contains("value")) { throw std::runtime_error(absl::StrCat( "Missing type or value field in binding. The binding is: '", @@ -367,8 +369,7 @@ TripleComponent Service::bindingToTripleComponent( auto optEmplace = blankNodeMap.try_emplace(value, Id()); if (optEmplace.second) { optEmplace.first->second = - Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make( - getExecutionContext()->getNextLocalBlankNodeIndex())); + Id::makeFromLocalBlankNodeIndex(localVocab->getBlankNodeIndex()); } tc = optEmplace.first->second; } else { diff --git a/src/engine/Service.h b/src/engine/Service.h index 9572bd079f..43108ec081 100644 --- a/src/engine/Service.h +++ b/src/engine/Service.h @@ -101,7 +101,8 @@ class Service : public Operation { // Convert the given binding to TripleComponent. TripleComponent bindingToTripleComponent( const nlohmann::json& binding, - ad_utility::HashMap& blankNodeMap) const; + ad_utility::HashMap& blankNodeMap, + LocalVocab* localVocab) const; private: // The string returned by this function is used as cache key. diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp new file mode 100644 index 0000000000..cf7cf8c231 --- /dev/null +++ b/src/util/BlankNodeManager.cpp @@ -0,0 +1,56 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Moritz Dom (domm@informatik.uni-freiburg.de) + +#include "util/BlankNodeManager.h" + +namespace ad_utility { + +// _____________________________________________________________________________ +BlankNodeManager::BlankNodeManager( + SlowRandomIntGenerator randomIntGenerator) + : randBlockIndex_(randomIntGenerator) {} + +// _____________________________________________________________________________ +BlankNodeManager::Block BlankNodeManager::allocateBlock() { + // The Random-Generation Algorithm's performance is reduced once the number of + // used blocks exceeds a limit. + AD_CORRECTNESS_CHECK(usedBlocksSet_.size() < totalAvailableBlocks_ / 256); + + // Lock this part, as it might be accessed concurrently by + // `LocalBlankNodeManager` instances. + mtx_.lock(); + uint64_t newBlockIndex = randBlockIndex_(); + while (usedBlocksSet_.contains(newBlockIndex)) { + newBlockIndex = randBlockIndex_(); + } + usedBlocksSet_.insert(newBlockIndex); + mtx_.unlock(); + + return Block(newBlockIndex); +} + +// _____________________________________________________________________________ +void BlankNodeManager::freeBlock(uint64_t blockIndex) { + usedBlocksSet_.erase(blockIndex); +} + +// _____________________________________________________________________________ +BlankNodeManager::Block::Block(uint64_t blockIndex) + : blockIdx_(blockIndex), nextIdx_(blockIndex * blockSize_) {} + +// _____________________________________________________________________________ +BlankNodeManager::Block::~Block() { + globalBlankNodeManager.freeBlock(blockIdx_); +} + +// _____________________________________________________________________________ +uint64_t BlankNodeManager::LocalBlankNodeManager::getId() { + if (blocks_.empty() || + blocks_.back().nextIdx_ == (blocks_.back().blockIdx_ + 1) * blockSize_) { + blocks_.emplace_back(globalBlankNodeManager.allocateBlock()); + } + return blocks_.back().nextIdx_++; +} + +} // namespace ad_utility diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h new file mode 100644 index 0000000000..ce39e083ec --- /dev/null +++ b/src/util/BlankNodeManager.h @@ -0,0 +1,70 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Moritz Dom (domm@informatik.uni-freiburg.de) + +#pragma once + +#include + +#include "global/ValueId.h" +#include "util/HashSet.h" +#include "util/Random.h" + +namespace ad_utility { +/* + * Manager class for LocalBlankNodeIndex. + */ +class BlankNodeManager { + public: + static const uint blockSize_ = 1000; + static constexpr uint64_t totalAvailableBlocks_ = + (ValueId::maxIndex + 1) / blockSize_; + + private: + // Int Generator yielding random block indices. + SlowRandomIntGenerator randBlockIndex_; + // Mutex for Block allocation. + std::mutex mtx_; + + protected: + // Tracks blocks currently hold by instances of `LocalBlankNodeManager`. + HashSet usedBlocksSet_; + + public: + explicit BlankNodeManager( + SlowRandomIntGenerator randomIntGenerator = + SlowRandomIntGenerator{0, totalAvailableBlocks_ - 1}); + ~BlankNodeManager() = default; + + // A Local BlankNode Ids Block of size `blockSize_`. + struct Block { + explicit Block(uint64_t blockIndex); + ~Block(); + uint64_t blockIdx_; + uint64_t nextIdx_; + }; + + // Manages the LocalBlankNodes used within a LocalVocab. + class LocalBlankNodeManager { + public: + LocalBlankNodeManager() = default; + ~LocalBlankNodeManager() = default; + + // Get a new id. + [[nodiscard]] uint64_t getId(); + + protected: + // Reserved blocks. + std::vector blocks_; + }; + + // Allocate and retrieve a block of free ids. + [[nodiscard]] Block allocateBlock(); + + // Free a block of ids. + void freeBlock(uint64_t blockIndex); +}; + +} // namespace ad_utility + +inline ad_utility::BlankNodeManager globalBlankNodeManager; diff --git a/src/util/CMakeLists.txt b/src/util/CMakeLists.txt index 815dbf827c..fda9bc82a0 100644 --- a/src/util/CMakeLists.txt +++ b/src/util/CMakeLists.txt @@ -1,5 +1,5 @@ add_subdirectory(ConfigManager) add_subdirectory(MemorySize) add_subdirectory(http) -add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp DateYearDuration.cpp Duration.cpp antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp LazyJsonParser.cpp) +add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp DateYearDuration.cpp Duration.cpp antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp LazyJsonParser.cpp BlankNodeManager.cpp) qlever_target_link_libraries(util re2::re2) diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp new file mode 100644 index 0000000000..b19efc0e47 --- /dev/null +++ b/test/BlankNodeManagerTest.cpp @@ -0,0 +1,55 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Moritz Dom (domm@informatik.uni-freiburg.de) + +#include + +#include "util/BlankNodeManager.h" +#include "util/Random.h" + +using namespace ad_utility; + +class BlankNodeManagerTest : public BlankNodeManager { + public: + BlankNodeManagerTest() : BlankNodeManager() {} + BlankNodeManagerTest(SlowRandomIntGenerator randomIntGenerator) + : BlankNodeManager(randomIntGenerator) {} + + using BlankNodeManager::usedBlocksSet_; +}; + +class LocalBlankNodeManagerTest + : public BlankNodeManager::LocalBlankNodeManager { + public: + using BlankNodeManager::LocalBlankNodeManager::blocks_; +}; + +TEST(blockAllocation, BlankNodeManagerTest) { + using Block = BlankNodeManager::Block; + BlankNodeManagerTest bnm; + + EXPECT_EQ(bnm.usedBlocksSet_.size(), 0); + Block b = bnm.allocateBlock(); + EXPECT_EQ(bnm.usedBlocksSet_.size(), 1); + bnm.freeBlock(b.blockIdx_); + EXPECT_EQ(bnm.usedBlocksSet_.size(), 0); + // double free + EXPECT_NO_THROW(bnm.freeBlock(b.blockIdx_)); +} + +TEST(LocalBlankNodeManagerGetID, BlankNodeManagerTest) { + using LocalBNM = LocalBlankNodeManagerTest; + LocalBNM l; + // initially the LocalBlankNodeManager doesn't have any blocks + EXPECT_EQ(l.blocks_.size(), 0); + + // A new Block is allocated, if + // no blocks are allocated yet + uint64_t id = l.getId(); + EXPECT_EQ(l.blocks_.size(), 1); + + // or the ids of the last block are all used + l.blocks_.back().nextIdx_ = id + BlankNodeManager::blockSize_; + id = l.getId(); + EXPECT_EQ(l.blocks_.size(), 2); +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d9a5d2a0fe..ff280023d3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -402,3 +402,5 @@ addLinkAndDiscoverTest(CacheableGeneratorTest) addLinkAndDiscoverTest(FilterTest engine) addLinkAndDiscoverTest(ResultTest engine) + +addLinkAndDiscoverTest(BlankNodeManagerTest) diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index 139fb489ce..436a24d88f 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -481,10 +481,11 @@ TEST_F(ServiceTest, bindingToTripleComponent) { "{ }", false}; Service service{testQec, parsedServiceClause}; + LocalVocab localVocab{}; - auto bTTC = [&service, &blankNodeMap]( - const nlohmann::json& binding) -> TripleComponent { - return service.bindingToTripleComponent(binding, blankNodeMap); + auto bTTC = [&service, &blankNodeMap, + &localVocab](const nlohmann::json& binding) -> TripleComponent { + return service.bindingToTripleComponent(binding, blankNodeMap, &localVocab); }; // Missing type or value. @@ -520,15 +521,19 @@ TEST_F(ServiceTest, bindingToTripleComponent) { // Blank Nodes. EXPECT_EQ(blankNodeMap.size(), 0); - EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "A"}}), - Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(0))); - EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "B"}}), - Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(1))); + + Id a = + bTTC({{"type", "bnode"}, {"value", "A"}}).toValueIdIfNotString().value(); + Id b = + bTTC({{"type", "bnode"}, {"value", "B"}}).toValueIdIfNotString().value(); + EXPECT_NE(a, b); + EXPECT_EQ(blankNodeMap.size(), 2); // BlankNode exists already, known Id will be used. - EXPECT_EQ(bTTC({{"type", "bnode"}, {"value", "A"}}), - Id::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(0))); + Id a2 = + bTTC({{"type", "bnode"}, {"value", "A"}}).toValueIdIfNotString().value(); + EXPECT_EQ(a, a2); // Invalid type -> throw. AD_EXPECT_THROW_WITH_MESSAGE( From a43ae9c077c992e2b2750f20cebce67b379ca036 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Mon, 7 Oct 2024 10:25:07 +0200 Subject: [PATCH 04/11] cleanup tests --- src/engine/QueryExecutionContext.h | 18 ------------- src/global/Constants.h | 4 --- src/util/BlankNodeManager.h | 11 +++++--- test/BlankNodeManagerTest.cpp | 41 ++++++++++-------------------- 4 files changed, 22 insertions(+), 52 deletions(-) diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 1382e4fca6..6ac59363e8 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -119,24 +119,6 @@ class QueryExecutionContext { updateCallback_(nlohmann::ordered_json(runtimeInformation).dump()); } - // Getter for the global `NEXT_LOCALBLANKNODEINDEX` counter. - // Use n to set the number of indices to be reserved for the caller. - [[nodiscard]] uint64_t getNextLocalBlankNodeIndex(uint64_t n = 1) { - uint64_t oldIdx = NEXT_LOCALBLANKNODEINDEX.load(); - uint64_t newIdx; - - do { - newIdx = oldIdx + n; - - if (newIdx == std::numeric_limits::max()) [[unlikely]] { - throw std::runtime_error( - "Counter for local blank nodes overflowed. Tell the developers to " - "reset it."); - } - } while (!NEXT_LOCALBLANKNODEINDEX.compare_exchange_weak(oldIdx, newIdx)); - return oldIdx; - } - bool _pinSubtrees; bool _pinResult; diff --git a/src/global/Constants.h b/src/global/Constants.h index 00386b50bc..60d1d3d551 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -260,10 +260,6 @@ constexpr inline std::string_view EMPH_ON = "\033[1m"; /// ANSI escape sequence to print "normal" text again in the console. constexpr inline std::string_view EMPH_OFF = "\033[22m"; -// Counter for internal Blank Node Ids that are not part of the index, but were -// locally added (by SERVICE clauses or expressions). -inline std::atomic_uint64_t NEXT_LOCALBLANKNODEINDEX{0}; - // Allowed range for geographical coordinates from WTK Text constexpr inline double COORDINATE_LAT_MAX = 90.0; constexpr inline double COORDINATE_LNG_MAX = 180.0; diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h index ce39e083ec..0b449c9f68 100644 --- a/src/util/BlankNodeManager.h +++ b/src/util/BlankNodeManager.h @@ -4,6 +4,8 @@ #pragma once +#include + #include #include "global/ValueId.h" @@ -26,8 +28,7 @@ class BlankNodeManager { // Mutex for Block allocation. std::mutex mtx_; - protected: - // Tracks blocks currently hold by instances of `LocalBlankNodeManager`. + // Tracks blocks currently used by instances of `LocalBlankNodeManager`. HashSet usedBlocksSet_; public: @@ -53,9 +54,11 @@ class BlankNodeManager { // Get a new id. [[nodiscard]] uint64_t getId(); - protected: + private: // Reserved blocks. std::vector blocks_; + + FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID); }; // Allocate and retrieve a block of free ids. @@ -63,6 +66,8 @@ class BlankNodeManager { // Free a block of ids. void freeBlock(uint64_t blockIndex); + + FRIEND_TEST(BlankNodeManager, blockAllocation); }; } // namespace ad_utility diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp index b19efc0e47..30303f1c2f 100644 --- a/test/BlankNodeManagerTest.cpp +++ b/test/BlankNodeManagerTest.cpp @@ -7,39 +7,24 @@ #include "util/BlankNodeManager.h" #include "util/Random.h" -using namespace ad_utility; - -class BlankNodeManagerTest : public BlankNodeManager { - public: - BlankNodeManagerTest() : BlankNodeManager() {} - BlankNodeManagerTest(SlowRandomIntGenerator randomIntGenerator) - : BlankNodeManager(randomIntGenerator) {} - - using BlankNodeManager::usedBlocksSet_; -}; - -class LocalBlankNodeManagerTest - : public BlankNodeManager::LocalBlankNodeManager { - public: - using BlankNodeManager::LocalBlankNodeManager::blocks_; -}; - -TEST(blockAllocation, BlankNodeManagerTest) { +namespace ad_utility { +TEST(BlankNodeManager, blockAllocation) { using Block = BlankNodeManager::Block; - BlankNodeManagerTest bnm; + auto& bnm = globalBlankNodeManager; EXPECT_EQ(bnm.usedBlocksSet_.size(), 0); - Block b = bnm.allocateBlock(); - EXPECT_EQ(bnm.usedBlocksSet_.size(), 1); - bnm.freeBlock(b.blockIdx_); + + { + Block b = bnm.allocateBlock(); + EXPECT_EQ(bnm.usedBlocksSet_.size(), 1); + } + + // Blocks are removed from the globalBlankNodeManager once they are destroyed. EXPECT_EQ(bnm.usedBlocksSet_.size(), 0); - // double free - EXPECT_NO_THROW(bnm.freeBlock(b.blockIdx_)); } -TEST(LocalBlankNodeManagerGetID, BlankNodeManagerTest) { - using LocalBNM = LocalBlankNodeManagerTest; - LocalBNM l; +TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { + BlankNodeManager::LocalBlankNodeManager l; // initially the LocalBlankNodeManager doesn't have any blocks EXPECT_EQ(l.blocks_.size(), 0); @@ -53,3 +38,5 @@ TEST(LocalBlankNodeManagerGetID, BlankNodeManagerTest) { id = l.getId(); EXPECT_EQ(l.blocks_.size(), 2); } + +} // namespace ad_utility From 54c15a50163927d7dafba6afb69e93cadcbe6ab2 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:04:04 +0200 Subject: [PATCH 05/11] Removed LocalBlankNodeIndex-type --- src/engine/ExportQueryExecutionTrees.cpp | 3 --- .../SparqlExpressionValueGetters.cpp | 5 ----- .../SparqlExpressionValueGetters.h | 3 +-- src/global/IndexTypes.h | 2 -- src/global/ValueId.h | 14 +------------- src/global/ValueIdComparators.h | 2 -- test/ValueIdTestHelpers.h | 3 --- 7 files changed, 2 insertions(+), 30 deletions(-) diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 22c1ef890d..3913117016 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -217,9 +217,6 @@ ExportQueryExecutionTrees::idToStringAndTypeForEncodedValue(Id id) { case BlankNodeIndex: return std::pair{absl::StrCat("_:bn", id.getBlankNodeIndex().get()), nullptr}; - case LocalBlankNodeIndex: - return std::pair{absl::StrCat("_:bn", id.getLocalBlankNodeIndex().get()), - nullptr}; default: AD_FAIL(); } diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp index 3c1ece13e6..3ed18e7b99 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.cpp @@ -32,7 +32,6 @@ NumericValue NumericValueGetter::operator()( case Datatype::Date: case Datatype::GeoPoint: case Datatype::BlankNodeIndex: - case Datatype::LocalBlankNodeIndex: return NotNumeric{}; } AD_FAIL(); @@ -53,7 +52,6 @@ auto EffectiveBooleanValueGetter::operator()( return id.getBool() ? True : False; case Datatype::Undefined: case Datatype::BlankNodeIndex: - case Datatype::LocalBlankNodeIndex: return Undef; case Datatype::VocabIndex: { auto index = id.getVocabIndex(); @@ -157,7 +155,6 @@ IntDoubleStr ToNumericValueGetter::operator()( case Datatype::WordVocabIndex: case Datatype::Date: case Datatype::BlankNodeIndex: - case Datatype::LocalBlankNodeIndex: auto optString = LiteralFromIdGetter{}(id, context); if (optString.has_value()) { return std::move(optString.value()); @@ -202,7 +199,6 @@ OptIri DatatypeValueGetter::operator()(ValueId id, context); case Undefined: case BlankNodeIndex: - case LocalBlankNodeIndex: case TextRecordIndex: case WordVocabIndex: return std::nullopt; @@ -257,7 +253,6 @@ T getValue(ValueId id, const sparqlExpression::EvaluationContext* context, case TextRecordIndex: case WordVocabIndex: case BlankNodeIndex: - case LocalBlankNodeIndex: case Bool: case Int: case Double: diff --git a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h index bd93bb193c..6e7cd310ec 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h +++ b/src/engine/sparqlExpressions/SparqlExpressionValueGetters.h @@ -145,8 +145,7 @@ struct StringValueGetter : Mixin { struct IsBlankNodeValueGetter : Mixin { using Mixin::operator(); Id operator()(ValueId id, const EvaluationContext*) const { - return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex || - id.getDatatype() == Datatype::LocalBlankNodeIndex); + return Id::makeFromBool(id.getDatatype() == Datatype::BlankNodeIndex); } Id operator()(const LiteralOrIri&, const EvaluationContext*) const { diff --git a/src/global/IndexTypes.h b/src/global/IndexTypes.h index 149d3d411c..372e9e57c7 100644 --- a/src/global/IndexTypes.h +++ b/src/global/IndexTypes.h @@ -16,5 +16,3 @@ using LocalVocabIndex = const LocalVocabEntry*; using TextRecordIndex = ad_utility::TypedIndex; using WordVocabIndex = ad_utility::TypedIndex; using BlankNodeIndex = ad_utility::TypedIndex; -using LocalBlankNodeIndex = - ad_utility::TypedIndex; diff --git a/src/global/ValueId.h b/src/global/ValueId.h index dbaaab83c0..fe429b98fa 100644 --- a/src/global/ValueId.h +++ b/src/global/ValueId.h @@ -33,8 +33,7 @@ enum struct Datatype { GeoPoint, WordVocabIndex, BlankNodeIndex, - LocalBlankNodeIndex, - MaxValue = LocalBlankNodeIndex + MaxValue = BlankNodeIndex // Note: Unfortunately we cannot easily get the size of an enum. // If members are added to this enum, then the `MaxValue` // alias must always be equal to the last member, @@ -66,8 +65,6 @@ constexpr std::string_view toString(Datatype type) { return "GeoPoint"; case Datatype::BlankNodeIndex: return "BlankNodeIndex"; - case Datatype::LocalBlankNodeIndex: - return "LocalBlankNodeIndex"; } // This line is reachable if we cast an arbitrary invalid int to this enum AD_FAIL(); @@ -290,9 +287,6 @@ class ValueId { static ValueId makeFromBlankNodeIndex(BlankNodeIndex index) { return makeFromIndex(index.get(), Datatype::BlankNodeIndex); } - static ValueId makeFromLocalBlankNodeIndex(LocalBlankNodeIndex index) { - return makeFromIndex(index.get(), Datatype::LocalBlankNodeIndex); - } /// Obtain the unsigned index that this `ValueId` encodes. If `getDatatype() /// != [VocabIndex|TextRecordIndex|LocalVocabIndex]` then the result is @@ -313,10 +307,6 @@ class ValueId { [[nodiscard]] constexpr BlankNodeIndex getBlankNodeIndex() const noexcept { return BlankNodeIndex::make(removeDatatypeBits(_bits)); } - [[nodiscard]] constexpr LocalBlankNodeIndex getLocalBlankNodeIndex() - const noexcept { - return LocalBlankNodeIndex::make(removeDatatypeBits(_bits)); - } // Store or load a `Date` object. static ValueId makeFromDate(DateYearOrDuration d) noexcept { @@ -396,8 +386,6 @@ class ValueId { return std::invoke(visitor, getGeoPoint()); case Datatype::BlankNodeIndex: return std::invoke(visitor, getBlankNodeIndex()); - case Datatype::LocalBlankNodeIndex: - return std::invoke(visitor, getLocalBlankNodeIndex()); } AD_FAIL(); } diff --git a/src/global/ValueIdComparators.h b/src/global/ValueIdComparators.h index 2eeb135962..f5ae8dea9a 100644 --- a/src/global/ValueIdComparators.h +++ b/src/global/ValueIdComparators.h @@ -403,7 +403,6 @@ inline std::vector> getRangesForId( case Datatype::Date: case Datatype::GeoPoint: case Datatype::BlankNodeIndex: - case Datatype::LocalBlankNodeIndex: // For `Date` the trivial comparison via bits is also correct. return detail::simplifyRanges( detail::getRangesForIndexTypes(begin, end, valueId, comparison)); @@ -438,7 +437,6 @@ inline std::vector> getRangesForEqualIds( case Datatype::Date: case Datatype::GeoPoint: case Datatype::BlankNodeIndex: - case Datatype::LocalBlankNodeIndex: AD_FAIL(); case Datatype::VocabIndex: case Datatype::LocalVocabIndex: diff --git a/test/ValueIdTestHelpers.h b/test/ValueIdTestHelpers.h index 958f3a4765..5644caefee 100644 --- a/test/ValueIdTestHelpers.h +++ b/test/ValueIdTestHelpers.h @@ -57,9 +57,6 @@ inline ValueId makeWordVocabId(uint64_t value) { inline ValueId makeBlankNodeId(uint64_t value) { return ValueId::makeFromBlankNodeIndex(BlankNodeIndex::make(value)); } -inline ValueId makeLocalBlankNodeId(uint64_t value) { - return ValueId::makeFromLocalBlankNodeIndex(LocalBlankNodeIndex::make(value)); -} inline uint64_t getVocabIndex(ValueId id) { return id.getVocabIndex().get(); } // TODO Make the tests more precise for the localVocabIndices. From 735bc5f904f3b37d73dcac10c0965e89689011c6 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:10:43 +0200 Subject: [PATCH 06/11] add BlankNodeManager to Index --- src/engine/LocalVocab.cpp | 11 +++++-- src/engine/LocalVocab.h | 8 +++-- src/engine/Service.cpp | 12 ++++---- src/index/Index.cpp | 4 +++ src/index/Index.h | 3 ++ src/index/IndexImpl.cpp | 9 ++++++ src/index/IndexImpl.h | 3 ++ src/index/VocabularyMerger.h | 2 +- src/util/BlankNodeManager.cpp | 49 +++++++++++++++++------------- src/util/BlankNodeManager.h | 56 ++++++++++++++++++++++------------- src/util/Synchronized.h | 2 +- test/BlankNodeManagerTest.cpp | 34 ++++++++++++++------- test/LocalVocabTest.cpp | 8 +++++ test/ServiceTest.cpp | 4 ++- 14 files changed, 141 insertions(+), 64 deletions(-) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index f3d32702e1..02007922ec 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -79,6 +79,13 @@ std::vector LocalVocab::getAllWordsForTesting() } // _____________________________________________________________________________ -LocalBlankNodeIndex LocalVocab::getBlankNodeIndex() { - return LocalBlankNodeIndex::make(blankNodeManager_.getId()); +BlankNodeIndex LocalVocab::getBlankNodeIndex( + ad_utility::BlankNodeManager* blankNodeManager) { + // Initialize the `localBlankNodeManager_` if it doesn't exist yet. + if (!localBlankNodeManager_) [[unlikely]] { + localBlankNodeManager_ = + std::make_unique( + blankNodeManager); + } + return BlankNodeIndex::make(localBlankNodeManager_->getId()); } diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index 04a3b561e5..d78e263682 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -39,7 +39,8 @@ class LocalVocab { auto& primaryWordSet() { return *primaryWordSet_; } const auto& primaryWordSet() const { return *primaryWordSet_; } - ad_utility::BlankNodeManager::LocalBlankNodeManager blankNodeManager_; + std::unique_ptr + localBlankNodeManager_; public: // Create a new, empty local vocabulary. @@ -93,8 +94,9 @@ class LocalVocab { // Return all the words from all the word sets as a vector. std::vector getAllWordsForTesting() const; - // Get a new LocalBlankNodeIndex using the LocalBlankNodeManager. - LocalBlankNodeIndex getBlankNodeIndex(); + // Get a new BlankNodeIndex using the LocalBlankNodeManager. + [[nodiscard]] BlankNodeIndex getBlankNodeIndex( + ad_utility::BlankNodeManager* blankNodeManager); private: // Common implementation for the two variants of diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index f97e02c159..267e530705 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -348,6 +348,8 @@ TripleComponent Service::bindingToTripleComponent( const auto type = binding["type"].get(); const auto value = binding["value"].get(); + auto blankNodeManagerPtr = + getExecutionContext()->getIndex().getBlankNodeManager(); TripleComponent tc; if (type == "literal") { @@ -366,12 +368,12 @@ TripleComponent Service::bindingToTripleComponent( } else if (type == "uri") { tc = TripleComponent::Iri::fromIrirefWithoutBrackets(value); } else if (type == "bnode") { - auto optEmplace = blankNodeMap.try_emplace(value, Id()); - if (optEmplace.second) { - optEmplace.first->second = - Id::makeFromLocalBlankNodeIndex(localVocab->getBlankNodeIndex()); + auto [it, wasNew] = blankNodeMap.try_emplace(value, Id()); + if (wasNew) { + it->second = Id::makeFromBlankNodeIndex( + localVocab->getBlankNodeIndex(blankNodeManagerPtr)); } - tc = optEmplace.first->second; + tc = it->second; } else { throw std::runtime_error(absl::StrCat("Type ", type, " is undefined. The binding is: '", diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 38cbfe52ed..6e5380d053 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -56,6 +56,10 @@ auto Index::getTextVocab() const -> const TextVocab& { return pimpl_->getTextVocab(); } +ad_utility::BlankNodeManager* Index::getBlankNodeManager() const { + return pimpl_->blankNodeManager_.get(); +} + // ____________________________________________________________________________ size_t Index::getCardinality(const TripleComponent& comp, Permutation::Enum p) const { diff --git a/src/index/Index.h b/src/index/Index.h index e0c9f26e81..271a11ab89 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -110,6 +110,9 @@ class Index { Vocabulary; [[nodiscard]] const TextVocab& getTextVocab() const; + // Get a (non-owning) pointer to the BlankNodeManager of this Index. + ad_utility::BlankNodeManager* getBlankNodeManager() const; + // -------------------------------------------------------------------------- // RDF RETRIEVAL // -------------------------------------------------------------------------- diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 3000d11e87..563799d671 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -340,6 +340,9 @@ void IndexImpl::createFromFile(const string& filename, Index::Filetype type) { configurationJson_["has-all-permutations"] = true; } + configurationJson_["num-blank-nodes-total"] = + indexBuilderData.vocabularyMetaData_.getNextBlankNodeIndex(); + // Dump the configuration again in case the permutations have added some // information. writeConfiguration(); @@ -970,6 +973,12 @@ void IndexImpl::readConfiguration() { loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); + // Initialize BlankNodeManager + uint64_t numBlankNodesTotal; + loadDataMember("num-blank-nodes-total", numBlankNodesTotal); + blankNodeManager_ = + std::make_unique(numBlankNodesTotal); + // Compute unique ID for this index. // // TODO: This is a simplistic way. It would be better to incorporate bytes diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index ad5d73a836..01cf346ca9 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -106,6 +106,9 @@ class IndexImpl { using NumNormalAndInternal = Index::NumNormalAndInternal; + // BlankNodeManager, initialized during `readConfiguration` + std::unique_ptr blankNodeManager_{nullptr}; + // Private data members. private: string onDiskBase_; diff --git a/src/index/VocabularyMerger.h b/src/index/VocabularyMerger.h index 11ba956e19..3c171d2c65 100644 --- a/src/index/VocabularyMerger.h +++ b/src/index/VocabularyMerger.h @@ -62,7 +62,7 @@ struct VocabularyMetaData { Id begin() const { return begin_; } Id end() const { return end_; } - // Return true iff the `id` belongs to this range. + // Return true if the `id` belongs to this range. bool contains(Id id) const { return begin_ <= id && id < end_; } private: diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp index cf7cf8c231..d37d99322e 100644 --- a/src/util/BlankNodeManager.cpp +++ b/src/util/BlankNodeManager.cpp @@ -7,48 +7,57 @@ namespace ad_utility { // _____________________________________________________________________________ -BlankNodeManager::BlankNodeManager( - SlowRandomIntGenerator randomIntGenerator) - : randBlockIndex_(randomIntGenerator) {} +BlankNodeManager::BlankNodeManager(uint64_t minIndex) + : minIndex_(minIndex), + randBlockIndex_( + SlowRandomIntGenerator(0, totalAvailableBlocks_ - 1)) {} // _____________________________________________________________________________ BlankNodeManager::Block BlankNodeManager::allocateBlock() { // The Random-Generation Algorithm's performance is reduced once the number of // used blocks exceeds a limit. - AD_CORRECTNESS_CHECK(usedBlocksSet_.size() < totalAvailableBlocks_ / 256); + AD_CORRECTNESS_CHECK(usedBlocksSet_.rlock()->size() < + totalAvailableBlocks_ / 256); - // Lock this part, as it might be accessed concurrently by - // `LocalBlankNodeManager` instances. - mtx_.lock(); uint64_t newBlockIndex = randBlockIndex_(); - while (usedBlocksSet_.contains(newBlockIndex)) { - newBlockIndex = randBlockIndex_(); + { + auto usedBlocksSetPtr_ = usedBlocksSet_.wlock(); + while (usedBlocksSetPtr_->contains(newBlockIndex)) { + newBlockIndex = randBlockIndex_(); + } + usedBlocksSetPtr_->insert(newBlockIndex); } - usedBlocksSet_.insert(newBlockIndex); - mtx_.unlock(); - - return Block(newBlockIndex); + return Block(newBlockIndex, minIndex_ + newBlockIndex * blockSize_); } // _____________________________________________________________________________ void BlankNodeManager::freeBlock(uint64_t blockIndex) { - usedBlocksSet_.erase(blockIndex); + usedBlocksSet_.wlock()->erase(blockIndex); } // _____________________________________________________________________________ -BlankNodeManager::Block::Block(uint64_t blockIndex) - : blockIdx_(blockIndex), nextIdx_(blockIndex * blockSize_) {} +BlankNodeManager::Block::Block(uint64_t blockIndex, uint64_t startIndex) + : blockIdx_(blockIndex), nextIdx_(startIndex) {} + +// _____________________________________________________________________________ +BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager( + BlankNodeManager* blankNodeManager) + : blankNodeManager_(blankNodeManager) {} // _____________________________________________________________________________ -BlankNodeManager::Block::~Block() { - globalBlankNodeManager.freeBlock(blockIdx_); +BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() { + for (auto block : blocks_) { + blankNodeManager_->freeBlock(block.blockIdx_); + } } // _____________________________________________________________________________ uint64_t BlankNodeManager::LocalBlankNodeManager::getId() { if (blocks_.empty() || - blocks_.back().nextIdx_ == (blocks_.back().blockIdx_ + 1) * blockSize_) { - blocks_.emplace_back(globalBlankNodeManager.allocateBlock()); + blocks_.back().nextIdx_ == + (blankNodeManager_->minIndex_ + blocks_.back().blockIdx_ + 1) * + blockSize_) { + blocks_.emplace_back(blankNodeManager_->allocateBlock()); } return blocks_.back().nextIdx_++; } diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h index 0b449c9f68..7781da67ac 100644 --- a/src/util/BlankNodeManager.h +++ b/src/util/BlankNodeManager.h @@ -11,45 +11,58 @@ #include "global/ValueId.h" #include "util/HashSet.h" #include "util/Random.h" +#include "util/Synchronized.h" namespace ad_utility { /* - * Manager class for LocalBlankNodeIndex. + * Manager class for Blank node indices added after indexing time. */ class BlankNodeManager { public: - static const uint blockSize_ = 1000; - static constexpr uint64_t totalAvailableBlocks_ = - (ValueId::maxIndex + 1) / blockSize_; + // Minimum index. + const uint64_t minIndex_; + + // Number of indices that make up a single block. + static constexpr uint blockSize_ = 1000; + + // Number of blocks available. + const uint64_t totalAvailableBlocks_ = + (ValueId::maxIndex - minIndex_ + 1) / blockSize_; private: // Int Generator yielding random block indices. SlowRandomIntGenerator randBlockIndex_; - // Mutex for Block allocation. - std::mutex mtx_; // Tracks blocks currently used by instances of `LocalBlankNodeManager`. - HashSet usedBlocksSet_; + Synchronized> usedBlocksSet_; public: - explicit BlankNodeManager( - SlowRandomIntGenerator randomIntGenerator = - SlowRandomIntGenerator{0, totalAvailableBlocks_ - 1}); + // Constructor, where `minIndex` is the minimum index such that all managed + // indices are in [`minIndex_`, `ValueId::maxIndex`]. Currently `minIndex_` is + // determined by the number of BlankNodes in the current Index. + explicit BlankNodeManager(uint64_t minIndex = 0); ~BlankNodeManager() = default; - // A Local BlankNode Ids Block of size `blockSize_`. - struct Block { - explicit Block(uint64_t blockIndex); - ~Block(); + // A BlankNodeIndex Block of size `blockSize_`. + class Block { + // Intentional private constructor, allowing only the BlankNodeManager to + // create Blocks (for a `LocalBlankNodeManager`). + explicit Block(uint64_t blockIndex, uint64_t startIndex); + friend class BlankNodeManager; + + public: + ~Block() = default; + // The index of this block. uint64_t blockIdx_; + // The next free index within this block. uint64_t nextIdx_; }; - // Manages the LocalBlankNodes used within a LocalVocab. + // Manages the BlankNodes used within a LocalVocab. class LocalBlankNodeManager { public: - LocalBlankNodeManager() = default; - ~LocalBlankNodeManager() = default; + explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager); + ~LocalBlankNodeManager(); // Get a new id. [[nodiscard]] uint64_t getId(); @@ -58,18 +71,21 @@ class BlankNodeManager { // Reserved blocks. std::vector blocks_; + // Reference of the BlankNodeManager, used to free the reserved blocks. + BlankNodeManager* const blankNodeManager_; + FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID); }; + void setInitialIndex(uint64_t idx); + // Allocate and retrieve a block of free ids. [[nodiscard]] Block allocateBlock(); // Free a block of ids. void freeBlock(uint64_t blockIndex); - FRIEND_TEST(BlankNodeManager, blockAllocation); + FRIEND_TEST(BlankNodeManager, blockAllocationAndFree); }; } // namespace ad_utility - -inline ad_utility::BlankNodeManager globalBlankNodeManager; diff --git a/src/util/Synchronized.h b/src/util/Synchronized.h index d699e612ca..657abc210d 100644 --- a/src/util/Synchronized.h +++ b/src/util/Synchronized.h @@ -116,7 +116,7 @@ class Synchronized { return f(data_); } - /// const overload of with WriteLock + /// const overload of `withWriteLock` template auto withWriteLock(F f) const { std::lock_guard l(mutex()); diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp index 30303f1c2f..fb131a8f11 100644 --- a/test/BlankNodeManagerTest.cpp +++ b/test/BlankNodeManagerTest.cpp @@ -5,26 +5,28 @@ #include #include "util/BlankNodeManager.h" -#include "util/Random.h" namespace ad_utility { -TEST(BlankNodeManager, blockAllocation) { - using Block = BlankNodeManager::Block; - auto& bnm = globalBlankNodeManager; - - EXPECT_EQ(bnm.usedBlocksSet_.size(), 0); +TEST(BlankNodeManager, blockAllocationAndFree) { + BlankNodeManager bnm(0); + EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 0); { - Block b = bnm.allocateBlock(); - EXPECT_EQ(bnm.usedBlocksSet_.size(), 1); + // LocalBlankNodeManager allocates a new block + BlankNodeManager::LocalBlankNodeManager lbnm(&bnm); + [[maybe_unused]] uint64_t id = lbnm.getId(); + EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 1); } - // Blocks are removed from the globalBlankNodeManager once they are destroyed. - EXPECT_EQ(bnm.usedBlocksSet_.size(), 0); + // The Blocks allocated by the LocalBlankNodeManager are freed/removed from + // the set once it is destroyed + EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 0); } TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { - BlankNodeManager::LocalBlankNodeManager l; + BlankNodeManager bnm(0); + BlankNodeManager::LocalBlankNodeManager l(&bnm); + // initially the LocalBlankNodeManager doesn't have any blocks EXPECT_EQ(l.blocks_.size(), 0); @@ -39,4 +41,14 @@ TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { EXPECT_EQ(l.blocks_.size(), 2); } +TEST(BlankNodeManager, maxNumOfBlocks) { + // Mock a high `minIndex_` to simulate reduced space in the usedBlocksSet_ + BlankNodeManager bnm(ValueId::maxIndex - 256 * BlankNodeManager::blockSize_ + + 2); + auto allocateBlock = [&bnm]() { + [[maybe_unused]] auto _ = bnm.allocateBlock(); + }; + EXPECT_ANY_THROW(allocateBlock()); +} + } // namespace ad_utility diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index d381c05c8d..ba0400c165 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -369,3 +369,11 @@ TEST(LocalVocab, propagation) { // TODO Maybe add tests for the new TextIndexScanFor... classes, // they never introduce any local vocab. } + +TEST(LocalVocab, getBlankNodeIndex) { + ad_utility::BlankNodeManager bnm(0); + LocalVocab v; + BlankNodeIndex a = v.getBlankNodeIndex(&bnm); + BlankNodeIndex b = v.getBlankNodeIndex(&bnm); + EXPECT_NE(a, b); +} diff --git a/test/ServiceTest.cpp b/test/ServiceTest.cpp index aff363ff95..4825368596 100644 --- a/test/ServiceTest.cpp +++ b/test/ServiceTest.cpp @@ -535,11 +535,13 @@ TEST_F(ServiceTest, bindingToTripleComponent) { bTTC({{"type", "bnode"}, {"value", "A"}}).toValueIdIfNotString().value(); Id b = bTTC({{"type", "bnode"}, {"value", "B"}}).toValueIdIfNotString().value(); + EXPECT_EQ(a.getDatatype(), Datatype::BlankNodeIndex); + EXPECT_EQ(b.getDatatype(), Datatype::BlankNodeIndex); EXPECT_NE(a, b); EXPECT_EQ(blankNodeMap.size(), 2); - // BlankNode exists already, known Id will be used. + // This BlankNode exists already, known Id will be used. Id a2 = bTTC({{"type", "bnode"}, {"value", "A"}}).toValueIdIfNotString().value(); EXPECT_EQ(a, a2); From 8ba39a50aaf873b7d18f93825711e565b5598a16 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:31:38 +0200 Subject: [PATCH 07/11] remove deprecated function call --- test/ValueIdTest.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/ValueIdTest.cpp b/test/ValueIdTest.cpp index 0768aaa86d..452bfe260c 100644 --- a/test/ValueIdTest.cpp +++ b/test/ValueIdTest.cpp @@ -323,7 +323,6 @@ TEST_F(ValueIdTest, toDebugString) { test(makeTextRecordId(37), "T:37"); test(makeWordVocabId(42), "W:42"); test(makeBlankNodeId(27), "B:27"); - test(makeLocalBlankNodeId(28), "L:28"); test(ValueId::makeFromDate( DateYearOrDuration{123456, DateYearOrDuration::Type::Year}), "D:123456"); From 26067c38c3b162606a49e61e16bbc55f5f65b27a Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Wed, 16 Oct 2024 17:04:39 +0200 Subject: [PATCH 08/11] various improvements --- src/engine/LocalVocab.cpp | 1 + src/engine/QueryExecutionContext.h | 4 ---- src/global/Constants.h | 2 -- src/index/Index.cpp | 1 + src/index/IndexFormatVersion.h | 2 +- src/util/BlankNodeManager.cpp | 30 ++++++++++++------------------ src/util/BlankNodeManager.h | 25 ++++++++++++++++--------- test/BlankNodeManagerTest.cpp | 10 ++++++---- test/IndexTest.cpp | 11 +++++++++++ test/LocalVocabTest.cpp | 1 + 10 files changed, 49 insertions(+), 38 deletions(-) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 02007922ec..4b82639ef9 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -81,6 +81,7 @@ std::vector LocalVocab::getAllWordsForTesting() // _____________________________________________________________________________ BlankNodeIndex LocalVocab::getBlankNodeIndex( ad_utility::BlankNodeManager* blankNodeManager) { + AD_CONTRACT_CHECK(blankNodeManager); // Initialize the `localBlankNodeManager_` if it doesn't exist yet. if (!localBlankNodeManager_) [[unlikely]] { localBlankNodeManager_ = diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 6ac59363e8..e82e8fd0ef 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -6,17 +6,13 @@ #pragma once -#include -#include #include -#include #include #include "engine/QueryPlanningCostFactors.h" #include "engine/Result.h" #include "engine/RuntimeInformation.h" #include "engine/SortPerformanceEstimator.h" -#include "global/Constants.h" #include "global/Id.h" #include "index/Index.h" #include "util/Cache.h" diff --git a/src/global/Constants.h b/src/global/Constants.h index f87a423bed..b2bd6807ef 100644 --- a/src/global/Constants.h +++ b/src/global/Constants.h @@ -7,10 +7,8 @@ #pragma once -#include #include #include -#include #include #include #include diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 6e5380d053..55abab79e0 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -57,6 +57,7 @@ auto Index::getTextVocab() const -> const TextVocab& { } ad_utility::BlankNodeManager* Index::getBlankNodeManager() const { + AD_CONTRACT_CHECK(pimpl_->blankNodeManager_); return pimpl_->blankNodeManager_.get(); } diff --git a/src/index/IndexFormatVersion.h b/src/index/IndexFormatVersion.h index 84bee08318..d9b9110fee 100644 --- a/src/index/IndexFormatVersion.h +++ b/src/index/IndexFormatVersion.h @@ -36,5 +36,5 @@ struct IndexFormatVersion { // The actual index version. Change it once the binary format of the index // changes. inline const IndexFormatVersion& indexFormatVersion{ - 1532, DateYearOrDuration{Date{2024, 10, 4}}}; + 1504, DateYearOrDuration{Date{2024, 10, 15}}}; } // namespace qlever diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp index d37d99322e..799537f418 100644 --- a/src/util/BlankNodeManager.cpp +++ b/src/util/BlankNodeManager.cpp @@ -16,25 +16,20 @@ BlankNodeManager::BlankNodeManager(uint64_t minIndex) BlankNodeManager::Block BlankNodeManager::allocateBlock() { // The Random-Generation Algorithm's performance is reduced once the number of // used blocks exceeds a limit. - AD_CORRECTNESS_CHECK(usedBlocksSet_.rlock()->size() < - totalAvailableBlocks_ / 256); + AD_CORRECTNESS_CHECK( + usedBlocksSet_.rlock()->size() < totalAvailableBlocks_ / 256, + absl::StrCat("Critical high number of blank node blocks in use: ", + usedBlocksSet_.rlock()->size(), " blocks")); uint64_t newBlockIndex = randBlockIndex_(); - { - auto usedBlocksSetPtr_ = usedBlocksSet_.wlock(); - while (usedBlocksSetPtr_->contains(newBlockIndex)) { - newBlockIndex = randBlockIndex_(); - } - usedBlocksSetPtr_->insert(newBlockIndex); + auto usedBlocksSetPtr = usedBlocksSet_.wlock(); + while (usedBlocksSetPtr->contains(newBlockIndex)) { + newBlockIndex = randBlockIndex_(); } + usedBlocksSetPtr->insert(newBlockIndex); return Block(newBlockIndex, minIndex_ + newBlockIndex * blockSize_); } -// _____________________________________________________________________________ -void BlankNodeManager::freeBlock(uint64_t blockIndex) { - usedBlocksSet_.wlock()->erase(blockIndex); -} - // _____________________________________________________________________________ BlankNodeManager::Block::Block(uint64_t blockIndex, uint64_t startIndex) : blockIdx_(blockIndex), nextIdx_(startIndex) {} @@ -46,18 +41,17 @@ BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager( // _____________________________________________________________________________ BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() { + auto ptr = blankNodeManager_->usedBlocksSet_.wlock(); for (auto block : blocks_) { - blankNodeManager_->freeBlock(block.blockIdx_); + ptr->erase(block.blockIdx_); } } // _____________________________________________________________________________ uint64_t BlankNodeManager::LocalBlankNodeManager::getId() { - if (blocks_.empty() || - blocks_.back().nextIdx_ == - (blankNodeManager_->minIndex_ + blocks_.back().blockIdx_ + 1) * - blockSize_) { + if (blocks_.empty() || blocks_.back().nextIdx_ == idxAfterCurrentBlock_) { blocks_.emplace_back(blankNodeManager_->allocateBlock()); + idxAfterCurrentBlock_ = blocks_.back().nextIdx_ + blockSize_; } return blocks_.back().nextIdx_++; } diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h index 7781da67ac..0ad5310252 100644 --- a/src/util/BlankNodeManager.h +++ b/src/util/BlankNodeManager.h @@ -15,11 +15,17 @@ namespace ad_utility { /* - * Manager class for Blank node indices added after indexing time. + * Manager class owned by an `Index` to manage currently available indices for + * blank nodes to be added during runtime. The intention is to use the same + * `BlankNodeIndex`-Datatype as for blank nodes given at indexing time, by + * setting their count as the minimum index for the ones added at runtime. + * A `LocalVocab` can register new blank nodes (e.g. resulting from a `Service` + * operation) by obtaining a `Block` of currently unused indices using it's own + * `LocalBlankNodeManager` from the `BlankNodeManager`. */ class BlankNodeManager { public: - // Minimum index. + // Minimum blank node index. const uint64_t minIndex_; // Number of indices that make up a single block. @@ -38,7 +44,7 @@ class BlankNodeManager { public: // Constructor, where `minIndex` is the minimum index such that all managed - // indices are in [`minIndex_`, `ValueId::maxIndex`]. Currently `minIndex_` is + // indices are in [`minIndex_`, `ValueId::maxIndex`]. `minIndex_` is // determined by the number of BlankNodes in the current Index. explicit BlankNodeManager(uint64_t minIndex = 0); ~BlankNodeManager() = default; @@ -53,7 +59,7 @@ class BlankNodeManager { public: ~Block() = default; // The index of this block. - uint64_t blockIdx_; + const uint64_t blockIdx_; // The next free index within this block. uint64_t nextIdx_; }; @@ -64,6 +70,9 @@ class BlankNodeManager { explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager); ~LocalBlankNodeManager(); + LocalBlankNodeManager(LocalBlankNodeManager&) = delete; + LocalBlankNodeManager& operator=(const LocalBlankNodeManager&) = delete; + // Get a new id. [[nodiscard]] uint64_t getId(); @@ -74,17 +83,15 @@ class BlankNodeManager { // Reference of the BlankNodeManager, used to free the reserved blocks. BlankNodeManager* const blankNodeManager_; + // The first index after the current Block. + uint64_t idxAfterCurrentBlock_{0}; + FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID); }; - void setInitialIndex(uint64_t idx); - // Allocate and retrieve a block of free ids. [[nodiscard]] Block allocateBlock(); - // Free a block of ids. - void freeBlock(uint64_t blockIndex); - FRIEND_TEST(BlankNodeManager, blockAllocationAndFree); }; diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp index fb131a8f11..3dc3a35d14 100644 --- a/test/BlankNodeManagerTest.cpp +++ b/test/BlankNodeManagerTest.cpp @@ -4,7 +4,9 @@ #include +#include "gmock/gmock.h" #include "util/BlankNodeManager.h" +#include "util/GTestHelpers.h" namespace ad_utility { TEST(BlankNodeManager, blockAllocationAndFree) { @@ -45,10 +47,10 @@ TEST(BlankNodeManager, maxNumOfBlocks) { // Mock a high `minIndex_` to simulate reduced space in the usedBlocksSet_ BlankNodeManager bnm(ValueId::maxIndex - 256 * BlankNodeManager::blockSize_ + 2); - auto allocateBlock = [&bnm]() { - [[maybe_unused]] auto _ = bnm.allocateBlock(); - }; - EXPECT_ANY_THROW(allocateBlock()); + AD_EXPECT_THROW_WITH_MESSAGE( + [[maybe_unused]] auto _ = bnm.allocateBlock(), + ::testing::HasSubstr( + "Critical high number of blank node blocks in use:")); } } // namespace ad_utility diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 0d45c4aecb..d206c1e9e7 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -486,3 +486,14 @@ TEST(IndexTest, trivialGettersAndSetters) { EXPECT_EQ(index.memoryLimitIndexBuilding(), 7_kB); EXPECT_EQ(std::as_const(index).memoryLimitIndexBuilding(), 7_kB); } + +TEST(IndexTest, getBlankNodeManager) { + // The `blankNodeManager_` is initialized after initializing the Index itself. + // Therefore we expect a throw when the getter is called by an + // uninitialized Index. + Index index{ad_utility::makeUnlimitedAllocator()}; + EXPECT_ANY_THROW(index.getBlankNodeManager()); + + const Index& index2 = getQec("")->getIndex(); + EXPECT_NO_THROW(index2.getBlankNodeManager()); +} diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index ba0400c165..a9058d3a68 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -370,6 +370,7 @@ TEST(LocalVocab, propagation) { // they never introduce any local vocab. } +// _____________________________________________________________________________ TEST(LocalVocab, getBlankNodeIndex) { ad_utility::BlankNodeManager bnm(0); LocalVocab v; From c354fa0eced72fa94ac2316a42a65d72b50dcde7 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:18:18 +0200 Subject: [PATCH 09/11] fix tests --- src/engine/LocalVocab.cpp | 3 +-- src/engine/LocalVocab.h | 2 +- src/util/BlankNodeManager.h | 8 ++++++-- test/BlankNodeManagerTest.cpp | 12 ++++++++++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 4b82639ef9..4fd51e76dc 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -85,8 +85,7 @@ BlankNodeIndex LocalVocab::getBlankNodeIndex( // Initialize the `localBlankNodeManager_` if it doesn't exist yet. if (!localBlankNodeManager_) [[unlikely]] { localBlankNodeManager_ = - std::make_unique( - blankNodeManager); + ad_utility::BlankNodeManager::LocalBlankNodeManager(blankNodeManager); } return BlankNodeIndex::make(localBlankNodeManager_->getId()); } diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index d78e263682..be9a7c4499 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -39,7 +39,7 @@ class LocalVocab { auto& primaryWordSet() { return *primaryWordSet_; } const auto& primaryWordSet() const { return *primaryWordSet_; } - std::unique_ptr + std::optional localBlankNodeManager_; public: diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h index 0ad5310252..e8e9c90d87 100644 --- a/src/util/BlankNodeManager.h +++ b/src/util/BlankNodeManager.h @@ -70,9 +70,13 @@ class BlankNodeManager { explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager); ~LocalBlankNodeManager(); - LocalBlankNodeManager(LocalBlankNodeManager&) = delete; + // No copy, as the managed blocks shall not be duplicated. + LocalBlankNodeManager(const LocalBlankNodeManager&) = delete; LocalBlankNodeManager& operator=(const LocalBlankNodeManager&) = delete; + LocalBlankNodeManager(LocalBlankNodeManager&&) = default; + LocalBlankNodeManager& operator=(LocalBlankNodeManager&&) = default; + // Get a new id. [[nodiscard]] uint64_t getId(); @@ -81,7 +85,7 @@ class BlankNodeManager { std::vector blocks_; // Reference of the BlankNodeManager, used to free the reserved blocks. - BlankNodeManager* const blankNodeManager_; + BlankNodeManager* blankNodeManager_; // The first index after the current Block. uint64_t idxAfterCurrentBlock_{0}; diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp index 3dc3a35d14..a3a05c9c9d 100644 --- a/test/BlankNodeManagerTest.cpp +++ b/test/BlankNodeManagerTest.cpp @@ -20,9 +20,17 @@ TEST(BlankNodeManager, blockAllocationAndFree) { EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 1); } - // The Blocks allocated by the LocalBlankNodeManager are freed/removed from - // the set once it is destroyed + // Once the LocalBlankNodeManager is destroyed, all Blocks allocated through + // it are freed/removed from the BlankNodeManager's set. EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 0); + + // Mock randomIntGenerator to let the block index generation collide. + bnm.randBlockIndex_ = SlowRandomIntGenerator(0, 1); + [[maybe_unused]] auto _ = bnm.allocateBlock(); + for (int i = 0; i < 30; ++i) { + auto block = bnm.allocateBlock(); + bnm.usedBlocksSet_.wlock()->erase(block.blockIdx_); + } } TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { From 9a0184167e548a278cba33746bd67b85f1c8d273 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Fri, 18 Oct 2024 09:59:01 +0200 Subject: [PATCH 10/11] various improvements --- src/engine/LocalVocab.cpp | 3 +-- src/index/Index.cpp | 4 ++-- src/index/IndexImpl.cpp | 6 ++++++ src/index/IndexImpl.h | 8 +++++--- src/util/BlankNodeManager.cpp | 17 ++++++++++------- src/util/BlankNodeManager.h | 1 + test/BlankNodeManagerTest.cpp | 24 +++++++++++++++++++++--- test/IndexTest.cpp | 10 ++++++++++ 8 files changed, 56 insertions(+), 17 deletions(-) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 4fd51e76dc..ee0285c532 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -84,8 +84,7 @@ BlankNodeIndex LocalVocab::getBlankNodeIndex( AD_CONTRACT_CHECK(blankNodeManager); // Initialize the `localBlankNodeManager_` if it doesn't exist yet. if (!localBlankNodeManager_) [[unlikely]] { - localBlankNodeManager_ = - ad_utility::BlankNodeManager::LocalBlankNodeManager(blankNodeManager); + localBlankNodeManager_.emplace(blankNodeManager); } return BlankNodeIndex::make(localBlankNodeManager_->getId()); } diff --git a/src/index/Index.cpp b/src/index/Index.cpp index a7b2e301ab..142ace17b5 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -51,9 +51,9 @@ auto Index::getTextVocab() const -> const TextVocab& { return pimpl_->getTextVocab(); } +// ____________________________________________________________________________ ad_utility::BlankNodeManager* Index::getBlankNodeManager() const { - AD_CONTRACT_CHECK(pimpl_->blankNodeManager_); - return pimpl_->blankNodeManager_.get(); + return pimpl_->getBlankNodeManager(); } // ____________________________________________________________________________ diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index fb0403d4ee..a2222b4b77 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1695,3 +1695,9 @@ std::unique_ptr> IndexImpl::makeSorterPtr( std::string_view permutationName) const { return makeSorterImpl(permutationName); } + +// _____________________________________________________________________________ +ad_utility::BlankNodeManager* IndexImpl::getBlankNodeManager() const { + AD_CONTRACT_CHECK(blankNodeManager_); + return blankNodeManager_.get(); +} diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 2a54168b17..c9fe149517 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -114,9 +114,6 @@ class IndexImpl { using NumNormalAndInternal = Index::NumNormalAndInternal; - // BlankNodeManager, initialized during `readConfiguration` - std::unique_ptr blankNodeManager_{nullptr}; - // Private data members. private: string onDiskBase_; @@ -187,6 +184,9 @@ class IndexImpl { std::optional idOfHasPatternDuringIndexBuilding_; std::optional idOfInternalGraphDuringIndexBuilding_; + // BlankNodeManager, initialized during `readConfiguration` + std::unique_ptr blankNodeManager_{nullptr}; + public: explicit IndexImpl(ad_utility::AllocatorWithLimit allocator); @@ -258,6 +258,8 @@ class IndexImpl { const auto& getTextVocab() const { return textVocab_; }; + ad_utility::BlankNodeManager* getBlankNodeManager() const; + // -------------------------------------------------------------------------- // -- RETRIEVAL --- // -------------------------------------------------------------------------- diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp index 799537f418..cff86c08fa 100644 --- a/src/util/BlankNodeManager.cpp +++ b/src/util/BlankNodeManager.cpp @@ -16,18 +16,20 @@ BlankNodeManager::BlankNodeManager(uint64_t minIndex) BlankNodeManager::Block BlankNodeManager::allocateBlock() { // The Random-Generation Algorithm's performance is reduced once the number of // used blocks exceeds a limit. + auto numBlocks = usedBlocksSet_.rlock()->size(); AD_CORRECTNESS_CHECK( - usedBlocksSet_.rlock()->size() < totalAvailableBlocks_ / 256, + numBlocks < totalAvailableBlocks_ / 256, absl::StrCat("Critical high number of blank node blocks in use: ", - usedBlocksSet_.rlock()->size(), " blocks")); + numBlocks, " blocks")); - uint64_t newBlockIndex = randBlockIndex_(); auto usedBlocksSetPtr = usedBlocksSet_.wlock(); - while (usedBlocksSetPtr->contains(newBlockIndex)) { - newBlockIndex = randBlockIndex_(); + while (true) { + auto blockIdx = randBlockIndex_(); + if (!usedBlocksSetPtr->contains(blockIdx)) { + usedBlocksSetPtr->insert(blockIdx); + return Block(blockIdx, minIndex_ + blockIdx * blockSize_); + } } - usedBlocksSetPtr->insert(newBlockIndex); - return Block(newBlockIndex, minIndex_ + newBlockIndex * blockSize_); } // _____________________________________________________________________________ @@ -43,6 +45,7 @@ BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager( BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() { auto ptr = blankNodeManager_->usedBlocksSet_.wlock(); for (auto block : blocks_) { + AD_CONTRACT_CHECK(ptr->contains(block.blockIdx_)); ptr->erase(block.blockIdx_); } } diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h index e8e9c90d87..a39616e94f 100644 --- a/src/util/BlankNodeManager.h +++ b/src/util/BlankNodeManager.h @@ -97,6 +97,7 @@ class BlankNodeManager { [[nodiscard]] Block allocateBlock(); FRIEND_TEST(BlankNodeManager, blockAllocationAndFree); + FRIEND_TEST(BlankNodeManager, moveLocalBlankNodeManager); }; } // namespace ad_utility diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp index a3a05c9c9d..8a6e89ea6b 100644 --- a/test/BlankNodeManagerTest.cpp +++ b/test/BlankNodeManagerTest.cpp @@ -9,9 +9,10 @@ #include "util/GTestHelpers.h" namespace ad_utility { +// _____________________________________________________________________________ TEST(BlankNodeManager, blockAllocationAndFree) { BlankNodeManager bnm(0); - EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 0); + EXPECT_TRUE(bnm.usedBlocksSet_.rlock()->empty()); { // LocalBlankNodeManager allocates a new block @@ -22,7 +23,7 @@ TEST(BlankNodeManager, blockAllocationAndFree) { // Once the LocalBlankNodeManager is destroyed, all Blocks allocated through // it are freed/removed from the BlankNodeManager's set. - EXPECT_EQ(bnm.usedBlocksSet_.rlock()->size(), 0); + EXPECT_TRUE(bnm.usedBlocksSet_.rlock()->empty()); // Mock randomIntGenerator to let the block index generation collide. bnm.randBlockIndex_ = SlowRandomIntGenerator(0, 1); @@ -33,6 +34,7 @@ TEST(BlankNodeManager, blockAllocationAndFree) { } } +// _____________________________________________________________________________ TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { BlankNodeManager bnm(0); BlankNodeManager::LocalBlankNodeManager l(&bnm); @@ -51,8 +53,9 @@ TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { EXPECT_EQ(l.blocks_.size(), 2); } +// _____________________________________________________________________________ TEST(BlankNodeManager, maxNumOfBlocks) { - // Mock a high `minIndex_` to simulate reduced space in the usedBlocksSet_ + // Mock a high `minIndex_` to simulate reduced space in the `usedBlocksSet_` BlankNodeManager bnm(ValueId::maxIndex - 256 * BlankNodeManager::blockSize_ + 2); AD_EXPECT_THROW_WITH_MESSAGE( @@ -61,4 +64,19 @@ TEST(BlankNodeManager, maxNumOfBlocks) { "Critical high number of blank node blocks in use:")); } +// _____________________________________________________________________________ +TEST(BlankNodeManager, moveLocalBlankNodeManager) { + // This ensures that the `blocks_` of the `LocalBlankNodeManager` are moved + // correctly, such that they're freed/removed from the `BlankNodeManager` + // set only once. + BlankNodeManager bnm(0); + EXPECT_NO_THROW({ + BlankNodeManager::LocalBlankNodeManager l1(&bnm); + auto l2(std::move(l1)); + BlankNodeManager::LocalBlankNodeManager l3(&bnm); + l3 = std::move(l2); + }); + EXPECT_TRUE(bnm.usedBlocksSet_.rlock()->empty()); +} + } // namespace ad_utility diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index 59005958a7..ecb72ec189 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -535,6 +535,16 @@ TEST(IndexTest, getBlankNodeManager) { Index index{ad_utility::makeUnlimitedAllocator()}; EXPECT_ANY_THROW(index.getBlankNodeManager()); + // Index is initialized -> no throw const Index& index2 = getQec("")->getIndex(); EXPECT_NO_THROW(index2.getBlankNodeManager()); + + // Given an Index, ensure that the BlankNodeManager's `minIndex_` is set to + // the number of blank nodes the Index is initialized with. + std::string kb = + "_:a .\n" + "_:b .\n" + "_:c ."; + const Index& index3 = getQec(kb)->getIndex(); + EXPECT_EQ(index3.getBlankNodeManager()->minIndex_, 3); } From f572c7acea16d6b9763f652185b4d4f73e6e7092 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Fri, 18 Oct 2024 18:55:38 +0200 Subject: [PATCH 11/11] Update IndexFormatVersion.h --- src/index/IndexFormatVersion.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index/IndexFormatVersion.h b/src/index/IndexFormatVersion.h index d9b9110fee..a21fcc96e0 100644 --- a/src/index/IndexFormatVersion.h +++ b/src/index/IndexFormatVersion.h @@ -36,5 +36,5 @@ struct IndexFormatVersion { // The actual index version. Change it once the binary format of the index // changes. inline const IndexFormatVersion& indexFormatVersion{ - 1504, DateYearOrDuration{Date{2024, 10, 15}}}; + 1504, DateYearOrDuration{Date{2024, 10, 18}}}; } // namespace qlever