From ea9d39c6b61f051767660861b597ab64ef5a3b43 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Fri, 12 Jul 2024 03:12:23 +0200 Subject: [PATCH 01/38] ql:contains-word now can show the respective word-score. --- src/engine/TextIndexScanForWord.cpp | 6 ++++-- src/index/FTSAlgorithms.cpp | 7 +++++-- src/index/IndexImpl.Text.cpp | 10 ++++++++-- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 1 + 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 5a7fa19425..4f12e34aff 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -20,9 +20,10 @@ Result TextIndexScanForWord::computeResult( if (!isPrefix_) { IdTable smallIdTable{getExecutionContext()->getAllocator()}; - smallIdTable.setNumColumns(1); + smallIdTable.setNumColumns(2); smallIdTable.resize(idTable.numRows()); std::ranges::copy(idTable.getColumn(0), smallIdTable.getColumn(0).begin()); + std::ranges::copy(idTable.getColumn(2), smallIdTable.getColumn(1).begin()); return {std::move(smallIdTable), resultSortedOn(), LocalVocab{}}; } @@ -46,12 +47,13 @@ VariableToColumnMap TextIndexScanForWord::computeVariableToColumnMap() const { addDefinedVar(textRecordVar_.getMatchingWordVariable( std::string_view(word_).substr(0, word_.size() - 1))); } + addDefinedVar(textRecordVar_.getScoreVariable(word_)); return vcmap; } // _____________________________________________________________________________ size_t TextIndexScanForWord::getResultWidth() const { - return 1 + (isPrefix_ ? 1 : 0); + return 2 + (isPrefix_ ? 
1 : 0); } // _____________________________________________________________________________ diff --git a/src/index/FTSAlgorithms.cpp b/src/index/FTSAlgorithms.cpp index 0589c5ffee..087f97a1fe 100644 --- a/src/index/FTSAlgorithms.cpp +++ b/src/index/FTSAlgorithms.cpp @@ -10,19 +10,21 @@ // _____________________________________________________________________________ IdTable FTSAlgorithms::filterByRange(const IdRange& idRange, const IdTable& idTablePreFilter) { - AD_CONTRACT_CHECK(idTablePreFilter.numColumns() == 2); + AD_CONTRACT_CHECK(idTablePreFilter.numColumns() == 3); LOG(DEBUG) << "Filtering " << idTablePreFilter.getColumn(0).size() << " elements by ID range...\n"; IdTable idTableResult{idTablePreFilter.getAllocator()}; - idTableResult.setNumColumns(2); + idTableResult.setNumColumns(3); idTableResult.resize(idTablePreFilter.getColumn(0).size()); decltype(auto) resultCidColumn = idTableResult.getColumn(0); decltype(auto) resultWidColumn = idTableResult.getColumn(1); + decltype(auto) resultSidColumn = idTableResult.getColumn(2); size_t nofResultElements = 0; decltype(auto) preFilterCidColumn = idTablePreFilter.getColumn(0); decltype(auto) preFilterWidColumn = idTablePreFilter.getColumn(1); + decltype(auto) preFilterSidColumn = idTablePreFilter.getColumn(2); // TODO Use views::zip. for (size_t i = 0; i < preFilterWidColumn.size(); ++i) { // TODO proper Ids for the text stuff. 
@@ -36,6 +38,7 @@ IdTable FTSAlgorithms::filterByRange(const IdRange& idRange, preFilterWidColumn[i].getWordVocabIndex() <= idRange.last()) { resultCidColumn[nofResultElements] = preFilterCidColumn[i]; resultWidColumn[nofResultElements] = preFilterWidColumn[i]; + resultSidColumn[nofResultElements] = preFilterSidColumn[i]; nofResultElements++; } } diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 68d1ab87d0..85cde4e643 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -719,7 +719,7 @@ std::string_view IndexImpl::wordIdToString(WordIndex wordIndex) const { IdTable IndexImpl::readWordCl( const TextBlockMetaData& tbmd, const ad_utility::AllocatorWithLimit& allocator) const { - IdTable idTable{2, allocator}; + IdTable idTable{3, allocator}; vector cids = readGapComprList( tbmd._cl._nofElements, tbmd._cl._startContextlist, static_cast(tbmd._cl._startWordlist - tbmd._cl._startContextlist), @@ -735,6 +735,12 @@ IdTable IndexImpl::readWordCl( idTable.getColumn(1).begin(), [](WordIndex id) { return Id::makeFromWordVocabIndex(WordVocabIndex::make(id)); }); + std::ranges::transform( + readFreqComprList( + tbmd._cl._nofElements, tbmd._cl._startScorelist, + static_cast(tbmd._cl._lastByte + 1 - + tbmd._cl._startScorelist)), + idTable.getColumn(2).begin(), &Id::makeFromInt); return idTable; } @@ -773,7 +779,7 @@ IdTable IndexImpl::getWordPostingsForTerm( const ad_utility::AllocatorWithLimit& allocator) const { LOG(DEBUG) << "Getting word postings for term: " << term << '\n'; IdTable idTable{allocator}; - idTable.setNumColumns(term.ends_with('*') ? 2 : 1); + idTable.setNumColumns(term.ends_with('*') ? 
3 : 2); auto optionalTbmd = getTextBlockMetadataForWordOrPrefix(term); if (!optionalTbmd.has_value()) { return idTable; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 3f2049988c..839b4863fc 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1279,6 +1279,7 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } for (std::string_view s : std::vector( absl::StrSplit(name.substr(1, name.size() - 2), ' '))) { + addVisibleVariable(var->getScoreVariable(std::string(s))); if (!s.ends_with('*')) { continue; } From 30736efed14b6fa8c289724f0d9083cd31f551fe Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Fri, 12 Jul 2024 16:37:22 +0200 Subject: [PATCH 02/38] Fixed tests and formatted files. --- src/index/IndexImpl.Text.cpp | 7 +++---- test/QueryPlannerTestHelpers.h | 2 +- test/engine/TextIndexScanForWordTest.cpp | 16 +++++++++------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 85cde4e643..f5a88b8b50 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -736,10 +736,9 @@ IdTable IndexImpl::readWordCl( return Id::makeFromWordVocabIndex(WordVocabIndex::make(id)); }); std::ranges::transform( - readFreqComprList( - tbmd._cl._nofElements, tbmd._cl._startScorelist, - static_cast(tbmd._cl._lastByte + 1 - - tbmd._cl._startScorelist)), + readFreqComprList(tbmd._cl._nofElements, tbmd._cl._startScorelist, + static_cast(tbmd._cl._lastByte + 1 - + tbmd._cl._startScorelist)), idTable.getColumn(2).begin(), &Id::makeFromInt); return idTable; } diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index c33f9a2faa..d7a0121552 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -104,7 +104,7 @@ constexpr auto TextIndexScanForWord = [](Variable textRecordVar, string word) -> QetMatcher { 
return RootOperation<::TextIndexScanForWord>(AllOf( AD_PROPERTY(::TextIndexScanForWord, getResultWidth, - Eq(1 + word.ends_with('*'))), + Eq(2 + word.ends_with('*'))), AD_PROPERTY(::TextIndexScanForWord, textRecordVar, Eq(textRecordVar)), AD_PROPERTY(::TextIndexScanForWord, word, word))); }; diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 57f4b46e02..af9e8c109a 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -5,6 +5,7 @@ #include #include +#include "../printers/VariablePrinters.h" #include "../util/GTestHelpers.h" #include "../util/IdTableHelpers.h" #include "../util/IndexTestHelpers.h" @@ -29,17 +30,18 @@ TEST(TextIndexScanForWord, WordScanPrefix) { TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; - ASSERT_EQ(s1.getResultWidth(), 2); + ASSERT_EQ(s1.getResultWidth(), 3); auto result = s1.computeResultOnlyForTesting(); - ASSERT_EQ(result.idTable().numColumns(), 2); + ASSERT_EQ(result.idTable().numColumns(), 3); ASSERT_EQ(result.idTable().size(), 3); s2.getExternallyVisibleVariableColumns(); using enum ColumnIndexAndTypeInfo::UndefStatus; VariableToColumnMap expectedVariables{ {Variable{"?text2"}, {0, AlwaysDefined}}, - {Variable{"?ql_matchingword_text2_test"}, {1, AlwaysDefined}}}; + {Variable{"?ql_matchingword_text2_test"}, {1, AlwaysDefined}}, + {Variable{"?ql_score_text2_fixedEntity_test_42_"}, {2, AlwaysDefined}}}; EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); @@ -60,10 +62,10 @@ TEST(TextIndexScanForWord, WordScanBasic) { TextIndexScanForWord s1{qec, Variable{"?text1"}, "test"}; - ASSERT_EQ(s1.getResultWidth(), 1); + ASSERT_EQ(s1.getResultWidth(), 2); auto result = s1.computeResultOnlyForTesting(); - ASSERT_EQ(result.idTable().numColumns(), 1); + ASSERT_EQ(result.idTable().numColumns(), 2); 
ASSERT_EQ(result.idTable().size(), 2); ASSERT_EQ("\"he failed the test\"", @@ -73,10 +75,10 @@ TEST(TextIndexScanForWord, WordScanBasic) { TextIndexScanForWord s2{qec, Variable{"?text1"}, "testing"}; - ASSERT_EQ(s2.getResultWidth(), 1); + ASSERT_EQ(s2.getResultWidth(), 2); result = s2.computeResultOnlyForTesting(); - ASSERT_EQ(result.idTable().numColumns(), 1); + ASSERT_EQ(result.idTable().numColumns(), 2); ASSERT_EQ(result.idTable().size(), 1); ASSERT_EQ("\"testing can help\"", From e752db84561f375a200351fb7de2ecd73a8b036e Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Sat, 27 Jul 2024 16:00:28 +0200 Subject: [PATCH 03/38] New formatting for Word Score Variables. Changed where necessary and adapted unit tests. Missing e2e tests. --- src/engine/QueryPlanner.cpp | 4 +- src/engine/TextIndexScanForEntity.cpp | 4 +- src/engine/TextIndexScanForWord.cpp | 2 +- src/parser/data/Variable.cpp | 27 ++++++++- src/parser/data/Variable.h | 11 +++- .../sparqlParser/SparqlQleverVisitor.cpp | 7 ++- test/QueryPlannerTest.cpp | 60 ++++++++++--------- test/engine/TextIndexScanForWordTest.cpp | 2 +- 8 files changed, 77 insertions(+), 40 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index a1978b3bd1..c2b2de5753 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -899,14 +899,14 @@ QueryPlanner::SubtreePlan QueryPlanner::getTextLeafPlan( : *(node._variables.begin()); plan = makeSubtreePlan(_qec, cvar, evar, word); textLimits[cvar].entityVars_.push_back(evar); - textLimits[cvar].scoreVars_.push_back(cvar.getScoreVariable(evar)); + textLimits[cvar].scoreVars_.push_back(cvar.getEntityScoreVariable(evar)); } else { // Fixed entity case AD_CORRECTNESS_CHECK(node._variables.size() == 1); plan = makeSubtreePlan( _qec, cvar, node.triple_.o_.toString(), word); textLimits[cvar].scoreVars_.push_back( - cvar.getScoreVariable(node.triple_.o_.toString())); + cvar.getEntityScoreVariable(node.triple_.o_.toString())); } } else { plan = 
makeSubtreePlan(_qec, cvar, word); diff --git a/src/engine/TextIndexScanForEntity.cpp b/src/engine/TextIndexScanForEntity.cpp index 80150ed29d..1ce64ad552 100644 --- a/src/engine/TextIndexScanForEntity.cpp +++ b/src/engine/TextIndexScanForEntity.cpp @@ -48,10 +48,10 @@ VariableToColumnMap TextIndexScanForEntity::computeVariableToColumnMap() const { }; addDefinedVar(textRecordVar_); if (hasFixedEntity()) { - addDefinedVar(textRecordVar_.getScoreVariable(fixedEntity())); + addDefinedVar(textRecordVar_.getEntityScoreVariable(fixedEntity())); } else { addDefinedVar(entityVariable()); - addDefinedVar(textRecordVar_.getScoreVariable(entityVariable())); + addDefinedVar(textRecordVar_.getEntityScoreVariable(entityVariable())); } return vcmap; } diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 4f12e34aff..a95900459b 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -47,7 +47,7 @@ VariableToColumnMap TextIndexScanForWord::computeVariableToColumnMap() const { addDefinedVar(textRecordVar_.getMatchingWordVariable( std::string_view(word_).substr(0, word_.size() - 1))); } - addDefinedVar(textRecordVar_.getScoreVariable(word_)); + addDefinedVar(textRecordVar_.getWordScoreVariable(word_, isPrefix_)); return vcmap; } diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index 8b7a3207e7..3ee5bbc59e 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -55,7 +55,7 @@ Variable::Variable(std::string name) : _name{std::move(name)} { } // _____________________________________________________________________________ -Variable Variable::getScoreVariable( +Variable Variable::getEntityScoreVariable( const std::variant& varOrEntity) const { std::string_view type; std::string entity; @@ -78,6 +78,31 @@ Variable Variable::getScoreVariable( absl::StrCat(SCORE_VARIABLE_PREFIX, name().substr(1), type, entity)}; } +// 
_____________________________________________________________________________ +Variable Variable::getWordScoreVariable(const std::string& word, + const bool& isPrefix) const { + std::string_view type; + std::string_view wordToConvert; + std::string convertedWord; + if (isPrefix) { + wordToConvert = std::string_view(word.data(), word.size() - 1); + type = "prefix_"; + } else { + wordToConvert = std::string_view(word); + type = "word_"; + } + convertedWord += "_"; + for (char c : wordToConvert) { + if (isalpha(static_cast(c))) { + convertedWord += c; + } else { + absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); + } + } + return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, type, name().substr(1), + convertedWord)}; +} + // _____________________________________________________________________________ Variable Variable::getMatchingWordVariable(std::string_view term) const { return Variable{ diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index 9d23dc8ba1..c725c1792e 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -43,9 +43,18 @@ class Variable { // `?ql_someTextVar_fixedEntity_someFixedEntity`. // Note that if the the fixed entity contains non ascii characters they are // converted to numbers and escaped. - Variable getScoreVariable( + Variable getEntityScoreVariable( const std::variant& varOrEntity) const; + // Converts `?someTextVar` and `someWord` into + // `?ql_score_word_someTextVar_someWord. + // Converts `?someTextVar` and `somePrefix*` into + // `?ql_score_prefix_someTextVar_somePrefix`. + // Note that if the word contains non ascii characters they are converted to + // numbers and escaped. 
+ Variable getWordScoreVariable(const std::string& word, + const bool& isPrefix) const; + // Convert `?someVariable` into `?ql_matchingword_someVariable_someTerm` Variable getMatchingWordVariable(std::string_view term) const; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 839b4863fc..9c905fed31 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1279,7 +1279,8 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } for (std::string_view s : std::vector( absl::StrSplit(name.substr(1, name.size() - 2), ' '))) { - addVisibleVariable(var->getScoreVariable(std::string(s))); + addVisibleVariable( + var->getWordScoreVariable(std::string(s), s.ends_with('*'))); if (!s.ends_with('*')) { continue; } @@ -1288,9 +1289,9 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } } else if (propertyPath->asString() == CONTAINS_ENTITY_PREDICATE) { if (const auto* entVar = std::get_if(&object)) { - addVisibleVariable(var->getScoreVariable(*entVar)); + addVisibleVariable(var->getEntityScoreVariable(*entVar)); } else { - addVisibleVariable(var->getScoreVariable(object.toSparql())); + addVisibleVariable(var->getEntityScoreVariable(object.toSparql())); } } } diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index 7451016f2a..b56fa69ac4 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -884,12 +884,12 @@ TEST(QueryPlanner, TextLimit) { h::expect( "SELECT * WHERE { ?text ql:contains-word \"test*\" . 
?text " "ql:contains-entity } TEXTLIMIT 10", - h::TextLimit( - 10, - h::Join(wordScan(Var{"?text"}, "test*"), - entityScan(Var{"?text"}, "", "test*")), - Var{"?text"}, vector{}, - vector{Var{"?text"}.getScoreVariable("")}), + h::TextLimit(10, + h::Join(wordScan(Var{"?text"}, "test*"), + entityScan(Var{"?text"}, "", "test*")), + Var{"?text"}, vector{}, + vector{ + Var{"?text"}.getEntityScoreVariable("")}), qec); // Contains entity @@ -901,7 +901,8 @@ TEST(QueryPlanner, TextLimit) { h::Join(wordScan(Var{"?text"}, "test*"), entityScan(Var{"?text"}, Var{"?scientist"}, "test*")), Var{"?text"}, vector{Var{"?scientist"}}, - vector{Var{"?text"}.getScoreVariable(Var{"?scientist"})}), + vector{ + Var{"?text"}.getEntityScoreVariable(Var{"?scientist"})}), qec); // Contains entity and fixed entity @@ -909,15 +910,15 @@ TEST(QueryPlanner, TextLimit) { "SELECT * WHERE { ?text ql:contains-entity ?scientist . ?text " "ql:contains-word \"test*\" . ?text ql:contains-entity } " "TEXTLIMIT 5", - h::TextLimit( - 5, - h::UnorderedJoins( - wordScan(Var{"?text"}, "test*"), - entityScan(Var{"?text"}, Var{"?scientist"}, "test*"), - entityScan(Var{"?text"}, "", "test*")), - Var{"?text"}, vector{Var{"?scientist"}}, - vector{Var{"?text"}.getScoreVariable(Var{"?scientist"}), - Var{"?text"}.getScoreVariable("")}), + h::TextLimit(5, + h::UnorderedJoins( + wordScan(Var{"?text"}, "test*"), + entityScan(Var{"?text"}, Var{"?scientist"}, "test*"), + entityScan(Var{"?text"}, "", "test*")), + Var{"?text"}, vector{Var{"?scientist"}}, + vector{ + Var{"?text"}.getEntityScoreVariable(Var{"?scientist"}), + Var{"?text"}.getEntityScoreVariable("")}), qec); // Contains two entities @@ -932,8 +933,9 @@ TEST(QueryPlanner, TextLimit) { entityScan(Var{"?text"}, Var{"?scientist"}, "test*"), entityScan(Var{"?text"}, Var{"?scientist2"}, "test*")), Var{"?text"}, vector{Var{"?scientist"}, Var{"?scientist2"}}, - vector{Var{"?text"}.getScoreVariable(Var{"?scientist"}), - Var{"?text"}.getScoreVariable(Var{"?scientist2"})}), 
+ vector{ + Var{"?text"}.getEntityScoreVariable(Var{"?scientist"}), + Var{"?text"}.getEntityScoreVariable(Var{"?scientist2"})}), qec); // Contains two text variables. Also checks if the textlimit at an efficient @@ -950,17 +952,17 @@ TEST(QueryPlanner, TextLimit) { entityScan(Var{"?text1"}, Var{"?scientist1"}, "test*")), Var{"?text1"}, vector{Var{"?scientist1"}}, vector{ - Var{"?text1"}.getScoreVariable(Var{"?scientist1"})}), - h::TextLimit(5, - h::UnorderedJoins( - wordScan(Var{"?text2"}, "test*"), - entityScan(Var{"?text2"}, Var{"?author1"}, "test*"), - entityScan(Var{"?text2"}, Var{"?author2"}, "test*")), - Var{"?text2"}, - vector{Var{"?author1"}, Var{"?author2"}}, - vector{ - Var{"?text2"}.getScoreVariable(Var{"?author1"}), - Var{"?text2"}.getScoreVariable(Var{"?author2"})})), + Var{"?text1"}.getEntityScoreVariable(Var{"?scientist1"})}), + h::TextLimit( + 5, + h::UnorderedJoins( + wordScan(Var{"?text2"}, "test*"), + entityScan(Var{"?text2"}, Var{"?author1"}, "test*"), + entityScan(Var{"?text2"}, Var{"?author2"}, "test*")), + Var{"?text2"}, vector{Var{"?author1"}, Var{"?author2"}}, + vector{ + Var{"?text2"}.getEntityScoreVariable(Var{"?author1"}), + Var{"?text2"}.getEntityScoreVariable(Var{"?author2"})})), qec); } diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index af9e8c109a..588a549d98 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -41,7 +41,7 @@ TEST(TextIndexScanForWord, WordScanPrefix) { VariableToColumnMap expectedVariables{ {Variable{"?text2"}, {0, AlwaysDefined}}, {Variable{"?ql_matchingword_text2_test"}, {1, AlwaysDefined}}, - {Variable{"?ql_score_text2_fixedEntity_test_42_"}, {2, AlwaysDefined}}}; + {Variable{"?ql_score_prefix_text2_test"}, {2, AlwaysDefined}}}; EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); From d0b9ee8a09b663d8bdd88a90b52b3e945527273a Mon Sep 17 00:00:00 2001 
From: Felix Meisen Date: Mon, 29 Jul 2024 18:55:10 +0200 Subject: [PATCH 04/38] Added getWordScoreVariable for std::string_view --- src/parser/data/Variable.cpp | 25 ++++++++++++++++++- src/parser/data/Variable.h | 7 ++++-- .../sparqlParser/SparqlQleverVisitor.cpp | 3 +-- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index 3ee5bbc59e..67e47a647c 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -80,7 +80,7 @@ Variable Variable::getEntityScoreVariable( // _____________________________________________________________________________ Variable Variable::getWordScoreVariable(const std::string& word, - const bool& isPrefix) const { + bool isPrefix) const { std::string_view type; std::string_view wordToConvert; std::string convertedWord; @@ -103,6 +103,29 @@ Variable Variable::getWordScoreVariable(const std::string& word, convertedWord)}; } +// _____________________________________________________________________________ +Variable Variable::getWordScoreVariable(std::string_view word, + bool isPrefix) const { + std::string_view type; + std::string convertedWord; + if (isPrefix) { + word.remove_suffix(1); + type = "prefix_"; + } else { + type = "word_"; + } + convertedWord = "_"; + for (char c : word) { + if (isalpha(static_cast(c))) { + convertedWord += c; + } else { + absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); + } + } + return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, type, name().substr(1), + convertedWord)}; +} + // _____________________________________________________________________________ Variable Variable::getMatchingWordVariable(std::string_view term) const { return Variable{ diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index c725c1792e..265320e049 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -52,8 +52,11 @@ class Variable { // `?ql_score_prefix_someTextVar_somePrefix`. 
// Note that if the word contains non ascii characters they are converted to // numbers and escaped. - Variable getWordScoreVariable(const std::string& word, - const bool& isPrefix) const; + Variable getWordScoreVariable(const std::string& word, bool isPrefix) const; + + // Does the same thing as the function with std::string& param only for + // std::string_view + Variable getWordScoreVariable(std::string_view word, bool isPrefix) const; // Convert `?someVariable` into `?ql_matchingword_someVariable_someTerm` Variable getMatchingWordVariable(std::string_view term) const; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index c0b0379cbf..8e991bfa17 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1279,8 +1279,7 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } for (std::string_view s : std::vector( absl::StrSplit(name.substr(1, name.size() - 2), ' '))) { - addVisibleVariable( - var->getWordScoreVariable(std::string(s), s.ends_with('*'))); + addVisibleVariable(var->getWordScoreVariable(s, s.ends_with('*'))); if (!s.ends_with('*')) { continue; } From 29511c69b9035a66335e1137f170a8dc95fa1f32 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Fri, 15 Nov 2024 20:05:44 +0100 Subject: [PATCH 05/38] Made it possible to construct query execution contexts with a text index. This is done by passing the contents of the words file and the docs file as strings and then building the text index as usual. A basic test exists (TODO: add more edge-case tests), and the e2e tests are fixed. 
--- e2e/scientists_queries.yaml | 33 ++++++++---- src/engine/TextIndexScanForWord.cpp | 1 - src/index/Index.cpp | 3 ++ src/index/Index.h | 1 + src/index/IndexImpl.Text.cpp | 12 +++-- src/index/IndexImpl.h | 8 ++- src/index/TextMetaData.h | 6 +++ src/parser/ContextFileParser.h | 1 + test/engine/TextIndexScanForWordTest.cpp | 46 +++++++++++++++- test/engine/TextIndexScanTestHelpers.h | 19 ++++++- test/util/IndexTestHelpers.cpp | 69 +++++++++++++++++------- test/util/IndexTestHelpers.h | 9 +++- 12 files changed, 169 insertions(+), 39 deletions(-) diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index 1fc78430be..421329c58b 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -55,31 +55,43 @@ queries: ?t ql:contains-word "RElaT* phySIKalische rela*" } checks: - - num_cols: 5 - - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_matchingword_t_relat", "?ql_matchingword_t_rela" ] + - num_cols: 8 + - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_score_prefix_t_RElaT", "?ql_matchingword_t_relat", "?ql_score_word_t_phySIKalische", "?ql_score_prefix_t_rela", "?ql_matchingword_t_rela" ] - contains_row: - "" - null - null + - null - "relationship" + - null + - null - "relationship" - contains_row: - "" - null - null + - null - "relationship" + - null + - null - "relativity" - contains_row: - "" - null - null + - null - "relativity" + - null + - null - "relationship" - contains_row: - "" - null - null + - null - "relativity" + - null + - null - "relativity" - query: algo-star-female-scientists @@ -151,7 +163,7 @@ queries: } TEXTLIMIT 2 checks: - - num_cols: 7 + - num_cols: 9 - num_rows: 18 - query: algor-star-female-born-before-1940 @@ -192,7 +204,7 @@ queries: } ORDER BY DESC(?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_) checks: - - num_cols: 5 + - num_cols: 6 - num_rows: 7 - contains_row: - "" @@ -202,6 +214,7 @@ queries: Charles Babbage, also known as' the father of computers', and in particular, Babbage's work on the 
Analytical Engine." - null + - null - "relationship" - order_numeric: {"dir": "DESC", "var" : "?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_"} @@ -219,7 +232,7 @@ queries: ORDER BY DESC(?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_) TEXTLIMIT 2 checks: - - num_cols: 5 + - num_cols: 6 - num_rows: 3 - contains_row: - "" @@ -229,6 +242,7 @@ queries: Charles Babbage, also known as' the father of computers', and in particular, Babbage's work on the Analytical Engine." - null + - null - "relationship" - order_numeric: {"dir": "DESC", "var" : "?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_"} @@ -246,7 +260,7 @@ queries: } TEXTLIMIT 1 checks: - - num_cols: 6 + - num_cols: 7 - num_rows: 2 - contains_row: - "" @@ -255,6 +269,7 @@ queries: with Somerville to visit Babbage as often as she could." - null - null + - null - "relationship" @@ -1391,10 +1406,10 @@ queries: ?t ql:contains-word "algo* herm* primary" } checks: - - num_cols: 5 + - num_cols: 8 - num_rows: 1 - - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_matchingword_t_algo", "?ql_matchingword_t_herm" ] - - contains_row: [ "",null,"Hermann's algorithm for primary decomposition is still in use now.","algorithm","hermann" ] + - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_score_prefix_t_algo", "?ql_matchingword_t_algo", "?ql_score_prefix_t_herm", "?ql_matchingword_t_herm", "?ql_score_word_t_primary" ] + - contains_row: [ "",null,"Hermann's algorithm for primary decomposition is still in use now.",null,"algorithm",null,"hermann",null ] - query : select_asterisk_regex-lastname-stein diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 1bf6a4d619..cc37400817 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -24,7 +24,6 @@ ProtoResult TextIndexScanForWord::computeResult( smallIdTable.resize(idTable.numRows()); std::ranges::copy(idTable.getColumn(0), smallIdTable.getColumn(0).begin()); 
std::ranges::copy(idTable.getColumn(2), smallIdTable.getColumn(1).begin()); - return {std::move(smallIdTable), resultSortedOn(), LocalVocab{}}; } diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 47fcad9c82..a652b85bfc 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -232,6 +232,9 @@ size_t Index::getNofEntityPostings() const { return pimpl_->getNofEntityPostings(); } +// ____________________________________________________________________________ +size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } + // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { return pimpl_->numDistinctSubjects(); diff --git a/src/index/Index.h b/src/index/Index.h index ec408f15df..1fac924aca 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,6 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; + size_t getNofNonLiterals() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 9c2afad181..211e5e049d 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -65,7 +65,7 @@ cppcoro::generator IndexImpl::wordsInTextRecords( if (!isLiteral(text)) { continue; } - ContextFileParser::Line entityLine{text, true, contextId, 1}; + ContextFileParser::Line entityLine{text, true, contextId, 1, true}; co_yield entityLine; std::string_view textView = text; textView = textView.substr(0, textView.rfind('"')); @@ -239,6 +239,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, size_t nofWordPostings = 0; size_t nofEntityPostings = 0; size_t entityNotFoundErrorMsgCount = 0; + size_t nofLiterals = 0; for (auto line : wordsInTextRecords(contextFile, addWordsFromLiterals)) { if (line._contextId != 
currentContext) { @@ -258,6 +259,9 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, // Note that `entitiesInContext` is a HashMap, so the `Id`s don't have // to be contiguous. entitiesInContext[Id::makeFromVocabIndex(eid)] += line._score; + if (line._isLiteralEntity) { + ++nofLiterals; + } } else { if (entityNotFoundErrorMsgCount < 20) { LOG(WARN) << "Entity from text not in KB: " << line._word << '\n'; @@ -294,6 +298,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); + textMeta_.setNofNonLiterals(nofContexts - nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; @@ -415,7 +420,7 @@ ContextListMetaData IndexImpl::writePostings(ad_utility::File& out, size_t n = 0; - WordToCodeMap wordCodeMap; + WordCodeMap wordCodeMap; WordCodebook wordCodebook; ScoreCodeMap scoreCodeMap; ScoreCodebook scoreCodebook; @@ -646,10 +651,11 @@ size_t IndexImpl::writeList(Numeric* data, size_t nofElements, // _____________________________________________________________________________ void IndexImpl::createCodebooks(const vector& postings, - IndexImpl::WordToCodeMap& wordCodemap, + IndexImpl::WordCodeMap& wordCodemap, IndexImpl::WordCodebook& wordCodebook, IndexImpl::ScoreCodeMap& scoreCodemap, IndexImpl::ScoreCodebook& scoreCodebook) const { + // There should be a more efficient way to do this (Felix Meisen) ad_utility::HashMap wfMap; ad_utility::HashMap sfMap; for (const auto& p : postings) { diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 0d5b396ccc..6a350a4a6f 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -424,6 +424,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } + size_t getNofNonLiterals() const { return textMeta_.getNofNonLiterals(); } bool 
hasAllPermutations() const { return SPO().isLoaded(); } @@ -624,14 +625,17 @@ class IndexImpl { ad_utility::File& file) const; // TODO understand what the "codes" are, are they better just ints? - typedef ad_utility::HashMap WordToCodeMap; + // After using createCodebooks on these types, the lowest codes refer to the + // most frequent WordIndex/Score. The maps are mapping those codes to their + // respective frequency. + typedef ad_utility::HashMap WordCodeMap; typedef ad_utility::HashMap ScoreCodeMap; typedef vector WordCodebook; typedef vector ScoreCodebook; //! Creates codebooks for lists that are supposed to be entropy encoded. void createCodebooks(const vector& postings, - WordToCodeMap& wordCodemap, WordCodebook& wordCodebook, + WordCodeMap& wordCodemap, WordCodebook& wordCodebook, ScoreCodeMap& scoreCodemap, ScoreCodebook& scoreCodebook) const; diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 30fda07921..2198e052e4 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,6 +98,10 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } + size_t getNofNonLiterals() const { return _nofNonLiterals; } + + void setNofNonLiterals(size_t n) { _nofNonLiterals = n; } + const string& getName() const { return _name; } void setName(const string& name) { _name = name; } @@ -109,6 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; + size_t _nofNonLiterals = 0; string _name; vector _blocks; @@ -118,6 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; + serializer | arg._nofNonLiterals; serializer | arg._name; serializer | arg._blocks; } diff --git a/src/parser/ContextFileParser.h b/src/parser/ContextFileParser.h index e00a268d24..ba8d7bac9c 100644 --- a/src/parser/ContextFileParser.h +++ b/src/parser/ContextFileParser.h @@ -21,6 +21,7 @@ class 
ContextFileParser { bool _isEntity; TextRecordIndex _contextId; Score _score; + bool _isLiteralEntity = false; }; explicit ContextFileParser(const string& contextFile, diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 588a549d98..e421a8ca96 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -22,10 +22,52 @@ namespace { std::string kg = "

\"he failed the test\" .

\"testing can help\" .

" "\"some other sentence\" .

\"the test on friday was really hard\" " - ". . ."; + ". . . ."; TEST(TextIndexScanForWord, WordScanPrefix) { - auto qec = getQec(kg, true, true, true, 16_B, true); + std::string wordsFileContent; + wordsFileContent = + "astronomer\t0\t1\t1\n" + "\t1\t1\t0\n" + "scientist\t0\t1\t1\n" + "field\t0\t1\t1\n" + "astronomy\t0\t1\t1\n" + "astronomer\t0\t2\t0\n" + "\t1\t2\t0\n" + ":s:firstsentence\t0\t2\t0\n" + "scientist\t0\t2\t0\n" + "field\t0\t2\t0\n" + "astronomy\t0\t2\t0\n" + "astronomy\t0\t3\t1\n" + "concentrates\t0\t3\t1\n" + "studies\t0\t3\t1\n" + "specific\t0\t3\t1\n" + "question\t0\t3\t1\n" + "outside\t0\t3\t1\n" + "scope\t0\t3\t1\n" + "earth\t0\t3\t1\n" + "astronomy\t0\t4\t1\n" + "concentrates\t0\t4\t1\n" + "studies\t0\t4\t1\n" + "field\t0\t4\t1\n" + "outside\t0\t4\t1\n" + "scope\t0\t4\t1\n" + "earth\t0\t4\t1\n"; + std::string docsFileContent; + docsFileContent = + "4\tAn astronomer is a scientist in the field of astronomy who " + "concentrates their studies on a specific question or field outside of " + "the scope of Earth.\n"; + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); + + TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; + auto tresult = t1.computeResultOnlyForTesting(); + ASSERT_EQ( + "An astronomer is a scientist in the field of astronomy who concentrates " + "their studies on a specific question or field outside of the scope of " + "Earth.", + h::getTextExcerptFromResultTable(qec, tresult, 0)); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 597344ad9e..4b216d522b 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -8,11 +8,26 @@ namespace textIndexScanTestHelpers { // NOTE: this function exploits a "lucky accident" that allows us to // obtain the textRecord using indexToString. 
// TODO: Implement a more elegant/stable version +// Idea for a more stable version is to add the literals to the docsfile +// which is later parsed and written to the docsDB. This would lead to a +// possible retrieval of the literals text with the getTextExcerpt function. +// The only problem is the increased size of the docsDB and the double saving +// of the literals. inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - return qec->getIndex().indexToString( - result.idTable().getColumn(0)[rowIndex].getVocabIndex()); + uint64_t offset = qec->getIndex().getNofNonLiterals(); + uint64_t shiftedTextRecordId = + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get() - + offset; + return qec->getIndex().indexToString(VocabIndex::make(shiftedTextRecordId)); +} + +inline string getTextExcerptFromResultTable(const QueryExecutionContext* qec, + const ProtoResult& result, + const size_t& rowIndex) { + return qec->getIndex().getTextExcerpt( + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex()); } inline string getEntityFromResultTable(const QueryExecutionContext* qec, diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 79eb77b0d5..ec0d2fbba1 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -46,7 +46,12 @@ std::vector getAllIndexFilenames( indexBasename + ".prefixes", indexBasename + ".vocabulary.internal", indexBasename + ".vocabulary.external", - indexBasename + ".vocabulary.external.offsets"}; + indexBasename + ".vocabulary.external.offsets", + indexBasename + ".wordsfile", + indexBasename + ".docsfile", + indexBasename + ".text.index", + indexBasename + ".text.vocabulary", + indexBasename + ".text.docsDB"}; } namespace { @@ -134,7 +139,9 @@ Index makeTestIndex(const std::string& indexBasename, bool loadAllPermutations, bool usePatterns, [[maybe_unused]] bool usePrefixCompression, ad_utility::MemorySize 
blocksizePermutations, - bool createTextIndex) { + bool createTextIndex, bool addWordsFromLiterals, + std::optional wordsFileContent, + std::optional docsFileContent) { // Ignore the (irrelevant) log output of the index building and loading during // these tests. static std::ostringstream ignoreLogStream; @@ -181,7 +188,29 @@ Index makeTestIndex(const std::string& indexBasename, std::nullopt}; index.createFromFiles({spec}); if (createTextIndex) { - index.addTextFromContextFile("", true); + if (wordsFileContent.has_value() && docsFileContent.has_value()) { + // Create and write to words- and docsfile to later build a full text + // index from them + ad_utility::File wordsFile(indexBasename + ".wordsfile", "w"); + ad_utility::File docsFile(indexBasename + ".docsfile", "w"); + wordsFile.write(wordsFileContent.value().c_str(), + wordsFileContent.value().size()); + docsFile.write(docsFileContent.value().c_str(), + docsFileContent.value().size()); + wordsFile.close(); + docsFile.close(); + index.setKbName(indexBasename); + index.setTextName(indexBasename); + index.setOnDiskBase(indexBasename); + if (addWordsFromLiterals) { + index.addTextFromContextFile(indexBasename + ".wordsfile", true); + } else { + index.addTextFromContextFile(indexBasename + ".wordsfile", false); + } + index.buildDocsDB(indexBasename + ".docsfile"); + } else if (addWordsFromLiterals) { + index.addTextFromContextFile("", true); + } } } if (!usePatterns || !loadAllPermutations) { @@ -216,7 +245,9 @@ QueryExecutionContext* getQec(std::optional turtleInput, bool loadAllPermutations, bool usePatterns, bool usePrefixCompression, ad_utility::MemorySize blocksizePermutations, - bool createTextIndex) { + bool createTextIndex, bool addWordsFromLiterals, + std::optional wordsFileContent, + std::optional docsFileContent) { // Similar to `absl::Cleanup`. 
Calls the `callback_` in the destructor, but // the callback is stored as a `std::function`, which allows to store // different types of callbacks in the same wrapper type. @@ -263,20 +294,22 @@ QueryExecutionContext* getQec(std::optional turtleInput, std::string testIndexBasename = "_staticGlobalTestIndex" + std::to_string(contextMap.size()); contextMap.emplace( - key, Context{TypeErasedCleanup{[testIndexBasename]() { - for (const std::string& indexFilename : - getAllIndexFilenames(testIndexBasename)) { - // Don't log when a file can't be deleted, - // because the logging might already be - // destroyed. - ad_utility::deleteFile(indexFilename, false); - } - }}, - std::make_unique(makeTestIndex( - testIndexBasename, turtleInput, loadAllPermutations, - usePatterns, usePrefixCompression, - blocksizePermutations, createTextIndex)), - std::make_unique()}); + key, + Context{TypeErasedCleanup{[testIndexBasename]() { + for (const std::string& indexFilename : + getAllIndexFilenames(testIndexBasename)) { + // Don't log when a file can't be deleted, + // because the logging might already be + // destroyed. 
+ ad_utility::deleteFile(indexFilename, false); + } + }}, + std::make_unique(makeTestIndex( + testIndexBasename, turtleInput, loadAllPermutations, + usePatterns, usePrefixCompression, blocksizePermutations, + createTextIndex, addWordsFromLiterals, wordsFileContent, + docsFileContent)), + std::make_unique()}); } auto* qec = contextMap.at(key).qec_.get(); qec->getIndex().getImpl().setGlobalIndexAndComparatorOnlyForTesting(); diff --git a/test/util/IndexTestHelpers.h b/test/util/IndexTestHelpers.h index 3e09604613..6bbe0c9195 100644 --- a/test/util/IndexTestHelpers.h +++ b/test/util/IndexTestHelpers.h @@ -44,7 +44,10 @@ Index makeTestIndex(const std::string& indexBasename, bool loadAllPermutations = true, bool usePatterns = true, bool usePrefixCompression = true, ad_utility::MemorySize blocksizePermutations = 16_B, - bool createTextIndex = false); + bool createTextIndex = false, + bool addWordsFromLiterals = true, + std::optional wordsFileContent = std::nullopt, + std::optional docsFileContent = std::nullopt); // Return a static `QueryExecutionContext` that refers to an index that was // build using `makeTestIndex` (see above). The index (most notably its @@ -55,7 +58,9 @@ QueryExecutionContext* getQec( bool loadAllPermutations = true, bool usePatterns = true, bool usePrefixCompression = true, ad_utility::MemorySize blocksizePermutations = 16_B, - bool createTextIndex = false); + bool createTextIndex = false, bool addWordsFromLiterals = true, + std::optional wordsFileContent = std::nullopt, + std::optional docsFileContent = std::nullopt); // Return a lambda that takes a string and converts it into an ID by looking // it up in the vocabulary of `index`. 
An `AD_CONTRACT_CHECK` will fail if the From 602140103074db738f28def276471e48abade190 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Sun, 17 Nov 2024 15:24:18 +0100 Subject: [PATCH 06/38] Reduced usage of column copying in TextIndexScanForWord.cpp --- src/engine/TextIndexScanForWord.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index cc37400817..e6a66d5bc6 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -19,12 +19,9 @@ ProtoResult TextIndexScanForWord::computeResult( word_, getExecutionContext()->getAllocator()); if (!isPrefix_) { - IdTable smallIdTable{getExecutionContext()->getAllocator()}; - smallIdTable.setNumColumns(2); - smallIdTable.resize(idTable.numRows()); - std::ranges::copy(idTable.getColumn(0), smallIdTable.getColumn(0).begin()); - std::ranges::copy(idTable.getColumn(2), smallIdTable.getColumn(1).begin()); - return {std::move(smallIdTable), resultSortedOn(), LocalVocab{}}; + ColumnIndex columnsToKeep[] = {0, 2}; + idTable.setColumnSubset(std::span{columnsToKeep}); + return {std::move(idTable), resultSortedOn(), LocalVocab{}}; } // Add details to the runtimeInfo. This is has no effect on the result. From ed9fbda49cf6a4e3caf39c17da5d62e26b55d2d7 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Wed, 4 Dec 2024 15:07:51 +0100 Subject: [PATCH 07/38] Changed the counting of nofNonLiterals to nofLiterals. Some methods are still unstable because of the way nofContexts are counted. Implemented new more refined tests. 
--- src/index/Index.cpp | 2 +- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 3 +- src/index/IndexImpl.h | 2 +- src/index/TextMetaData.h | 8 +- test/engine/TextIndexScanForWordTest.cpp | 154 ++++++++++++++++------- test/engine/TextIndexScanTestHelpers.h | 28 +++-- 7 files changed, 135 insertions(+), 64 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index a652b85bfc..c8d1b1b40f 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,7 +233,7 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } +size_t Index::getNofLiterals() const { return pimpl_->getNofLiterals(); } // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 1fac924aca..d43d363d7d 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; - size_t getNofNonLiterals() const; + size_t getNofLiterals() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 211e5e049d..1b1f69b9eb 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -235,6 +235,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, ad_utility::HashMap wordsInContext; ad_utility::HashMap entitiesInContext; auto currentContext = TextRecordIndex::make(0); + // The nofContexts can be misleading since it also counts empty contexts size_t nofContexts = 0; size_t nofWordPostings = 0; size_t nofEntityPostings = 0; @@ -298,7 +299,7 @@ void IndexImpl::processWordsForInvertedLists(const string& 
contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - textMeta_.setNofNonLiterals(nofContexts - nofLiterals); + textMeta_.setNofLiterals(nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 6a350a4a6f..71b59654db 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -424,7 +424,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofNonLiterals() const { return textMeta_.getNofNonLiterals(); } + size_t getNofLiterals() const { return textMeta_.getNofLiterals(); } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 2198e052e4..2d45ce28d2 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,9 +98,9 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } - size_t getNofNonLiterals() const { return _nofNonLiterals; } + size_t getNofLiterals() const { return _nofLiterals; } - void setNofNonLiterals(size_t n) { _nofNonLiterals = n; } + void setNofLiterals(size_t n) { _nofLiterals = n; } const string& getName() const { return _name; } @@ -113,7 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; - size_t _nofNonLiterals = 0; + size_t _nofLiterals = 0; string _name; vector _blocks; @@ -123,7 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; - serializer | arg._nofNonLiterals; + serializer | arg._nofLiterals; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index e421a8ca96..f3062b6ee1 100644 --- 
a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -24,50 +24,95 @@ std::string kg = "\"some other sentence\" .

\"the test on friday was really hard\" " ". . . ."; +std::string wordsFileContent = + "astronomer\t0\t1\t1\n" + "\t1\t1\t0\n" + "scientist\t0\t1\t1\n" + "field\t0\t1\t1\n" + "astronomy\t0\t1\t1\n" + "astronomer\t0\t2\t0\n" + "\t1\t2\t0\n" + ":s:firstsentence\t0\t2\t0\n" + "scientist\t0\t2\t0\n" + "field\t0\t2\t0\n" + "astronomy\t0\t2\t0\n" + "astronomy\t0\t3\t1\n" + "concentrates\t0\t3\t1\n" + "studies\t0\t3\t1\n" + "specific\t0\t3\t1\n" + "question\t0\t3\t1\n" + "outside\t0\t3\t1\n" + "scope\t0\t3\t1\n" + "earth\t0\t3\t1\n" + "astronomy\t0\t4\t1\n" + "concentrates\t0\t4\t1\n" + "studies\t0\t4\t1\n" + "field\t0\t4\t1\n" + "outside\t0\t4\t1\n" + "scope\t0\t4\t1\n" + "earth\t0\t4\t1\n" + "tester\t0\t5\t1\n" + "rockets\t0\t5\t1\n" + "astronomer\t0\t5\t1\n" + "\t1\t5\t0\n" + "although\t0\t5\t1\n" + "astronomer\t0\t6\t0\n" + "\t1\t6\t0\n" + "although\t0\t6\t0\n" + "\t1\t6\t0\n" + "space\t0\t6\t1\n" + "\t1\t7\t0\n" + "space\t0\t7\t0\n" + "earth\t0\t7\t1\n"; + +std::string docsFileContent = + "4\tAn astronomer is a scientist in the field of astronomy who " + "concentrates their studies on a specific question or field outside of " + "the scope of Earth.\n" + "7\tThe Tester of the rockets can be an astronomer too although they " + "might not be in space but on earth.\n"; + +std::string firstDocText = + "An astronomer is a scientist in the field of " + "astronomy who concentrates their studies on a " + "specific question or field outside of the scope of " + "Earth."; + +std::string secondDocText = + "The Tester of the rockets can be an astronomer " + "too although they might not be in space but on " + "earth."; + TEST(TextIndexScanForWord, WordScanPrefix) { - std::string wordsFileContent; - wordsFileContent = - "astronomer\t0\t1\t1\n" - "\t1\t1\t0\n" - "scientist\t0\t1\t1\n" - "field\t0\t1\t1\n" - "astronomy\t0\t1\t1\n" - "astronomer\t0\t2\t0\n" - "\t1\t2\t0\n" - ":s:firstsentence\t0\t2\t0\n" - "scientist\t0\t2\t0\n" - "field\t0\t2\t0\n" - "astronomy\t0\t2\t0\n" - 
"astronomy\t0\t3\t1\n" - "concentrates\t0\t3\t1\n" - "studies\t0\t3\t1\n" - "specific\t0\t3\t1\n" - "question\t0\t3\t1\n" - "outside\t0\t3\t1\n" - "scope\t0\t3\t1\n" - "earth\t0\t3\t1\n" - "astronomy\t0\t4\t1\n" - "concentrates\t0\t4\t1\n" - "studies\t0\t4\t1\n" - "field\t0\t4\t1\n" - "outside\t0\t4\t1\n" - "scope\t0\t4\t1\n" - "earth\t0\t4\t1\n"; - std::string docsFileContent; - docsFileContent = - "4\tAn astronomer is a scientist in the field of astronomy who " - "concentrates their studies on a specific question or field outside of " - "the scope of Earth.\n"; auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, docsFileContent); TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; auto tresult = t1.computeResultOnlyForTesting(); - ASSERT_EQ( - "An astronomer is a scientist in the field of astronomy who concentrates " - "their studies on a specific question or field outside of the scope of " - "Earth.", - h::getTextExcerptFromResultTable(qec, tresult, 0)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 0)); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 0)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 1)); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 1)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 2)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 2)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 3)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 3)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 4)); + ASSERT_EQ(TextRecordIndex::make(3), + h::getTextRecordIdFromResultTable(qec, tresult, 4)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 5)); + ASSERT_EQ(TextRecordIndex::make(4), + 
h::getTextRecordIdFromResultTable(qec, tresult, 5)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 6)); + ASSERT_EQ(TextRecordIndex::make(5), + h::getTextRecordIdFromResultTable(qec, tresult, 6)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 7)); + ASSERT_EQ(TextRecordIndex::make(6), + h::getTextRecordIdFromResultTable(qec, tresult, 7)); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -76,7 +121,7 @@ TEST(TextIndexScanForWord, WordScanPrefix) { auto result = s1.computeResultOnlyForTesting(); ASSERT_EQ(result.idTable().numColumns(), 3); - ASSERT_EQ(result.idTable().size(), 3); + ASSERT_EQ(result.idTable().size(), 4); s2.getExternallyVisibleVariableColumns(); using enum ColumnIndexAndTypeInfo::UndefStatus; @@ -87,20 +132,25 @@ TEST(TextIndexScanForWord, WordScanPrefix) { EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); - ASSERT_EQ(h::combineToString("\"he failed the test\"", "test"), + ASSERT_EQ(h::combineToString(secondDocText, "tester"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 0), h::getWordFromResultTable(qec, result, 0))); - ASSERT_EQ(h::combineToString("\"testing can help\"", "testing"), + + ASSERT_EQ(h::combineToString("\"he failed the test\"", "test"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 1), h::getWordFromResultTable(qec, result, 1))); + ASSERT_EQ(h::combineToString("\"testing can help\"", "testing"), + h::combineToString(h::getTextRecordFromResultTable(qec, result, 2), + h::getWordFromResultTable(qec, result, 2))); ASSERT_EQ( h::combineToString("\"the test on friday was really hard\"", "test"), - h::combineToString(h::getTextRecordFromResultTable(qec, result, 2), - h::getWordFromResultTable(qec, result, 2))); + h::combineToString(h::getTextRecordFromResultTable(qec, result, 3), + h::getWordFromResultTable(qec, result, 
3))); } TEST(TextIndexScanForWord, WordScanBasic) { - auto qec = getQec(kg, true, true, true, 16_B, true); + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test"}; @@ -125,10 +175,21 @@ TEST(TextIndexScanForWord, WordScanBasic) { ASSERT_EQ("\"testing can help\"", h::getTextRecordFromResultTable(qec, result, 0)); + + TextIndexScanForWord s3{qec, Variable{"?text1"}, "tester"}; + + ASSERT_EQ(s3.getResultWidth(), 2); + + result = s3.computeResultOnlyForTesting(); + ASSERT_EQ(result.idTable().numColumns(), 2); + ASSERT_EQ(result.idTable().size(), 1); + + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, result, 0)); } TEST(TextIndexScanForWord, CacheKey) { - auto qec = getQec(kg, true, true, true, 16_B, true); + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -151,7 +212,8 @@ TEST(TextIndexScanForWord, CacheKey) { } TEST(TextIndexScanForWord, KnownEmpty) { - auto qec = getQec(kg, true, true, true, 16_B, true); + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); TextIndexScanForWord s1{qec, Variable{"?text1"}, "nonExistentWord*"}; ASSERT_TRUE(s1.knownEmptyResult()); diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 4b216d522b..c91a02a87c 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -4,6 +4,7 @@ #pragma once +#include "global/IndexTypes.h" namespace textIndexScanTestHelpers { // NOTE: this function exploits a "lucky accident" that allows us to // obtain the textRecord using indexToString. 
@@ -16,18 +17,25 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - uint64_t offset = qec->getIndex().getNofNonLiterals(); - uint64_t shiftedTextRecordId = - result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get() - - offset; - return qec->getIndex().indexToString(VocabIndex::make(shiftedTextRecordId)); + uint64_t nofLiterals = qec->getIndex().getNofLiterals(); + uint64_t nofContexts = qec->getIndex().getNofTextRecords(); + uint64_t textRecordIdFromTable = + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); + if ((nofContexts - nofLiterals) <= textRecordIdFromTable) { + // Return when from Literals + return qec->getIndex().indexToString( + VocabIndex::make(textRecordIdFromTable - (nofContexts - nofLiterals))); + } else { + // Return when from DocsDB + return qec->getIndex().getTextExcerpt( + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex()); + } } -inline string getTextExcerptFromResultTable(const QueryExecutionContext* qec, - const ProtoResult& result, - const size_t& rowIndex) { - return qec->getIndex().getTextExcerpt( - result.idTable().getColumn(0)[rowIndex].getTextRecordIndex()); +inline const TextRecordIndex getTextRecordIdFromResultTable( + [[maybe_unused]] const QueryExecutionContext* qec, + const ProtoResult& result, const size_t& rowIndex) { + return result.idTable().getColumn(0)[rowIndex].getTextRecordIndex(); } inline string getEntityFromResultTable(const QueryExecutionContext* qec, From 56ea53125c56b39274f799701014c19a269f16d6 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 11:42:53 +0100 Subject: [PATCH 08/38] Cleaned up the filtering in TextIndexScanForWord::computeResult and commented it --- src/engine/TextIndexScanForWord.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp 
index e6a66d5bc6..7c3f931f8f 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -18,9 +18,11 @@ ProtoResult TextIndexScanForWord::computeResult( IdTable idTable = getExecutionContext()->getIndex().getWordPostingsForTerm( word_, getExecutionContext()->getAllocator()); + // This filters out the word column. When the searchword is a prefix this + // column shows the word the prefix got extended to if (!isPrefix_) { - ColumnIndex columnsToKeep[] = {0, 2}; - idTable.setColumnSubset(std::span{columnsToKeep}); + using CI = ColumnIndex; + idTable.setColumnSubset(std::array{CI{0}, CI{2}}); return {std::move(idTable), resultSortedOn(), LocalVocab{}}; } From e1e12e972467f4ff8025a9fc4f5e61fd21161b7d Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 11:59:43 +0100 Subject: [PATCH 09/38] renamed nofLiterals to nofLiteralsInTextIndex --- src/index/Index.cpp | 4 +++- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 2 +- src/index/IndexImpl.h | 4 +++- src/index/TextMetaData.h | 8 ++++---- test/engine/TextIndexScanTestHelpers.h | 2 +- 6 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index c8d1b1b40f..fe11bf55f7 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,7 +233,9 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofLiterals() const { return pimpl_->getNofLiterals(); } +size_t Index::getNofLiteralsInTextIndex() const { + return pimpl_->getNofLiteralsInTextIndex(); +} // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index d43d363d7d..0288b15408 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; 
size_t getNofEntityPostings() const; - size_t getNofLiterals() const; + size_t getNofLiteralsInTextIndex() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 1b1f69b9eb..0c8fb29e22 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,7 +299,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - textMeta_.setNofLiterals(nofLiterals); + textMeta_.setNofLiteralsInTextIndex(nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 71b59654db..d12619d6df 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -424,7 +424,9 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofLiterals() const { return textMeta_.getNofLiterals(); } + size_t getNofLiteralsInTextIndex() const { + return textMeta_.getNofLiteralsInTextIndex(); + } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 2d45ce28d2..b15b5e9a96 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,9 +98,9 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } - size_t getNofLiterals() const { return _nofLiterals; } + size_t getNofLiteralsInTextIndex() const { return _nofLiteralsInTextIndex; } - void setNofLiterals(size_t n) { _nofLiterals = n; } + void setNofLiteralsInTextIndex(size_t n) { _nofLiteralsInTextIndex = n; } const string& getName() const { return _name; } @@ -113,7 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t 
_nofEntityPostings = 0; - size_t _nofLiterals = 0; + size_t _nofLiteralsInTextIndex = 0; string _name; vector _blocks; @@ -123,7 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; - serializer | arg._nofLiterals; + serializer | arg._nofLiteralsInTextIndex; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index c91a02a87c..80c9475608 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,7 +17,7 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - uint64_t nofLiterals = qec->getIndex().getNofLiterals(); + uint64_t nofLiterals = qec->getIndex().getNofLiteralsInTextIndex(); uint64_t nofContexts = qec->getIndex().getNofTextRecords(); uint64_t textRecordIdFromTable = result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); From 017588c300375c1052f9b2f5abd8d77b37a5fb36 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 12:18:32 +0100 Subject: [PATCH 10/38] Removed redundant method getWordScoreVariable --- src/parser/data/Variable.cpp | 25 ------------------------- src/parser/data/Variable.h | 4 ---- 2 files changed, 29 deletions(-) diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index 5195e48f66..cd41fb3b42 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -79,31 +79,6 @@ Variable Variable::getEntityScoreVariable( absl::StrCat(SCORE_VARIABLE_PREFIX, name().substr(1), type, entity)}; } -// _____________________________________________________________________________ -Variable Variable::getWordScoreVariable(const std::string& word, - bool isPrefix) const { - std::string_view type; - std::string_view wordToConvert; - std::string convertedWord; - if 
(isPrefix) { - wordToConvert = std::string_view(word.data(), word.size() - 1); - type = "prefix_"; - } else { - wordToConvert = std::string_view(word); - type = "word_"; - } - convertedWord += "_"; - for (char c : wordToConvert) { - if (isalpha(static_cast(c))) { - convertedWord += c; - } else { - absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); - } - } - return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, type, name().substr(1), - convertedWord)}; -} - // _____________________________________________________________________________ Variable Variable::getWordScoreVariable(std::string_view word, bool isPrefix) const { diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index fb46abd384..e3ef49136b 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -57,10 +57,6 @@ class Variable { // `?ql_score_prefix_someTextVar_somePrefix`. // Note that if the word contains non ascii characters they are converted to // numbers and escaped. - Variable getWordScoreVariable(const std::string& word, bool isPrefix) const; - - // Does the same thing as the function with std::string& param only for - // std::string_view Variable getWordScoreVariable(std::string_view word, bool isPrefix) const; // Convert `?someVariable` into `?ql_matchingword_someVariable_someTerm` From 46666d0bd4d35d1e5c5f9c886d0e264a1c8750d5 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 12:49:12 +0100 Subject: [PATCH 11/38] added method appendEscapedWord to escape special chars in Variables --- src/parser/data/Variable.cpp | 30 ++++++++++++++---------------- src/parser/data/Variable.h | 4 ++++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index cd41fb3b42..00371c3537 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -65,15 +65,7 @@ Variable Variable::getEntityScoreVariable( entity = std::get(varOrEntity).name().substr(1); } else { type = 
"_fixedEntity_"; - // Converts input string to unambiguous result string not containing any - // special characters. "_" is used as an escaping character. - for (char c : std::get(varOrEntity)) { - if (isalpha(static_cast(c))) { - entity += c; - } else { - absl::StrAppend(&entity, "_", std::to_string(c), "_"); - } - } + appendEscapedWord(std::get(varOrEntity), entity); } return Variable{ absl::StrCat(SCORE_VARIABLE_PREFIX, name().substr(1), type, entity)}; @@ -91,13 +83,7 @@ Variable Variable::getWordScoreVariable(std::string_view word, type = "word_"; } convertedWord = "_"; - for (char c : word) { - if (isalpha(static_cast(c))) { - convertedWord += c; - } else { - absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); - } - } + appendEscapedWord(word, convertedWord); return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, type, name().substr(1), convertedWord)}; } @@ -119,3 +105,15 @@ bool Variable::isValidVariableName(std::string_view var) { return false; } } + +// _____________________________________________________________________________ +void Variable::appendEscapedWord(std::string_view word, + std::string& target) const { + for (char c : word) { + if (isalpha(static_cast(c))) { + target += c; + } else { + absl::StrAppend(&target, "_", std::to_string(c), "_"); + } + } +} diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index e3ef49136b..5d89d21aac 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -80,4 +80,8 @@ class Variable { } static bool isValidVariableName(std::string_view var); + + // The method escapes all special chars in word to "_ASCIICODE_" and appends + // it at the end of target + void appendEscapedWord(std::string_view word, std::string& target) const; }; From f36f18935fdb40d32b097df1f8e08ae3b5f74eea Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 13:57:52 +0100 Subject: [PATCH 12/38] Added two function in the TextIndexScanTestHelpers.h to add content to the wordsFileContent and 
docsFileContent strings. Now you can clearly see which lines are added, and writing tests is cleaner --- test/engine/TextIndexScanForWordTest.cpp | 89 ++++++++++++------------ test/engine/TextIndexScanTestHelpers.h | 14 ++++ 2 files changed, 57 insertions(+), 46 deletions(-) diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index f3062b6ee1..597e95aa8e 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -19,58 +19,52 @@ using ad_utility::source_location; namespace h = textIndexScanTestHelpers; namespace { + std::string kg = "

\"he failed the test\" .

\"testing can help\" .

" "\"some other sentence\" .

\"the test on friday was really hard\" " ". . . ."; std::string wordsFileContent = - "astronomer\t0\t1\t1\n" - "\t1\t1\t0\n" - "scientist\t0\t1\t1\n" - "field\t0\t1\t1\n" - "astronomy\t0\t1\t1\n" - "astronomer\t0\t2\t0\n" - "\t1\t2\t0\n" - ":s:firstsentence\t0\t2\t0\n" - "scientist\t0\t2\t0\n" - "field\t0\t2\t0\n" - "astronomy\t0\t2\t0\n" - "astronomy\t0\t3\t1\n" - "concentrates\t0\t3\t1\n" - "studies\t0\t3\t1\n" - "specific\t0\t3\t1\n" - "question\t0\t3\t1\n" - "outside\t0\t3\t1\n" - "scope\t0\t3\t1\n" - "earth\t0\t3\t1\n" - "astronomy\t0\t4\t1\n" - "concentrates\t0\t4\t1\n" - "studies\t0\t4\t1\n" - "field\t0\t4\t1\n" - "outside\t0\t4\t1\n" - "scope\t0\t4\t1\n" - "earth\t0\t4\t1\n" - "tester\t0\t5\t1\n" - "rockets\t0\t5\t1\n" - "astronomer\t0\t5\t1\n" - "\t1\t5\t0\n" - "although\t0\t5\t1\n" - "astronomer\t0\t6\t0\n" - "\t1\t6\t0\n" - "although\t0\t6\t0\n" - "\t1\t6\t0\n" - "space\t0\t6\t1\n" - "\t1\t7\t0\n" - "space\t0\t7\t0\n" - "earth\t0\t7\t1\n"; - -std::string docsFileContent = - "4\tAn astronomer is a scientist in the field of astronomy who " - "concentrates their studies on a specific question or field outside of " - "the scope of Earth.\n" - "7\tThe Tester of the rockets can be an astronomer too although they " - "might not be in space but on earth.\n"; + h::createWordsFileLine("astronomer", false, 1, 1) + + h::createWordsFileLine("", true, 1, 0) + + h::createWordsFileLine("scientist", false, 1, 1) + + h::createWordsFileLine("field", false, 1, 1) + + h::createWordsFileLine("astronomy", false, 1, 1) + + h::createWordsFileLine("astronomer", false, 2, 0) + + h::createWordsFileLine("", true, 2, 0) + + h::createWordsFileLine(":s:firstsentence", false, 2, 0) + + h::createWordsFileLine("scientist", false, 2, 0) + + h::createWordsFileLine("field", false, 2, 0) + + h::createWordsFileLine("astronomy", false, 2, 0) + + h::createWordsFileLine("astronomy", false, 3, 1) + + h::createWordsFileLine("concentrates", false, 3, 1) + + h::createWordsFileLine("studies", false, 
3, 1) + + h::createWordsFileLine("specific", false, 3, 1) + + h::createWordsFileLine("question", false, 3, 1) + + h::createWordsFileLine("outside", false, 3, 1) + + h::createWordsFileLine("scope", false, 3, 1) + + h::createWordsFileLine("earth", false, 3, 1) + + h::createWordsFileLine("astronomy", false, 4, 1) + + h::createWordsFileLine("concentrates", false, 4, 1) + + h::createWordsFileLine("studies", false, 4, 1) + + h::createWordsFileLine("field", false, 4, 1) + + h::createWordsFileLine("outside", false, 4, 1) + + h::createWordsFileLine("scope", false, 4, 1) + + h::createWordsFileLine("earth", false, 4, 1) + + h::createWordsFileLine("tester", false, 5, 1) + + h::createWordsFileLine("rockets", false, 5, 1) + + h::createWordsFileLine("astronomer", false, 5, 1) + + h::createWordsFileLine("", true, 5, 0) + + h::createWordsFileLine("although", false, 5, 1) + + h::createWordsFileLine("astronomer", false, 6, 0) + + h::createWordsFileLine("", true, 6, 0) + + h::createWordsFileLine("although", false, 6, 0) + + h::createWordsFileLine("", true, 6, 0) + + h::createWordsFileLine("space", false, 6, 1) + + h::createWordsFileLine("", true, 7, 0) + + h::createWordsFileLine("space", false, 7, 0) + + h::createWordsFileLine("earth", false, 7, 1); std::string firstDocText = "An astronomer is a scientist in the field of " @@ -83,6 +77,9 @@ std::string secondDocText = "too although they might not be in space but on " "earth."; +std::string docsFileContent = h::createDocsFileLine(4, firstDocText) + + h::createDocsFileLine(7, secondDocText); + TEST(TextIndexScanForWord, WordScanPrefix) { auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, docsFileContent); diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 80c9475608..6c9d897514 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -57,4 +57,18 @@ inline string combineToString(const string& text, const string& word) { ss 
<< "Text: " << text << ", Word: " << word << std::endl; return ss.str(); } + +std::string inlineSeperator = "\t"; +std::string lineSeperator = "\n"; + +inline string createWordsFileLine(std::string word, bool isEntity, + size_t contextId, size_t score) { + return word + inlineSeperator + (isEntity ? "1" : "0") + inlineSeperator + + std::to_string(contextId) + inlineSeperator + std::to_string(score) + + lineSeperator; +}; + +inline string createDocsFileLine(size_t docId, std::string docContent) { + return std::to_string(docId) + inlineSeperator + docContent + lineSeperator; +}; } // namespace textIndexScanTestHelpers From c62a7e682870de46000563e9fee2d1820501dadb Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 14:37:29 +0100 Subject: [PATCH 13/38] Added tests for Scores. Also commented tests and refined them --- test/engine/TextIndexScanForWordTest.cpp | 79 ++++++++++++++++-------- test/engine/TextIndexScanTestHelpers.h | 10 +++ 2 files changed, 62 insertions(+), 27 deletions(-) diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 597e95aa8e..7e9b0c0fd9 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -84,36 +84,10 @@ TEST(TextIndexScanForWord, WordScanPrefix) { auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, docsFileContent); - TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; - auto tresult = t1.computeResultOnlyForTesting(); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 0)); - ASSERT_EQ(TextRecordIndex::make(1), - h::getTextRecordIdFromResultTable(qec, tresult, 0)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 1)); - ASSERT_EQ(TextRecordIndex::make(1), - h::getTextRecordIdFromResultTable(qec, tresult, 1)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 2)); - ASSERT_EQ(TextRecordIndex::make(2), - 
h::getTextRecordIdFromResultTable(qec, tresult, 2)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 3)); - ASSERT_EQ(TextRecordIndex::make(2), - h::getTextRecordIdFromResultTable(qec, tresult, 3)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 4)); - ASSERT_EQ(TextRecordIndex::make(3), - h::getTextRecordIdFromResultTable(qec, tresult, 4)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 5)); - ASSERT_EQ(TextRecordIndex::make(4), - h::getTextRecordIdFromResultTable(qec, tresult, 5)); - ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 6)); - ASSERT_EQ(TextRecordIndex::make(5), - h::getTextRecordIdFromResultTable(qec, tresult, 6)); - ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 7)); - ASSERT_EQ(TextRecordIndex::make(6), - h::getTextRecordIdFromResultTable(qec, tresult, 7)); - TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; + // Test if size calculations are right ASSERT_EQ(s1.getResultWidth(), 3); auto result = s1.computeResultOnlyForTesting(); @@ -121,6 +95,7 @@ TEST(TextIndexScanForWord, WordScanPrefix) { ASSERT_EQ(result.idTable().size(), 4); s2.getExternallyVisibleVariableColumns(); + // Test if all columns are there and correct using enum ColumnIndexAndTypeInfo::UndefStatus; VariableToColumnMap expectedVariables{ {Variable{"?text2"}, {0, AlwaysDefined}}, @@ -129,6 +104,8 @@ TEST(TextIndexScanForWord, WordScanPrefix) { EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); + // Tests if the correct texts are retrieved from a mix of non literal and + // literal texts ASSERT_EQ(h::combineToString(secondDocText, "tester"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 0), h::getWordFromResultTable(qec, result, 0))); @@ -143,6 +120,54 @@ TEST(TextIndexScanForWord, WordScanPrefix) { 
h::combineToString("\"the test on friday was really hard\"", "test"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 3), h::getWordFromResultTable(qec, result, 3))); + + // Tests if the correct texts are retrieved from the non literal texts + TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; + auto tresult = t1.computeResultOnlyForTesting(); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 0)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 0)); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 1)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 1)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 2)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 2)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 3)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 3)); + ASSERT_EQ(TextRecordIndex::make(3), + h::getTextRecordIdFromResultTable(qec, tresult, 4)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 4)); + ASSERT_EQ(TextRecordIndex::make(4), + h::getTextRecordIdFromResultTable(qec, tresult, 5)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 5)); + ASSERT_EQ(TextRecordIndex::make(5), + h::getTextRecordIdFromResultTable(qec, tresult, 6)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 6)); + ASSERT_EQ(TextRecordIndex::make(6), + h::getTextRecordIdFromResultTable(qec, tresult, 7)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 7)); + + // Tests if correct words are deducted from prefix + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 0)); + ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 1)); + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 2)); 
+ ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 3)); + ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 4)); + ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 5)); + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 6)); + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 7)); + + // Tests if the correct scores are retrieved from the non literal texts + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 0, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 1, true)); + ASSERT_EQ(0, h::getScoreFromResultTable(qec, tresult, 2, true)); + ASSERT_EQ(0, h::getScoreFromResultTable(qec, tresult, 3, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 4, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 5, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 6, true)); + ASSERT_EQ(0, h::getScoreFromResultTable(qec, tresult, 7, true)); } TEST(TextIndexScanForWord, WordScanBasic) { diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 6c9d897514..d1bfe0d2fc 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -38,6 +38,7 @@ inline const TextRecordIndex getTextRecordIdFromResultTable( return result.idTable().getColumn(0)[rowIndex].getTextRecordIndex(); } +// Only use on prefix search results inline string getEntityFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { @@ -45,6 +46,7 @@ inline string getEntityFromResultTable(const QueryExecutionContext* qec, result.idTable().getColumn(1)[rowIndex].getVocabIndex()); } +// Only use on prefix search results inline string getWordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { @@ -52,6 +54,14 @@ inline string getWordFromResultTable(const QueryExecutionContext* qec, 
result.idTable().getColumn(1)[rowIndex].getWordVocabIndex())}; } +inline size_t getScoreFromResultTable( + [[maybe_unused]] const QueryExecutionContext* qec, + const ProtoResult& result, const size_t& rowIndex, bool wasPrefixSearch) { + size_t colToRetrieve = wasPrefixSearch ? 2 : 1; + return static_cast( + result.idTable().getColumn(colToRetrieve)[rowIndex].getInt()); +} + inline string combineToString(const string& text, const string& word) { std::stringstream ss; ss << "Text: " << text << ", Word: " << word << std::endl; From 89f0b2788e9779bd7b4fe9194878b926ead10dbf Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 15:03:41 +0100 Subject: [PATCH 14/38] Changed the getQec function and the respective makeTestIndex to take in the wordsFileContent and docsFileContent as pair contentsOfWordsFileAndDocsFile --- test/engine/TextIndexScanForWordTest.cpp | 19 +++++---- test/util/IndexTestHelpers.cpp | 49 ++++++++++++------------ test/util/IndexTestHelpers.h | 8 ++-- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 7e9b0c0fd9..eac3cb0d2f 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -80,9 +80,12 @@ std::string secondDocText = std::string docsFileContent = h::createDocsFileLine(4, firstDocText) + h::createDocsFileLine(7, secondDocText); +std::pair contentsOfWordsFileAndDocsFile = { + wordsFileContent, docsFileContent}; + TEST(TextIndexScanForWord, WordScanPrefix) { - auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -171,8 +174,8 @@ TEST(TextIndexScanForWord, WordScanPrefix) { } TEST(TextIndexScanForWord, WordScanBasic) { - auto qec = 
getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test"}; @@ -210,8 +213,8 @@ TEST(TextIndexScanForWord, WordScanBasic) { } TEST(TextIndexScanForWord, CacheKey) { - auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -234,8 +237,8 @@ TEST(TextIndexScanForWord, CacheKey) { } TEST(TextIndexScanForWord, KnownEmpty) { - auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "nonExistentWord*"}; ASSERT_TRUE(s1.knownEmptyResult()); diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 72d1016b50..0dcfd334a6 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -140,8 +140,8 @@ Index makeTestIndex(const std::string& indexBasename, [[maybe_unused]] bool usePrefixCompression, ad_utility::MemorySize blocksizePermutations, bool createTextIndex, bool addWordsFromLiterals, - std::optional wordsFileContent, - std::optional docsFileContent) { + std::optional> + contentsOfWordsFileAndDocsFile) { // Ignore the (irrelevant) log output of the index building and loading during // these tests. 
static std::ostringstream ignoreLogStream; @@ -188,15 +188,15 @@ Index makeTestIndex(const std::string& indexBasename, std::nullopt}; index.createFromFiles({spec}); if (createTextIndex) { - if (wordsFileContent.has_value() && docsFileContent.has_value()) { + if (contentsOfWordsFileAndDocsFile.has_value()) { // Create and write to words- and docsfile to later build a full text // index from them ad_utility::File wordsFile(indexBasename + ".wordsfile", "w"); ad_utility::File docsFile(indexBasename + ".docsfile", "w"); - wordsFile.write(wordsFileContent.value().c_str(), - wordsFileContent.value().size()); - docsFile.write(docsFileContent.value().c_str(), - docsFileContent.value().size()); + wordsFile.write(contentsOfWordsFileAndDocsFile.value().first.c_str(), + contentsOfWordsFileAndDocsFile.value().first.size()); + docsFile.write(contentsOfWordsFileAndDocsFile.value().second.c_str(), + contentsOfWordsFileAndDocsFile.value().second.size()); wordsFile.close(); docsFile.close(); index.setKbName(indexBasename); @@ -246,8 +246,8 @@ QueryExecutionContext* getQec(std::optional turtleInput, bool usePrefixCompression, ad_utility::MemorySize blocksizePermutations, bool createTextIndex, bool addWordsFromLiterals, - std::optional wordsFileContent, - std::optional docsFileContent) { + std::optional> + contentsOfWordsFileAndDocsFile) { // Similar to `absl::Cleanup`. Calls the `callback_` in the destructor, but // the callback is stored as a `std::function`, which allows to store // different types of callbacks in the same wrapper type. @@ -294,22 +294,21 @@ QueryExecutionContext* getQec(std::optional turtleInput, std::string testIndexBasename = "_staticGlobalTestIndex" + std::to_string(contextMap.size()); contextMap.emplace( - key, - Context{TypeErasedCleanup{[testIndexBasename]() { - for (const std::string& indexFilename : - getAllIndexFilenames(testIndexBasename)) { - // Don't log when a file can't be deleted, - // because the logging might already be - // destroyed. 
- ad_utility::deleteFile(indexFilename, false); - } - }}, - std::make_unique(makeTestIndex( - testIndexBasename, turtleInput, loadAllPermutations, - usePatterns, usePrefixCompression, blocksizePermutations, - createTextIndex, addWordsFromLiterals, wordsFileContent, - docsFileContent)), - std::make_unique()}); + key, Context{TypeErasedCleanup{[testIndexBasename]() { + for (const std::string& indexFilename : + getAllIndexFilenames(testIndexBasename)) { + // Don't log when a file can't be deleted, + // because the logging might already be + // destroyed. + ad_utility::deleteFile(indexFilename, false); + } + }}, + std::make_unique(makeTestIndex( + testIndexBasename, turtleInput, loadAllPermutations, + usePatterns, usePrefixCompression, + blocksizePermutations, createTextIndex, + addWordsFromLiterals, contentsOfWordsFileAndDocsFile)), + std::make_unique()}); } auto* qec = contextMap.at(key).qec_.get(); qec->getIndex().getImpl().setGlobalIndexAndComparatorOnlyForTesting(); diff --git a/test/util/IndexTestHelpers.h b/test/util/IndexTestHelpers.h index 6bbe0c9195..cbbd5ea486 100644 --- a/test/util/IndexTestHelpers.h +++ b/test/util/IndexTestHelpers.h @@ -46,8 +46,8 @@ Index makeTestIndex(const std::string& indexBasename, ad_utility::MemorySize blocksizePermutations = 16_B, bool createTextIndex = false, bool addWordsFromLiterals = true, - std::optional wordsFileContent = std::nullopt, - std::optional docsFileContent = std::nullopt); + std::optional> + contentsOfWordsFileAndDocsfile = std::nullopt); // Return a static `QueryExecutionContext` that refers to an index that was // build using `makeTestIndex` (see above). 
The index (most notably its @@ -59,8 +59,8 @@ QueryExecutionContext* getQec( bool usePrefixCompression = true, ad_utility::MemorySize blocksizePermutations = 16_B, bool createTextIndex = false, bool addWordsFromLiterals = true, - std::optional wordsFileContent = std::nullopt, - std::optional docsFileContent = std::nullopt); + std::optional> + contentsOfWordsFileAndDocsfile = std::nullopt); // Return a lambda that takes a string and converts it into an ID by looking // it up in the vocabulary of `index`. An `AD_CONTRACT_CHECK` will fail if the From e8bf56e4f54ef642d64765141c80eee083cb4730 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 12 Dec 2024 14:11:49 +0100 Subject: [PATCH 15/38] Fix the multiple definition error. Signed-off-by: Johannes Kalmbach --- test/engine/TextIndexScanTestHelpers.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index d1bfe0d2fc..a0107ffe83 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -68,17 +68,17 @@ inline string combineToString(const string& text, const string& word) { return ss.str(); } -std::string inlineSeperator = "\t"; -std::string lineSeperator = "\n"; +inline std::string inlineSeparator = "\t"; +inline std::string lineSeparator = "\n"; inline string createWordsFileLine(std::string word, bool isEntity, size_t contextId, size_t score) { - return word + inlineSeperator + (isEntity ? "1" : "0") + inlineSeperator + - std::to_string(contextId) + inlineSeperator + std::to_string(score) + - lineSeperator; + return word + inlineSeparator + (isEntity ? 
"1" : "0") + inlineSeparator + + std::to_string(contextId) + inlineSeparator + std::to_string(score) + + lineSeparator; }; inline string createDocsFileLine(size_t docId, std::string docContent) { - return std::to_string(docId) + inlineSeperator + docContent + lineSeperator; + return std::to_string(docId) + inlineSeparator + docContent + lineSeparator; }; } // namespace textIndexScanTestHelpers From 4a159948af0b56472898e34392754141c68e07cf Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 12 Dec 2024 15:16:18 +0100 Subject: [PATCH 16/38] Make query planning of index scans fast again (#1674) Since #1619, the size estimate for an index scan always involved one or several copies of the block metadata, which incurred a significant query planning cost for most queries. Now, such a copy is only made for an index scan followed by a `FILTER` and only the metadata of those blocks is copied, which remain after the `FILTER` (in which case the two operations are expensive anyway). --- src/engine/IndexScan.cpp | 32 +++++++++++++++++--------------- src/engine/IndexScan.h | 9 ++++----- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index f56123a42b..5bf47dd4c8 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -339,24 +339,26 @@ IndexScan::getBlockMetadata() const { // _____________________________________________________________________________ std::optional> IndexScan::getBlockMetadataOptionallyPrefiltered() const { + // The code after this is expensive because it always copies the complete + // block metadata, so we do an early return of `nullopt` (which means "use all + // the blocks") if no prefilter is specified. 
+ if (!prefilter_.has_value()) { + return std::nullopt; + } auto optBlockSpan = getBlockMetadata(); - std::optional> optBlocks = std::nullopt; - if (optBlockSpan.has_value()) { - const auto& blockSpan = optBlockSpan.value(); - optBlocks = {blockSpan.begin(), blockSpan.end()}; - applyPrefilterIfPossible(optBlocks.value()); + if (!optBlockSpan.has_value()) { + return std::nullopt; } - return optBlocks; + return applyPrefilter(optBlockSpan.value()); } // _____________________________________________________________________________ -void IndexScan::applyPrefilterIfPossible( - std::vector& blocks) const { - if (prefilter_.has_value()) { - // Apply the prefilter on given blocks. - auto& [prefilterExpr, columnIndex] = prefilter_.value(); - blocks = prefilterExpr->evaluate(blocks, columnIndex); - } +std::vector IndexScan::applyPrefilter( + std::span blocks) const { + AD_CORRECTNESS_CHECK(prefilter_.has_value() && getLimit().isUnconstrained()); + // Apply the prefilter on given blocks. + auto& [prefilterExpr, columnIndex] = prefilter_.value(); + return prefilterExpr->evaluate(blocks, columnIndex); } // _____________________________________________________________________________ @@ -369,12 +371,12 @@ Permutation::IdTableGenerator IndexScan::getLazyScan( auto filteredBlocks = getLimit().isUnconstrained() ? std::optional(std::move(blocks)) : std::nullopt; - if (filteredBlocks.has_value()) { + if (filteredBlocks.has_value() && prefilter_.has_value()) { // Note: The prefilter expression applied with applyPrefilterIfPossible() // is not related to the prefilter procedure mentioned in the comment above. // If this IndexScan owns a pair, it can // be applied. 
- applyPrefilterIfPossible(filteredBlocks.value()); + filteredBlocks = applyPrefilter(filteredBlocks.value()); } return getScanPermutation().lazyScan(getScanSpecification(), filteredBlocks, additionalColumns(), cancellationHandle_, diff --git a/src/engine/IndexScan.h b/src/engine/IndexScan.h index c10680f59e..d778260efe 100644 --- a/src/engine/IndexScan.h +++ b/src/engine/IndexScan.h @@ -223,11 +223,10 @@ class IndexScan final : public Operation { std::optional> getBlockMetadataOptionallyPrefiltered() const; - // If `isUnconstrained()` yields true, return the blocks as given or the - // prefiltered blocks (if `prefilter_` has value). If `isUnconstrained()` is - // false, return `std::nullopt`. - void applyPrefilterIfPossible( - std::vector& blocks) const; + // Apply the `prefilter_` to the `blocks`. May only be called if the limit is + // unconstrained, and a `prefilter_` exists. + std::vector applyPrefilter( + std::span blocks) const; // Helper functions for the public `getLazyScanFor...` methods and // `chunkedIndexScan` (see above). From 70964d6e5aaaf0d5f507c384c78933327865342a Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 12 Dec 2024 18:54:24 +0100 Subject: [PATCH 17/38] Allow operations to not store their result in the cache (#1665) Each operation now has a `bool` that determines whether the results can be stored in the cache or not (whether it is actually stored depends on other circumstances, like the available cache size). That `bool` does not have to be fixed when the operation is created, but can be changed. For example, this is useful for index scans that only return a subset of their full result (because of another constraining operation, like a join or a filter). 
--- src/engine/Operation.cpp | 39 ++++++++++++++++++++++++++++++------ src/engine/Operation.h | 38 +++++++++++++++++------------------ src/util/ConcurrentCache.h | 22 ++++++++++++++++++++ test/ConcurrentCacheTest.cpp | 36 +++++++++++++++++++++++++++++++++ test/OperationTest.cpp | 37 ++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+), 25 deletions(-) diff --git a/src/engine/Operation.cpp b/src/engine/Operation.cpp index e4d7136527..1a9f53fa76 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -232,7 +232,8 @@ CacheValue Operation::runComputationAndPrepareForCache( auto maxSize = std::min(RuntimeParameters().get<"lazy-result-max-cache-size">(), cache.getMaxSizeSingleEntry()); - if (!result.isFullyMaterialized() && !unlikelyToFitInCache(maxSize)) { + if (canResultBeCached() && !result.isFullyMaterialized() && + !unlikelyToFitInCache(maxSize)) { AD_CONTRACT_CHECK(!pinned); result.cacheDuringConsumption( [maxSize]( @@ -316,11 +317,16 @@ std::shared_ptr Operation::getResult( bool onlyReadFromCache = computationMode == ComputationMode::ONLY_IF_CACHED; - auto result = - pinResult ? cache.computeOncePinned(cacheKey, cacheSetup, - onlyReadFromCache, suitedForCache) - : cache.computeOnce(cacheKey, cacheSetup, onlyReadFromCache, - suitedForCache); + auto result = [&]() { + auto compute = [&](auto&&... args) { + if (!canResultBeCached()) { + return cache.computeButDontStore(AD_FWD(args)...); + } + return pinResult ? cache.computeOncePinned(AD_FWD(args)...) 
+ : cache.computeOnce(AD_FWD(args)...); + }; + return compute(cacheKey, cacheSetup, onlyReadFromCache, suitedForCache); + }(); if (result._resultPointer == nullptr) { AD_CORRECTNESS_CHECK(onlyReadFromCache); @@ -596,3 +602,24 @@ void Operation::signalQueryUpdate() const { _executionContext->signalQueryUpdate(*_rootRuntimeInfo); } } + +// _____________________________________________________________________________ +std::string Operation::getCacheKey() const { + auto result = getCacheKeyImpl(); + if (_limit._limit.has_value()) { + absl::StrAppend(&result, " LIMIT ", _limit._limit.value()); + } + if (_limit._offset != 0) { + absl::StrAppend(&result, " OFFSET ", _limit._offset); + } + return result; +} + +// _____________________________________________________________________________ +uint64_t Operation::getSizeEstimate() { + if (_limit._limit.has_value()) { + return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit()); + } else { + return getSizeEstimateBeforeLimit(); + } +} diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 3e06a9498e..9e649cb0a4 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -90,6 +90,9 @@ class Operation { // limit/offset is applied post computation. bool externalLimitApplied_ = false; + // See the documentation of the getter function below. + bool canResultBeCached_ = true; + public: // Holds a `PrefilterExpression` with its corresponding `Variable`. using PrefilterVariablePair = sparqlExpression::PrefilterExprVariablePair; @@ -162,20 +165,23 @@ class Operation { // Get a unique, not ambiguous string representation for a subtree. // This should act like an ID for each subtree. // Calls `getCacheKeyImpl` and adds the information about the `LIMIT` clause. 
- virtual string getCacheKey() const final { - auto result = getCacheKeyImpl(); - if (_limit._limit.has_value()) { - absl::StrAppend(&result, " LIMIT ", _limit._limit.value()); - } - if (_limit._offset != 0) { - absl::StrAppend(&result, " OFFSET ", _limit._offset); - } - return result; - } + virtual std::string getCacheKey() const final; + + // If this function returns `false`, then the result of this `Operation` will + // never be stored in the cache. It might however be read from the cache. + // This can be used, if the operation actually only returns a subset of the + // actual result because it has been constrained by a parent operation (e.g. + // an IndexScan that has been prefiltered by another operation which it is + // joined with). + virtual bool canResultBeCached() const { return canResultBeCached_; } + + // After calling this function, `canResultBeCached()` will return `false` (see + // above for details). + virtual void disableStoringInCache() final { canResultBeCached_ = false; } private: - // The individual implementation of `getCacheKey` (see above) that has to be - // customized by every child class. + // The individual implementation of `getCacheKey` (see above) that has to + // be customized by every child class. virtual string getCacheKeyImpl() const = 0; public: @@ -186,13 +192,7 @@ class Operation { virtual size_t getCostEstimate() = 0; - virtual uint64_t getSizeEstimate() final { - if (_limit._limit.has_value()) { - return std::min(_limit._limit.value(), getSizeEstimateBeforeLimit()); - } else { - return getSizeEstimateBeforeLimit(); - } - } + virtual uint64_t getSizeEstimate() final; private: virtual uint64_t getSizeEstimateBeforeLimit() = 0; diff --git a/src/util/ConcurrentCache.h b/src/util/ConcurrentCache.h index 2f22efde8c..21262da12b 100644 --- a/src/util/ConcurrentCache.h +++ b/src/util/ConcurrentCache.h @@ -208,6 +208,28 @@ class ConcurrentCache { suitedForCache); } + // If the result is contained in the cache, read and return it. 
Otherwise, + // compute it, but do not store it in the cache. The interface is the same as + // for the above two functions, therefore some of the arguments are unused. + ResultAndCacheStatus computeButDontStore( + const Key& key, + const InvocableWithConvertibleReturnType auto& computeFunction, + bool onlyReadFromCache, + [[maybe_unused]] const InvocableWithConvertibleReturnType< + bool, const Value&> auto& suitedForCache) { + { + auto resultPtr = _cacheAndInProgressMap.wlock()->_cache[key]; + if (resultPtr != nullptr) { + return {std::move(resultPtr), CacheStatus::cachedNotPinned}; + } + } + if (onlyReadFromCache) { + return {nullptr, CacheStatus::notInCacheAndNotComputed}; + } + auto value = std::make_shared(computeFunction()); + return {std::move(value), CacheStatus::computed}; + } + // Insert `value` into the cache, if the `key` is not already present. In case // `pinned` is true and the key is already present, the existing value is // pinned in case it is not pinned yet. diff --git a/test/ConcurrentCacheTest.cpp b/test/ConcurrentCacheTest.cpp index f52eca0561..9dbfbde509 100644 --- a/test/ConcurrentCacheTest.cpp +++ b/test/ConcurrentCacheTest.cpp @@ -530,3 +530,39 @@ TEST(ConcurrentCache, testTryInsertIfNotPresentDoesWorkCorrectly) { expectContainsSingleElementAtKey0(true, "jkl"); } + +TEST(ConcurrentCache, computeButDontStore) { + SimpleConcurrentLruCache cache{}; + + // The last argument of `computeOnce...`: For the sake of this test, all + // results are suitable for the cache. Note: In the `computeButDontStore` + // function this argument is ignored, because the results are never stored in + // the cache. + auto alwaysSuitable = [](auto&&) { return true; }; + // Store the element in the cache. + cache.computeOnce( + 42, []() { return "42"; }, false, alwaysSuitable); + + // The result is read from the cache, so we get "42", not "blubb". 
+ auto res = cache.computeButDontStore( + 42, []() { return "blubb"; }, false, alwaysSuitable); + EXPECT_EQ(*res._resultPointer, "42"); + + // The same with `onlyReadFromCache` == true; + res = cache.computeButDontStore( + 42, []() { return "blubb"; }, true, alwaysSuitable); + EXPECT_EQ(*res._resultPointer, "42"); + + cache.clearAll(); + + // Compute, but don't store. + res = cache.computeButDontStore( + 42, []() { return "blubb"; }, false, alwaysSuitable); + EXPECT_EQ(*res._resultPointer, "blubb"); + + // Nothing is stored in the cache, so we cannot read it. + EXPECT_FALSE(cache.getIfContained(42).has_value()); + res = cache.computeButDontStore( + 42, []() { return "blubb"; }, true, alwaysSuitable); + EXPECT_EQ(res._resultPointer, nullptr); +} diff --git a/test/OperationTest.cpp b/test/OperationTest.cpp index c1ad709a4f..4ad1f1313c 100644 --- a/test/OperationTest.cpp +++ b/test/OperationTest.cpp @@ -653,3 +653,40 @@ TEST(Operation, checkLazyOperationIsNotCachedIfUnlikelyToFitInCache) { EXPECT_FALSE( qec->getQueryTreeCache().cacheContains(makeQueryCacheKey("test"))); } + +TEST(OperationTest, disableCaching) { + auto qec = getQec(); + qec->getQueryTreeCache().clearAll(); + std::vector idTablesVector{}; + idTablesVector.push_back(makeIdTableFromVector({{3, 4}})); + idTablesVector.push_back(makeIdTableFromVector({{7, 8}, {9, 123}})); + ValuesForTesting valuesForTesting{ + qec, std::move(idTablesVector), {Variable{"?x"}, Variable{"?y"}}, true}; + + QueryCacheKey cacheKey{valuesForTesting.getCacheKey(), + qec->locatedTriplesSnapshot().index_}; + + // By default, the result of `valuesForTesting` is cached because it is + // sufficiently small, no matter if it was computed lazily or fully + // materialized. 
+ EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(true); + EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey)); + qec->getQueryTreeCache().clearAll(); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(false); + EXPECT_TRUE(qec->getQueryTreeCache().cacheContains(cacheKey)); + + // We now disable caching for the `valuesForTesting`. Then the result is never + // cached, no matter if it is computed lazily or fully materialized. + valuesForTesting.disableStoringInCache(); + qec->getQueryTreeCache().clearAll(); + + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(true); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + qec->getQueryTreeCache().clearAll(); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); + valuesForTesting.getResult(false); + EXPECT_FALSE(qec->getQueryTreeCache().cacheContains(cacheKey)); +} From 4237e0d4af70e6e400f4357f61756eb5873fe98a Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 12 Dec 2024 21:49:38 +0100 Subject: [PATCH 18/38] For C++17, use `range-v3` instead of `std::ranges` (#1667) This is a first step towards making QLever compile with C++17. If the compile-time flag `QLEVER_CPP_17` is set, use Eric Niebler's `range-v3` library as a drop-in replacement for `std::ranges`. In the code, we simply write `ql::ranges` instead of `std::ranges` in most places. Some places need special treatment. For example, where `std::ranges` was used as a C++20 concept, we now use the macros `CPP_template` and `CPP_and` (also from the `range-v3` library), which does the right thing for both C++20 and C++17. 
--- .github/workflows/native-build.yml | 4 + CMakeLists.txt | 22 ++- src/backports/algorithm.h | 59 +++++++ src/backports/concepts.h | 17 ++ src/engine/AddCombinedRowToTable.h | 4 +- src/engine/Bind.cpp | 4 +- src/engine/CallFixedSize.h | 4 +- src/engine/CartesianProductJoin.cpp | 41 +++-- src/engine/CartesianProductJoin.h | 9 +- src/engine/CheckUsePatternTrick.cpp | 22 +-- src/engine/Distinct.cpp | 44 +++--- src/engine/Engine.cpp | 2 +- src/engine/Engine.h | 2 +- src/engine/ExportQueryExecutionTrees.cpp | 12 +- src/engine/ExportQueryExecutionTrees.h | 2 +- src/engine/Filter.cpp | 2 +- src/engine/GroupBy.cpp | 34 ++-- src/engine/HasPredicateScan.cpp | 2 +- src/engine/IndexScan.cpp | 36 +++-- src/engine/Join.cpp | 12 +- src/engine/LazyGroupBy.h | 4 +- src/engine/LocalVocab.cpp | 6 +- src/engine/LocalVocab.h | 12 +- src/engine/MultiColumnJoin.cpp | 4 +- src/engine/Operation.cpp | 4 +- src/engine/OptionalJoin.cpp | 4 +- src/engine/OrderBy.cpp | 2 +- src/engine/PathSearch.cpp | 6 +- src/engine/QueryExecutionTree.cpp | 4 +- src/engine/QueryPlanner.cpp | 100 ++++++------ src/engine/Result.cpp | 10 +- src/engine/RuntimeInformation.cpp | 8 +- src/engine/Service.cpp | 8 +- src/engine/SpatialJoinAlgorithms.cpp | 6 +- src/engine/TextIndexScanForWord.cpp | 2 +- src/engine/TextLimit.cpp | 16 +- src/engine/TransitivePathBase.cpp | 2 +- src/engine/TransitivePathBinSearch.h | 2 +- src/engine/Union.cpp | 21 ++- src/engine/Values.cpp | 2 +- src/engine/VariableToColumnMap.cpp | 6 +- src/engine/VariableToColumnMap.h | 4 + .../idTable/CompressedExternalIdTable.h | 37 ++--- src/engine/idTable/IdTable.h | 50 +++--- src/engine/idTable/IdTableRow.h | 13 +- .../sparqlExpressions/CountStarExpression.cpp | 14 +- .../sparqlExpressions/LiteralExpression.h | 10 +- .../sparqlExpressions/NaryExpressionImpl.h | 4 +- .../NumericBinaryExpressions.cpp | 4 +- .../NumericUnaryExpressions.cpp | 4 +- .../PrefilterExpressionIndex.cpp | 14 +- .../sparqlExpressions/RegexExpression.cpp | 4 +- 
.../RelationalExpressions.cpp | 20 +-- .../sparqlExpressions/RelationalExpressions.h | 2 +- .../sparqlExpressions/SetOfIntervals.cpp | 2 +- src/engine/sparqlExpressions/SetOfIntervals.h | 3 +- .../sparqlExpressions/SparqlExpression.cpp | 9 +- .../SparqlExpressionGenerators.h | 2 +- .../SparqlExpressionPimpl.cpp | 2 +- .../sparqlExpressions/SparqlExpressionPimpl.h | 2 +- .../sparqlExpressions/StringExpressions.cpp | 12 +- .../sparqlExpressions/VariadicExpression.h | 2 +- src/global/IdTriple.h | 4 +- src/global/SpecialIds.h | 5 +- src/global/ValueId.h | 4 +- src/global/ValueIdComparators.h | 1 + src/index/CompressedRelation.cpp | 99 ++++++------ src/index/CompressedRelation.h | 4 +- src/index/DeltaTriples.cpp | 27 ++-- src/index/DocsDB.cpp | 3 +- src/index/IndexBuilderTypes.h | 2 +- src/index/IndexImpl.Text.cpp | 18 +-- src/index/IndexImpl.cpp | 10 +- src/index/IndexMetaData.h | 2 +- src/index/LocatedTriples.cpp | 25 ++- src/index/LocatedTriples.h | 6 +- src/index/PatternCreator.cpp | 8 +- src/index/PrefixHeuristic.cpp | 2 +- src/index/StringSortComparator.h | 2 +- src/index/StxxlSortFunctors.h | 2 +- src/index/Vocabulary.cpp | 4 +- src/index/Vocabulary.h | 2 +- src/index/VocabularyMergerImpl.h | 16 +- src/index/vocabulary/CompressionWrappers.h | 2 +- .../vocabulary/VocabularyBinarySearchMixin.h | 6 +- .../VocabularyInMemoryBinSearch.cpp | 2 +- src/parser/LiteralOrIri.cpp | 2 +- src/parser/ParsedQuery.cpp | 10 +- src/parser/RdfEscaping.cpp | 4 +- src/parser/RdfParser.cpp | 4 +- .../sparqlParser/SparqlQleverVisitor.cpp | 30 ++-- src/util/Algorithm.h | 10 +- src/util/BatchedPipeline.h | 2 +- src/util/BlankNodeManager.cpp | 6 +- src/util/BlankNodeManager.h | 2 +- src/util/ChunkedForLoop.h | 27 ++-- src/util/ConfigManager/ConfigManager.cpp | 80 +++++----- src/util/ConfigManager/ConfigOption.cpp | 4 +- src/util/ConstexprMap.h | 3 +- src/util/ConstexprUtils.h | 3 +- src/util/FsstCompressor.h | 2 +- src/util/Generator.h | 18 ++- src/util/JoinAlgorithms/FindUndefRanges.h | 
22 +-- src/util/JoinAlgorithms/JoinAlgorithms.h | 70 +++++---- src/util/MemorySize/MemorySize.h | 2 +- src/util/ParallelMultiwayMerge.h | 13 +- src/util/PriorityQueue.h | 2 +- src/util/Random.h | 2 +- src/util/Serializer/ByteBufferSerializer.h | 2 +- src/util/Simple8bCode.h | 2 +- src/util/StringUtils.cpp | 15 +- src/util/StringUtils.h | 112 +++----------- src/util/StringUtilsImpl.h | 97 ++++++++++++ src/util/TaskQueue.h | 2 +- src/util/ThreadSafeQueue.h | 2 +- src/util/Views.h | 146 +++++++++++------- src/util/http/MediaTypes.cpp | 6 +- test/AddCombinedRowToTableTest.cpp | 2 +- test/AlgorithmTest.cpp | 30 ++-- test/AsyncStreamTest.cpp | 4 +- test/BenchmarkMeasurementContainerTest.cpp | 10 +- test/CMakeLists.txt | 1 + test/CallFixedSizeTest.cpp | 2 +- test/CompactStringVectorTest.cpp | 2 +- test/ComparisonWithNanTest.cpp | 4 +- test/CompressedRelationsTest.cpp | 27 ++-- test/ConfigManagerTest.cpp | 18 +-- test/DeltaTriplesTest.cpp | 2 +- test/FindUndefRangesTest.cpp | 4 +- test/GeoPointTest.cpp | 1 + test/GroupByTest.cpp | 8 +- test/HttpTest.cpp | 2 +- test/IdTableHelpersTest.cpp | 63 ++++---- test/IdTableTest.cpp | 13 +- test/JoinAlgorithmsTest.cpp | 4 +- test/JoinTest.cpp | 16 +- test/LocalVocabTest.cpp | 8 +- test/LocatedTriplesTest.cpp | 2 +- test/MemorySizeTest.cpp | 36 ++--- test/OrderByTest.cpp | 8 +- test/ParallelMultiwayMergeTest.cpp | 22 +-- test/PrefilterExpressionIndexTest.cpp | 2 +- test/QueryPlannerTestHelpers.h | 4 +- test/RandomTest.cpp | 33 ++-- test/RdfParserTest.cpp | 2 +- test/RelationalExpressionTest.cpp | 6 +- test/ResultTableColumnOperationsTest.cpp | 2 +- test/SortTest.cpp | 8 +- test/StringUtilsTest.cpp | 11 +- test/ThreadSafeQueueTest.cpp | 10 +- test/ViewsTest.cpp | 18 +-- test/backports/CMakeLists.txt | 6 + test/backports/DebugJoinView.cpp | 38 +++++ test/backports/algorithmTest.cpp | 9 ++ test/engine/BindTest.cpp | 4 +- test/engine/CartesianProductJoinTest.cpp | 8 +- test/engine/DistinctTest.cpp | 10 +- test/engine/IndexScanTest.cpp | 2 
+- test/engine/ValuesForTesting.h | 17 +- .../idTable/CompressedExternalIdTableTest.cpp | 12 +- test/index/PatternCreatorTest.cpp | 4 +- test/util/IdTableHelpers.cpp | 50 +++--- test/util/RandomTestHelpers.h | 4 +- 163 files changed, 1258 insertions(+), 1026 deletions(-) create mode 100644 src/backports/algorithm.h create mode 100644 src/backports/concepts.h create mode 100644 src/util/StringUtilsImpl.h create mode 100644 test/backports/CMakeLists.txt create mode 100644 test/backports/DebugJoinView.cpp create mode 100644 test/backports/algorithmTest.cpp diff --git a/.github/workflows/native-build.yml b/.github/workflows/native-build.yml index 41e7561e23..da8bc0f727 100644 --- a/.github/workflows/native-build.yml +++ b/.github/workflows/native-build.yml @@ -40,6 +40,10 @@ jobs: - compiler: clang compiler-version: 13 include: + - compiler: gcc + compiler-version: 11 + additional-cmake-options: "-DUSE_CPP_17_BACKPORTS=ON" + build-type: Release - compiler: clang compiler-version: 16 asan-flags: "-fsanitize=address -fno-omit-frame-pointer" diff --git a/CMakeLists.txt b/CMakeLists.txt index 0503cd210f..3679de4c51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,6 +96,17 @@ FetchContent_Declare( SOURCE_SUBDIR runtime/Cpp ) +################################# +# Range v3 (for C++-17 backwards compatibility) +################################ +FetchContent_Declare( + range-v3 + GIT_REPOSITORY https://github.com/joka921/range-v3 + GIT_TAG 1dc0b09abab1bdc7d085a78754abd5c6e37a5d0c # 0.12.0 +) + + + ################################ # Threading ################################ @@ -184,6 +195,14 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") # Enable the specification of additional compiler flags manually from the commandline set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ADDITIONAL_COMPILER_FLAGS}") +# Enable the manual usage of the C++ 17 backports (currently `range-v3` instead +# of `std::ranges` and the `std::enable_if_t` based expansion of the concept +# macros 
from `range-v3`). +set(USE_CPP_17_BACKPORTS OFF CACHE BOOL "Use the C++17 backports (range-v3 and enable_if_t instead of std::ranges and concepts)") +if (${USE_CPP_17_BACKPORTS}) + add_definitions("-DQLEVER_CPP_17 -DCPP_CXX_CONCEPTS=0") +endif() + # Enable the specification of additional linker flags manually from the commandline set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS}") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${ADDITIONAL_LINKER_FLAGS}") @@ -321,7 +340,7 @@ FetchContent_Declare( ################################ # Apply FetchContent ################################ -FetchContent_MakeAvailable(googletest ctre abseil re2 stxxl fsst s2 nlohmann-json antlr) +FetchContent_MakeAvailable(googletest ctre abseil re2 stxxl fsst s2 nlohmann-json antlr range-v3) # Disable some warnings in RE2, STXXL, and GTEST target_compile_options(s2 PRIVATE -Wno-sign-compare -Wno-unused-parameter -Wno-class-memaccess -Wno-comment -Wno-redundant-move -Wno-unknown-warning-option -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-unused-but-set-variable -Wno-unused-function) target_compile_options(re2 PRIVATE -Wno-unused-parameter) @@ -333,6 +352,7 @@ include_directories(${ctre_SOURCE_DIR}/single-header) target_compile_options(fsst PRIVATE -Wno-extra -Wno-all -Wno-error) target_compile_options(fsst12 PRIVATE -Wno-extra -Wno-all -Wno-error) include_directories(${fsst_SOURCE_DIR}) +include_directories(${range-v3_SOURCE_DIR}/include) target_compile_options(antlr4_static PRIVATE -Wno-all -Wno-extra -Wno-error -Wno-deprecated-declarations) # Only required because a lot of classes that do not explicitly link against antlr4_static use the headers.
include_directories(SYSTEM "${antlr_SOURCE_DIR}/runtime/Cpp/runtime/src") diff --git a/src/backports/algorithm.h b/src/backports/algorithm.h new file mode 100644 index 0000000000..90b4e2884c --- /dev/null +++ b/src/backports/algorithm.h @@ -0,0 +1,59 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#pragma once + +#include +#include +#include +#include + +// The following defines namespaces `ql::ranges` and `ql::views` that are almost +// drop-in replacements for `std::ranges` and `std::views`. In C++20 mode (when +// the `QLEVER_CPP_17` macro is not used), these namespaces are simply aliases +// for `std::ranges` and `std::views`. In C++17 mode they contain the ranges and +// views from Eric Niebler's `range-v3` library. NOTE: `ql::ranges::unique` +// currently doesn't work, because the interface to this function is different +// in both implementations. NOTE: There might be other caveats which we are +// currently not aware of, because they only affect functions that we currently +// don't use. For those, the following header can be expanded in the future. +#ifndef QLEVER_CPP_17 +#include +#include +#endif + +namespace ql { + +namespace ranges { +#ifdef QLEVER_CPP_17 +using namespace ::ranges; + +// The `view` concept (which is rather important when implementing custom views) +// is in a different namespace in range-v3, so we make it manually accessible. +template +CPP_concept view = ::ranges::cpp20::view; +#else +using namespace std::ranges; +#endif +} // namespace ranges + +namespace views { +#ifdef QLEVER_CPP_17 +using namespace ::ranges::views; +#else +using namespace std::views; +#endif +} // namespace views + +// The namespace `ql::concepts` includes concepts that are contained in the +// C++20 standard as well as in `range-v3`.
+namespace concepts { +#ifdef QLEVER_CPP_17 +using namespace ::concepts; +#else +using namespace std; +#endif +} // namespace concepts + +} // namespace ql diff --git a/src/backports/concepts.h b/src/backports/concepts.h new file mode 100644 index 0000000000..ad0159da32 --- /dev/null +++ b/src/backports/concepts.h @@ -0,0 +1,17 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: Johannes Kalmbach + +#pragma once + +// Define the following macros: +// `QL_OPT_CONCEPT(arg)` which expands to `arg` in C++20 mode, and to nothing in +// C++17 mode. It can be used to easily opt out of concepts that are only used +// for documentation and increased safety and not for overload resolution. +// Example usage: +// `QL_OPT_CONCEPT(std::ranges::view) auto x = someFunction();` +#ifdef QLEVER_CPP_17 +#define QL_OPT_CONCEPT(arg) +#else +#define QL_OPT_CONCEPT(arg) arg +#endif diff --git a/src/engine/AddCombinedRowToTable.h b/src/engine/AddCombinedRowToTable.h index 8c8939f64c..c43e298a88 100644 --- a/src/engine/AddCombinedRowToTable.h +++ b/src/engine/AddCombinedRowToTable.h @@ -349,8 +349,8 @@ class AddCombinedRowToIdTable { // Make sure to reset `mergedVocab_` so it is in a valid state again. mergedVocab_ = LocalVocab{}; // Only merge non-null vocabs.
- auto range = currentVocabs_ | std::views::filter(toBool) | - std::views::transform(dereference); + auto range = currentVocabs_ | ql::views::filter(toBool) | + ql::views::transform(dereference); mergedVocab_.mergeWith(std::ranges::ref_view{range}); } } diff --git a/src/engine/Bind.cpp b/src/engine/Bind.cpp index 2419531888..95de8a4dfe 100644 --- a/src/engine/Bind.cpp +++ b/src/engine/Bind.cpp @@ -86,8 +86,8 @@ IdTable Bind::cloneSubView(const IdTable& idTable, const std::pair& subrange) { IdTable result(idTable.numColumns(), idTable.getAllocator()); result.resize(subrange.second - subrange.first); - std::ranges::copy(idTable.begin() + subrange.first, - idTable.begin() + subrange.second, result.begin()); + ql::ranges::copy(idTable.begin() + subrange.first, + idTable.begin() + subrange.second, result.begin()); return result; } diff --git a/src/engine/CallFixedSize.h b/src/engine/CallFixedSize.h index 065a7f51a3..9a62457597 100644 --- a/src/engine/CallFixedSize.h +++ b/src/engine/CallFixedSize.h @@ -56,7 +56,7 @@ template auto callLambdaForIntArray(std::array array, auto&& lambda, auto&&... 
args) { AD_CONTRACT_CHECK( - std::ranges::all_of(array, [](auto el) { return el <= maxValue; })); + ql::ranges::all_of(array, [](auto el) { return el <= maxValue; })); using ArrayType = std::array; // Call the `lambda` when the correct compile-time `Int`s are given as a @@ -131,7 +131,7 @@ decltype(auto) callFixedSize(std::array ints, auto&& functor, static_assert(NumIntegers > 0); // TODO Use `std::bind_back` auto p = [](int i) { return detail::mapToZeroIfTooLarge(i, MaxValue); }; - std::ranges::transform(ints, ints.begin(), p); + ql::ranges::transform(ints, ints.begin(), p); // The only step that remains is to lift our single runtime `value` which // is in the range `[0, (MaxValue +1)^ NumIntegers]` to a compile-time diff --git a/src/engine/CartesianProductJoin.cpp b/src/engine/CartesianProductJoin.cpp index 401ba6df58..cedb832648 100644 --- a/src/engine/CartesianProductJoin.cpp +++ b/src/engine/CartesianProductJoin.cpp @@ -15,7 +15,7 @@ CartesianProductJoin::CartesianProductJoin( children_{std::move(children)}, chunkSize_{chunkSize} { AD_CONTRACT_CHECK(!children_.empty()); - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( children_, [](auto& child) { return child != nullptr; })); // Check that the variables of the passed in operations are in fact @@ -25,13 +25,13 @@ CartesianProductJoin::CartesianProductJoin( // false as soon as a duplicate is encountered. 
ad_utility::HashSet vars; auto checkVarsForOp = [&vars](const Operation& op) { - return std::ranges::all_of( - op.getExternallyVisibleVariableColumns() | std::views::keys, + return ql::ranges::all_of( + op.getExternallyVisibleVariableColumns() | ql::views::keys, [&vars](const Variable& variable) { return vars.insert(variable).second; }); }; - return std::ranges::all_of(childView(), checkVarsForOp); + return ql::ranges::all_of(childView(), checkVarsForOp); }(); AD_CONTRACT_CHECK(variablesAreDisjoint); } @@ -39,8 +39,8 @@ CartesianProductJoin::CartesianProductJoin( // ____________________________________________________________________________ std::vector CartesianProductJoin::getChildren() { std::vector result; - std::ranges::copy( - children_ | std::views::transform([](auto& ptr) { return ptr.get(); }), + ql::ranges::copy( + children_ | ql::views::transform([](auto& ptr) { return ptr.get(); }), std::back_inserter(result)); return result; } @@ -49,28 +49,28 @@ std::vector CartesianProductJoin::getChildren() { string CartesianProductJoin::getCacheKeyImpl() const { return "CARTESIAN PRODUCT JOIN " + ad_utility::lazyStrJoin( - std::views::transform( + ql::views::transform( childView(), [](auto& child) { return child.getCacheKey(); }), " "); } // ____________________________________________________________________________ size_t CartesianProductJoin::getResultWidth() const { - auto view = childView() | std::views::transform(&Operation::getResultWidth); + auto view = childView() | ql::views::transform(&Operation::getResultWidth); return std::reduce(view.begin(), view.end(), 0UL, std::plus{}); } // ____________________________________________________________________________ size_t CartesianProductJoin::getCostEstimate() { auto childSizes = - childView() | std::views::transform(&Operation::getCostEstimate); + childView() | ql::views::transform(&Operation::getCostEstimate); return getSizeEstimate() + std::reduce(childSizes.begin(), childSizes.end(), 0UL, std::plus{}); } // 
____________________________________________________________________________ uint64_t CartesianProductJoin::getSizeEstimateBeforeLimit() { - auto view = childView() | std::views::transform(&Operation::getSizeEstimate); + auto view = childView() | ql::views::transform(&Operation::getSizeEstimate); return std::reduce(view.begin(), view.end(), 1UL, std::multiplies{}); } @@ -86,7 +86,7 @@ float CartesianProductJoin::getMultiplicity([[maybe_unused]] size_t col) { bool CartesianProductJoin::knownEmptyResult() { // If children were empty, returning false would be the wrong behavior. AD_CORRECTNESS_CHECK(!children_.empty()); - return std::ranges::any_of(childView(), &Operation::knownEmptyResult); + return ql::ranges::any_of(childView(), &Operation::knownEmptyResult); } // ____________________________________________________________________________ @@ -138,16 +138,15 @@ ProtoResult CartesianProductJoin::computeResult(bool requestLaziness) { LocalVocab staticMergedVocab{}; staticMergedVocab.mergeWith( subResults | - std::views::transform([](const auto& result) -> const LocalVocab& { + ql::views::transform([](const auto& result) -> const LocalVocab& { return result->localVocab(); })); if (!requestLaziness) { AD_CORRECTNESS_CHECK(!lazyResult); - return { - writeAllColumns(subResults | std::views::transform(&Result::idTable), - getLimit()._offset, getLimit().limitOrDefault()), - resultSortedOn(), std::move(staticMergedVocab)}; + return {writeAllColumns(subResults | ql::views::transform(&Result::idTable), + getLimit()._offset, getLimit().limitOrDefault()), + resultSortedOn(), std::move(staticMergedVocab)}; } if (lazyResult) { @@ -159,7 +158,7 @@ ProtoResult CartesianProductJoin::computeResult(bool requestLaziness) { // Owning view wrapper to please gcc 11. 
return {produceTablesLazily(std::move(staticMergedVocab), ad_utility::OwningView{std::move(subResults)} | - std::views::transform(&Result::idTable), + ql::views::transform(&Result::idTable), getLimit()._offset, getLimit().limitOrDefault()), resultSortedOn()}; } @@ -192,11 +191,11 @@ IdTable CartesianProductJoin::writeAllColumns( // single result is left. This can probably be done by using the // `ProtoResult`. - auto sizesView = std::views::transform(idTables, &IdTable::size); + auto sizesView = ql::views::transform(idTables, &IdTable::size); auto totalResultSize = std::reduce(sizesView.begin(), sizesView.end(), 1UL, std::multiplies{}); - if (!std::ranges::empty(idTables) && sizesView.back() != 0) { + if (!ql::ranges::empty(idTables) && sizesView.back() != 0) { totalResultSize += (totalResultSize / sizesView.back()) * lastTableOffset; } else { AD_CORRECTNESS_CHECK(lastTableOffset == 0); @@ -254,7 +253,7 @@ CartesianProductJoin::calculateSubResults(bool requestLaziness) { std::shared_ptr lazyResult = nullptr; auto children = childView(); - AD_CORRECTNESS_CHECK(!std::ranges::empty(children)); + AD_CORRECTNESS_CHECK(!ql::ranges::empty(children)); // Get all child results (possibly with limit, see above). 
for (Operation& child : children) { if (limitIfPresent.has_value() && child.supportsLimit()) { @@ -346,7 +345,7 @@ Result::Generator CartesianProductJoin::createLazyConsumer( size_t producedTableSize = 0; for (auto& idTableAndVocab : produceTablesLazily( std::move(localVocab), - std::views::transform( + ql::views::transform( idTables, [](const auto& wrapper) -> const IdTable& { return wrapper; }), offset, limit, lastTableOffset)) { diff --git a/src/engine/CartesianProductJoin.h b/src/engine/CartesianProductJoin.h index 72f5ff9a12..8c0a071c98 100644 --- a/src/engine/CartesianProductJoin.h +++ b/src/engine/CartesianProductJoin.h @@ -21,15 +21,14 @@ class CartesianProductJoin : public Operation { // TODO We can move this whole children management into a base class // and clean up the implementation of several other children. auto childView() { - return std::views::transform(children_, [](auto& child) -> Operation& { + return ql::views::transform(children_, [](auto& child) -> Operation& { return *child->getRootOperation(); }); } auto childView() const { - return std::views::transform(children_, - [](auto& child) -> const Operation& { - return *child->getRootOperation(); - }); + return ql::views::transform(children_, [](auto& child) -> const Operation& { + return *child->getRootOperation(); + }); } public: diff --git a/src/engine/CheckUsePatternTrick.cpp b/src/engine/CheckUsePatternTrick.cpp index 79ed50969b..866caa2f9b 100644 --- a/src/engine/CheckUsePatternTrick.cpp +++ b/src/engine/CheckUsePatternTrick.cpp @@ -4,10 +4,10 @@ #include "./CheckUsePatternTrick.h" -#include #include #include +#include "backports/algorithm.h" #include "parser/GraphPatternOperation.h" namespace checkUsePatternTrick { @@ -15,7 +15,7 @@ namespace checkUsePatternTrick { bool isVariableContainedInGraphPattern( const Variable& variable, const ParsedQuery::GraphPattern& graphPattern, const SparqlTriple* tripleToIgnore) { - if (std::ranges::any_of( + if (ql::ranges::any_of( graphPattern._filters, 
[&variable](const SparqlFilter& filter) { return filter.expression_.isVariableContained(variable); })) { @@ -25,7 +25,7 @@ bool isVariableContainedInGraphPattern( return isVariableContainedInGraphPatternOperation(variable, op, tripleToIgnore); }; - return std::ranges::any_of(graphPattern._graphPatterns, check); + return ql::ranges::any_of(graphPattern._graphPatterns, check); } namespace p = parsedQuery; @@ -101,7 +101,7 @@ static void rewriteTriplesForPatternTrick(const PatternTrickTuple& subAndPred, auto findAndRewriteMatchingTriple = [&subAndPred, &triples]( auto triplePosition, size_t additionalScanColumn) { - auto matchingTriple = std::ranges::find_if( + auto matchingTriple = ql::ranges::find_if( triples, [&subAndPred, triplePosition](const SparqlTriple& t) { return std::invoke(triplePosition, t) == subAndPred.subject_ && t.p_.isIri() && !isVariable(t.p_); @@ -231,7 +231,7 @@ std::optional isTripleSuitableForPatternTrick( std::vector variables{triple.s_.getVariable().name(), triple.o_.getVariable().name(), triple.p_.asString()}; - std::ranges::sort(variables); + ql::ranges::sort(variables); if (std::unique(variables.begin(), variables.end()) != variables.end()) { return std::nullopt; } @@ -270,12 +270,12 @@ std::optional isTripleSuitableForPatternTrick( // Check that the pattern trick triple is the only place in the query // where the predicate variable (and the object variable in the three // variables case) occurs. 
- if (std::ranges::any_of(patternTrickData.variablesNotAllowedInRestOfQuery_, - [&](const Variable& variable) { - return isVariableContainedInGraphPattern( - variable, parsedQuery->_rootGraphPattern, - &triple); - })) { + if (ql::ranges::any_of(patternTrickData.variablesNotAllowedInRestOfQuery_, + [&](const Variable& variable) { + return isVariableContainedInGraphPattern( + variable, parsedQuery->_rootGraphPattern, + &triple); + })) { return std::nullopt; } diff --git a/src/engine/Distinct.cpp b/src/engine/Distinct.cpp index 06b9718540..a3047569a1 100644 --- a/src/engine/Distinct.cpp +++ b/src/engine/Distinct.cpp @@ -84,8 +84,8 @@ ProtoResult Distinct::computeResult(bool requestLaziness) { // _____________________________________________________________________________ bool Distinct::matchesRow(const auto& a, const auto& b) const { - return std::ranges::all_of(keepIndices_, - [&a, &b](ColumnIndex i) { return a[i] == b[i]; }); + return ql::ranges::all_of(keepIndices_, + [&a, &b](ColumnIndex i) { return a[i] == b[i]; }); } // _____________________________________________________________________________ @@ -100,7 +100,7 @@ IdTable Distinct::distinct( // Variant of `std::ranges::unique` that allows to skip the begin rows of // elements found in the previous table. auto begin = - std::ranges::find_if(result, [this, &previousRow](const auto& row) { + ql::ranges::find_if(result, [this, &previousRow](const auto& row) { // Without explicit this clang seems to // think the this capture is redundant. return !previousRow.has_value() || @@ -111,12 +111,12 @@ IdTable Distinct::distinct( auto dest = result.begin(); if (begin == dest) { // Optimization to avoid redundant move operations. - begin = std::ranges::adjacent_find(begin, end, - [this](const auto& a, const auto& b) { - // Without explicit this clang seems to - // think the this capture is redundant. 
- return this->matchesRow(a, b); - }); + begin = ql::ranges::adjacent_find(begin, end, + [this](const auto& a, const auto& b) { + // Without explicit this clang seems to + // think the this capture is redundant. + return this->matchesRow(a, b); + }); dest = begin; if (begin != end) { ++begin; @@ -154,13 +154,13 @@ IdTable Distinct::outOfPlaceDistinct(const IdTable& dynInput) const { auto end = inputView.end(); while (begin < end) { int64_t allowedOffset = std::min(end - begin, CHUNK_SIZE); - begin = std::ranges::unique_copy(begin, begin + allowedOffset, - std::back_inserter(output), - [this](const auto& a, const auto& b) { - // Without explicit this clang seems to - // think the this capture is redundant. - return this->matchesRow(a, b); - }) + begin = ql::ranges::unique_copy(begin, begin + allowedOffset, + std::back_inserter(output), + [this](const auto& a, const auto& b) { + // Without explicit this clang seems to + // think the this capture is redundant. + return this->matchesRow(a, b); + }) .in; checkCancellation(); // Skip to next unique value @@ -169,12 +169,12 @@ IdTable Distinct::outOfPlaceDistinct(const IdTable& dynInput) const { // This can only be called when dynInput is not empty, so `begin[-1]` is // always valid. auto lastRow = begin[-1]; - begin = std::ranges::find_if(begin, begin + allowedOffset, - [this, &lastRow](const auto& row) { - // Without explicit this clang seems to - // think the this capture is redundant. - return !this->matchesRow(row, lastRow); - }); + begin = ql::ranges::find_if(begin, begin + allowedOffset, + [this, &lastRow](const auto& row) { + // Without explicit this clang seems to + // think the this capture is redundant. 
+ return !this->matchesRow(row, lastRow); + }); checkCancellation(); } while (begin != end && matchesRow(*begin, begin[-1])); } diff --git a/src/engine/Engine.cpp b/src/engine/Engine.cpp index 7bc031f694..a9d8c80529 100644 --- a/src/engine/Engine.cpp +++ b/src/engine/Engine.cpp @@ -55,7 +55,7 @@ void Engine::sort(IdTable& idTable, const std::vector& sortCols) { size_t Engine::countDistinct(IdTableView<0> input, const std::function& checkCancellation) { AD_EXPENSIVE_CHECK( - std::ranges::is_sorted(input, std::ranges::lexicographical_compare), + ql::ranges::is_sorted(input, ql::ranges::lexicographical_compare), "Input to Engine::countDistinct must be sorted"); if (input.empty()) { return 0; diff --git a/src/engine/Engine.h b/src/engine/Engine.h index 47a415674a..577e982f81 100644 --- a/src/engine/Engine.h +++ b/src/engine/Engine.h @@ -3,11 +3,11 @@ // Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) #pragma once -#include #include #include #include +#include "backports/algorithm.h" #include "engine/IndexSequence.h" #include "engine/idTable/IdTable.h" #include "global/Constants.h" diff --git a/src/engine/ExportQueryExecutionTrees.cpp b/src/engine/ExportQueryExecutionTrees.cpp index 6cea915b55..3375e82924 100644 --- a/src/engine/ExportQueryExecutionTrees.cpp +++ b/src/engine/ExportQueryExecutionTrees.cpp @@ -19,7 +19,7 @@ bool getResultForAsk(const std::shared_ptr& result) { if (result->isFullyMaterialized()) { return !result->idTable().empty(); } else { - return std::ranges::any_of(result->idTables(), [](const auto& pair) { + return ql::ranges::any_of(result->idTables(), [](const auto& pair) { return !pair.idTable_.empty(); }); } @@ -139,7 +139,7 @@ ExportQueryExecutionTrees::getRowIndices(LimitOffsetClause limitOffset, // If there is something to be exported, yield it. 
if (numRowsToBeExported > 0) { co_yield {std::move(tableWithVocab), - std::views::iota(rangeBegin, rangeBegin + numRowsToBeExported)}; + ql::views::iota(rangeBegin, rangeBegin + numRowsToBeExported)}; } // Add to `resultSize` and update the effective offset (which becomes zero @@ -565,8 +565,8 @@ ExportQueryExecutionTrees::selectQueryResultToStream( selectClause.getSelectedVariablesAsStrings(); // In the CSV format, the variables don't include the question mark. if (format == MediaType::csv) { - std::ranges::for_each(variables, - [](std::string& var) { var = var.substr(1); }); + ql::ranges::for_each(variables, + [](std::string& var) { var = var.substr(1); }); } co_yield absl::StrJoin(variables, std::string_view{&separator, 1}); co_yield '\n'; @@ -688,7 +688,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: std::shared_ptr result = qet.getResult(true); // In the XML format, the variables don't include the question mark. - auto varsWithoutQuestionMark = std::views::transform( + auto varsWithoutQuestionMark = ql::views::transform( variables, [](std::string_view var) { return var.substr(1); }); for (std::string_view var : varsWithoutQuestionMark) { co_yield absl::StrCat("\n "sv); @@ -740,7 +740,7 @@ ad_utility::streams::stream_generator ExportQueryExecutionTrees:: qet.selectedVariablesToColumnIndices(selectClause, false); auto vars = selectClause.getSelectedVariablesAsStrings(); - std::ranges::for_each(vars, [](std::string& var) { var = var.substr(1); }); + ql::ranges::for_each(vars, [](std::string& var) { var = var.substr(1); }); nlohmann::json jsonVars = vars; co_yield absl::StrCat(R"({"head":{"vars":)", jsonVars.dump(), R"(},"results":{"bindings":[)"); diff --git a/src/engine/ExportQueryExecutionTrees.h b/src/engine/ExportQueryExecutionTrees.h index a1443e802d..93eb05a5b4 100644 --- a/src/engine/ExportQueryExecutionTrees.h +++ b/src/engine/ExportQueryExecutionTrees.h @@ -164,7 +164,7 @@ class ExportQueryExecutionTrees { // access the `IdTable` 
with. struct TableWithRange { TableConstRefWithVocab tableWithVocab_; - std::ranges::iota_view view_; + ql::ranges::iota_view view_; }; private: diff --git a/src/engine/Filter.cpp b/src/engine/Filter.cpp index 7e0c66b551..9ecdd85f7a 100644 --- a/src/engine/Filter.cpp +++ b/src/engine/Filter.cpp @@ -6,9 +6,9 @@ #include "./Filter.h" -#include #include +#include "backports/algorithm.h" #include "engine/CallFixedSize.h" #include "engine/QueryExecutionTree.h" #include "engine/sparqlExpressions/SparqlExpression.h" diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index a2f52e9e60..1ae50b1b79 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -49,7 +49,7 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector groupByVariables, // NOTE: It is tempting to do the same also for the aliases, but that would // break the case when an alias reuses a variable that was bound by a previous // alias. - std::ranges::sort(_groupByVariables, std::less<>{}, &Variable::name); + ql::ranges::sort(_groupByVariables, std::less<>{}, &Variable::name); auto sortColumns = computeSortColumns(subtree.get()); _subtree = @@ -179,8 +179,8 @@ uint64_t GroupBy::getSizeEstimateBeforeLimit() { // TODO Once we can use `std::views` this can be solved // more elegantly. 
- float minMultiplicity = std::ranges::min( - _groupByVariables | std::views::transform(varToMultiplicity)); + float minMultiplicity = ql::ranges::min( + _groupByVariables | ql::views::transform(varToMultiplicity)); return _subtree->getSizeEstimate() / minMultiplicity; } @@ -420,7 +420,7 @@ size_t GroupBy::searchBlockBoundaries( for (size_t pos = 0; pos < idTable.size(); pos++) { checkCancellation(); bool rowMatchesCurrentBlock = - std::ranges::all_of(currentGroupBlock, [&](const auto& colIdxAndValue) { + ql::ranges::all_of(currentGroupBlock, [&](const auto& colIdxAndValue) { return idTable(pos, colIdxAndValue.first) == colIdxAndValue.second; }); if (!rowMatchesCurrentBlock) { @@ -735,7 +735,7 @@ std::optional GroupBy::computeGroupByForFullIndexScan() const { } else if (!variableIsBoundInSubtree) { // The variable inside the COUNT() is not part of the input, so it is always // unbound and has a count of 0 in each group. - std::ranges::fill(table.getColumn(1), Id::makeFromInt(0)); + ql::ranges::fill(table.getColumn(1), Id::makeFromInt(0)); } // TODO This optimization should probably also apply if @@ -848,7 +848,7 @@ std::optional GroupBy::computeGroupByForJoinWithFullScan() const { const auto& index = getExecutionContext()->getIndex(); // TODO Simplify the following pattern by using - // `std::views::chunk_by` and implement a lazy version of this view for + // `ql::views::chunk_by` and implement a lazy version of this view for // input iterators. // Take care of duplicate values in the input. 
@@ -1021,7 +1021,7 @@ GroupBy::isSupportedAggregate(sparqlExpression::SparqlExpression* expr) { return std::nullopt; // `expr` is not a nested aggregated - if (std::ranges::any_of(expr->children(), [](const auto& ptr) { + if (ql::ranges::any_of(expr->children(), [](const auto& ptr) { return ptr->containsAggregate(); })) { return std::nullopt; @@ -1164,7 +1164,7 @@ void GroupBy::substituteGroupVariable( for (const auto& occurrence : occurrences) { sparqlExpression::VectorWithMemoryLimit values(allocator); values.resize(groupValues.size()); - std::ranges::copy(groupValues, values.begin()); + ql::ranges::copy(groupValues, values.begin()); auto newExpression = std::make_unique( std::move(values)); @@ -1276,7 +1276,7 @@ GroupBy::HashMapAggregationData::getSortedGroupColumns() } // Sort data. - std::ranges::sort(sortedKeys.begin(), sortedKeys.end()); + ql::ranges::sort(sortedKeys.begin(), sortedKeys.end()); // Get data in a column-wise manner. ArrayOrVector> result; @@ -1307,7 +1307,7 @@ void GroupBy::evaluateAlias( // have to be substituted away before evaluation auto substitutions = alias.groupedVariables_; - auto topLevelGroupedVariable = std::ranges::find_if( + auto topLevelGroupedVariable = ql::ranges::find_if( substitutions, [](HashMapGroupedVariableInformation& val) { return std::get_if(&val.occurrences_); }); @@ -1320,13 +1320,13 @@ void GroupBy::evaluateAlias( result->getColumn(topLevelGroupedVariable->resultColumnIndex_) .subspan(evaluationContext._beginIndex, evaluationContext.size()); decltype(auto) outValues = result->getColumn(alias.outCol_); - std::ranges::copy(groupValues, - outValues.begin() + evaluationContext._beginIndex); + ql::ranges::copy(groupValues, + outValues.begin() + evaluationContext._beginIndex); // We also need to store it for possible future use sparqlExpression::VectorWithMemoryLimit values(allocator); values.resize(groupValues.size()); - std::ranges::copy(groupValues, values.begin()); + ql::ranges::copy(groupValues, values.begin()); 
evaluationContext._previousResultsFromSameGroup.at(alias.outCol_) = sparqlExpression::copyExpressionResult( @@ -1345,8 +1345,8 @@ void GroupBy::evaluateAlias( // Copy to result table decltype(auto) outValues = result->getColumn(alias.outCol_); - std::ranges::copy(aggregateResults, - outValues.begin() + evaluationContext._beginIndex); + ql::ranges::copy(aggregateResults, + outValues.begin() + evaluationContext._beginIndex); // Copy the result so that future aliases may reuse it evaluationContext._previousResultsFromSameGroup.at(alias.outCol_) = @@ -1375,7 +1375,7 @@ void GroupBy::evaluateAlias( // Restore original children. Only necessary when the expression will be // used in the future (not the case for the hash map optimization). - // TODO Use `std::views::zip(info, originalChildren)`. + // TODO Use `ql::views::zip(info, originalChildren)`. for (size_t i = 0; i < info.size(); ++i) { auto& aggregate = info.at(i); auto parentAndIndex = aggregate.parentAndIndex_.value(); @@ -1434,7 +1434,7 @@ IdTable GroupBy::createResultFromHashMap( // Copy grouped by values for (size_t idx = 0; idx < aggregationData.numOfGroupedColumns_; ++idx) { - std::ranges::copy(sortedKeys.at(idx), result.getColumn(idx).begin()); + ql::ranges::copy(sortedKeys.at(idx), result.getColumn(idx).begin()); } // Initialize evaluation context diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index b01ede635b..9804c226c4 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -347,7 +347,7 @@ void HasPredicateScan::computeFreeO( for (Id patternId : hasPattern.getColumn(0)) { const auto& pattern = patterns[patternId.getInt()]; resultTable->resize(pattern.size()); - std::ranges::copy(pattern, resultTable->getColumn(0).begin()); + ql::ranges::copy(pattern, resultTable->getColumn(0).begin()); } } diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 5bf47dd4c8..0cd735863d 100644 --- a/src/engine/IndexScan.cpp +++ 
b/src/engine/IndexScan.cpp @@ -117,10 +117,10 @@ string IndexScan::getCacheKeyImpl() const { if (graphsToFilter_.has_value()) { // The graphs are stored as a hash set, but we need a deterministic order. std::vector graphIdVec; - std::ranges::transform(graphsToFilter_.value(), - std::back_inserter(graphIdVec), - &TripleComponent::toRdfLiteral); - std::ranges::sort(graphIdVec); + ql::ranges::transform(graphsToFilter_.value(), + std::back_inserter(graphIdVec), + &TripleComponent::toRdfLiteral); + ql::ranges::sort(graphIdVec); os << "\nFiltered by Graphs:"; os << absl::StrJoin(graphIdVec, " "); } @@ -146,8 +146,10 @@ size_t IndexScan::getResultWidth() const { // _____________________________________________________________________________ vector IndexScan::resultSortedOn() const { - auto resAsView = ad_utility::integerRange(ColumnIndex{numVariables_}); - std::vector result{resAsView.begin(), resAsView.end()}; + std::vector result; + for (auto i : ad_utility::integerRange(ColumnIndex{numVariables_})) { + result.push_back(i); + } for (size_t i = 0; i < additionalColumns_.size(); ++i) { if (additionalColumns_.at(i) == ADDITIONAL_COLUMN_GRAPH_ID) { result.push_back(numVariables_ + i); @@ -167,7 +169,7 @@ IndexScan::setPrefilterGetUpdatedQueryExecutionTree( } const auto& [sortedVar, colIdx] = optSortedVarColIdxPair.value(); auto it = - std::ranges::find(prefilterVariablePairs, sortedVar, ad_utility::second); + ql::ranges::find(prefilterVariablePairs, sortedVar, ad_utility::second); if (it != prefilterVariablePairs.end()) { return makeCopyWithAddedPrefilters( std::make_pair(it->first->clone(), colIdx)); @@ -190,7 +192,7 @@ VariableToColumnMap IndexScan::computeVariableToColumnMap() const { addCol(ptr->getVariable()); } } - std::ranges::for_each(additionalVariables_, addCol); + ql::ranges::for_each(additionalVariables_, addCol); return variableToColumnMap; } @@ -285,7 +287,7 @@ void IndexScan::determineMultiplicities() { } }(); for ([[maybe_unused]] size_t i : - 
std::views::iota(multiplicity_.size(), getResultWidth())) { + ql::views::iota(multiplicity_.size(), getResultWidth())) { multiplicity_.emplace_back(1); } AD_CONTRACT_CHECK(multiplicity_.size() == getResultWidth()); @@ -442,7 +444,7 @@ IndexScan::lazyScanForJoinOfTwoScans(const IndexScan& s1, const IndexScan& s2) { // _____________________________________________________________________________ Permutation::IdTableGenerator IndexScan::lazyScanForJoinOfColumnWithScan( std::span joinColumn) const { - AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn)); + AD_EXPENSIVE_CHECK(ql::ranges::is_sorted(joinColumn)); AD_CORRECTNESS_CHECK(numVariables_ <= 3 && numVariables_ > 0); AD_CONTRACT_CHECK(joinColumn.empty() || !joinColumn[0].isUndefined()); @@ -545,7 +547,7 @@ struct IndexScan::SharedGeneratorState { } auto& idTable = iterator_.value()->idTable_; auto joinColumn = idTable.getColumn(joinColumn_); - AD_EXPENSIVE_CHECK(std::ranges::is_sorted(joinColumn)); + AD_EXPENSIVE_CHECK(ql::ranges::is_sorted(joinColumn)); AD_CORRECTNESS_CHECK(!joinColumn.empty()); // Skip processing for undef case, it will be handled differently if (hasUndef_) { @@ -564,12 +566,12 @@ struct IndexScan::SharedGeneratorState { // matching blocks. auto startIterator = lastBlockIndex_.has_value() - ? std::ranges::upper_bound(newBlocks, lastBlockIndex_.value(), {}, - &CompressedBlockMetadata::blockIndex_) + ? 
ql::ranges::upper_bound(newBlocks, lastBlockIndex_.value(), {}, + &CompressedBlockMetadata::blockIndex_) : newBlocks.begin(); lastBlockIndex_ = newBlocks.back().blockIndex_; - std::ranges::move(startIterator, newBlocks.end(), - std::back_inserter(pendingBlocks_)); + ql::ranges::move(startIterator, newBlocks.end(), + std::back_inserter(pendingBlocks_)); } } @@ -588,8 +590,8 @@ Result::Generator IndexScan::createPrefilteredJoinSide( std::shared_ptr innerState) { if (innerState->hasUndef()) { AD_CORRECTNESS_CHECK(innerState->prefetchedValues_.empty()); - for (auto& value : std::ranges::subrange{innerState->iterator_.value(), - innerState->generator_.end()}) { + for (auto& value : ql::ranges::subrange{innerState->iterator_.value(), + innerState->generator_.end()}) { co_yield value; } co_return; diff --git a/src/engine/Join.cpp b/src/engine/Join.cpp index 93557d06e5..b7b25a8e74 100644 --- a/src/engine/Join.cpp +++ b/src/engine/Join.cpp @@ -371,10 +371,10 @@ void Join::join(const IdTable& a, const IdTable& b, IdTable* result) const { // The UNDEF values are right at the start, so this calculation works. 
size_t numUndefA = - std::ranges::upper_bound(joinColumnL, ValueId::makeUndefined()) - + ql::ranges::upper_bound(joinColumnL, ValueId::makeUndefined()) - joinColumnL.begin(); size_t numUndefB = - std::ranges::upper_bound(joinColumnR, ValueId::makeUndefined()) - + ql::ranges::upper_bound(joinColumnR, ValueId::makeUndefined()) - joinColumnR.begin(); std::pair undefRangeA{joinColumnL.begin(), joinColumnL.begin() + numUndefA}; std::pair undefRangeB{joinColumnR.begin(), joinColumnR.begin() + numUndefB}; @@ -389,11 +389,11 @@ void Join::join(const IdTable& a, const IdTable& b, IdTable* result) const { auto inverseAddRow = [&addRow](const auto& rowA, const auto& rowB) { addRow(rowB, rowA); }; - ad_utility::gallopingJoin(joinColumnR, joinColumnL, std::ranges::less{}, + ad_utility::gallopingJoin(joinColumnR, joinColumnL, ql::ranges::less{}, inverseAddRow, {}, cancellationCallback); } else if (b.size() / a.size() > GALLOP_THRESHOLD && numUndefA == 0 && numUndefB == 0) { - ad_utility::gallopingJoin(joinColumnL, joinColumnR, std::ranges::less{}, + ad_utility::gallopingJoin(joinColumnL, joinColumnR, ql::ranges::less{}, addRow, {}, cancellationCallback); } else { auto findSmallerUndefRangeLeft = @@ -414,12 +414,12 @@ void Join::join(const IdTable& a, const IdTable& b, IdTable* result) const { auto numOutOfOrder = [&]() { if (numUndefB == 0 && numUndefA == 0) { return ad_utility::zipperJoinWithUndef( - joinColumnL, joinColumnR, std::ranges::less{}, addRow, + joinColumnL, joinColumnR, ql::ranges::less{}, addRow, ad_utility::noop, ad_utility::noop, {}, cancellationCallback); } else { return ad_utility::zipperJoinWithUndef( - joinColumnL, joinColumnR, std::ranges::less{}, addRow, + joinColumnL, joinColumnR, ql::ranges::less{}, addRow, findSmallerUndefRangeLeft, findSmallerUndefRangeRight, {}, cancellationCallback); } diff --git a/src/engine/LazyGroupBy.h b/src/engine/LazyGroupBy.h index f95915eb09..389cbdab40 100644 --- a/src/engine/LazyGroupBy.h +++ b/src/engine/LazyGroupBy.h @@ 
-51,9 +51,9 @@ class LazyGroupBy { auto allAggregateInfoView() const { return aggregateAliases_ | - std::views::transform( + ql::views::transform( &GroupBy::HashMapAliasInformation::aggregateInfo_) | - std::views::join; + ql::views::join; } FRIEND_TEST(LazyGroupBy, verifyGroupConcatIsCorrectlyInitialized); diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 67ee0c36ab..a50e888aa7 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -19,7 +19,7 @@ LocalVocab LocalVocab::clone() const { // _____________________________________________________________________________ LocalVocab LocalVocab::merge(std::span vocabs) { LocalVocab result; - result.mergeWith(vocabs | std::views::transform(ad_utility::dereference)); + result.mergeWith(vocabs | ql::views::transform(ad_utility::dereference)); return result; } @@ -67,9 +67,9 @@ const LocalVocabEntry& LocalVocab::getWord( // _____________________________________________________________________________ std::vector LocalVocab::getAllWordsForTesting() const { std::vector result; - std::ranges::copy(primaryWordSet(), std::back_inserter(result)); + ql::ranges::copy(primaryWordSet(), std::back_inserter(result)); for (const auto& previous : otherWordSets_) { - std::ranges::copy(*previous, std::back_inserter(result)); + ql::ranges::copy(*previous, std::back_inserter(result)); } return result; } diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index 28d5ab2dac..e8bd3be550 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -16,6 +15,7 @@ #include #include +#include "backports/algorithm.h" #include "index/LocalVocabEntry.h" #include "util/BlankNodeManager.h" #include "util/Exception.h" @@ -115,7 +115,7 @@ class LocalVocab { // primary set of this `LocalVocab` remains unchanged. 
template void mergeWith(const R& vocabs) { - using std::views::filter; + using ql::views::filter; auto addWordSet = [this](const std::shared_ptr& set) { bool added = otherWordSets_.insert(set).second; size_ += static_cast(added) * set->size(); @@ -125,7 +125,7 @@ class LocalVocab { // typically don't compare equal to each other because of the`shared_ptr` // semantics. for (const auto& vocab : vocabs | filter(std::not_fn(&LocalVocab::empty))) { - std::ranges::for_each(vocab.otherWordSets_, addWordSet); + ql::ranges::for_each(vocab.otherWordSets_, addWordSet); addWordSet(vocab.primaryWordSet_); } @@ -134,12 +134,12 @@ class LocalVocab { ad_utility::BlankNodeManager::LocalBlankNodeManager; auto localManagersView = vocabs | - std::views::transform([](const LocalVocab& vocab) -> const auto& { + ql::views::transform([](const LocalVocab& vocab) -> const auto& { return vocab.localBlankNodeManager_; }); - auto it = std::ranges::find_if(localManagersView, - [](const auto& l) { return l != nullptr; }); + auto it = ql::ranges::find_if(localManagersView, + [](const auto& l) { return l != nullptr; }); if (it == localManagersView.end()) { return; } diff --git a/src/engine/MultiColumnJoin.cpp b/src/engine/MultiColumnJoin.cpp index 0c92e421c9..bb3e4e5995 100644 --- a/src/engine/MultiColumnJoin.cpp +++ b/src/engine/MultiColumnJoin.cpp @@ -260,12 +260,12 @@ void MultiColumnJoin::computeMultiColumnJoin( if (isCheap) { return ad_utility::zipperJoinWithUndef( leftJoinColumns, rightJoinColumns, - std::ranges::lexicographical_compare, addRow, ad_utility::noop, + ql::ranges::lexicographical_compare, addRow, ad_utility::noop, ad_utility::noop, ad_utility::noop, checkCancellationLambda); } else { return ad_utility::zipperJoinWithUndef( leftJoinColumns, rightJoinColumns, - std::ranges::lexicographical_compare, addRow, findUndef, findUndef, + ql::ranges::lexicographical_compare, addRow, findUndef, findUndef, ad_utility::noop, checkCancellationLambda); } }(); diff --git 
a/src/engine/Operation.cpp b/src/engine/Operation.cpp index 1a9f53fa76..3a25e7521c 100644 --- a/src/engine/Operation.cpp +++ b/src/engine/Operation.cpp @@ -442,7 +442,7 @@ void Operation::updateRuntimeInformationWhenOptimizedOut( // `totalTime_ - #sum of childrens' total time#` in `getOperationTime()`. // To set it to zero we thus have to set the `totalTime_` to that sum. auto timesOfChildren = _runtimeInfo->children_ | - std::views::transform(&RuntimeInformation::totalTime_); + ql::views::transform(&RuntimeInformation::totalTime_); _runtimeInfo->totalTime_ = std::reduce(timesOfChildren.begin(), timesOfChildren.end(), 0us); @@ -575,7 +575,7 @@ std::optional Operation::getPrimarySortKeyVariable() const { return std::nullopt; } - auto it = std::ranges::find( + auto it = ql::ranges::find( varToColMap, sortedIndices.front(), [](const auto& keyValue) { return keyValue.second.columnIndex_; }); if (it == varToColMap.end()) { diff --git a/src/engine/OptionalJoin.cpp b/src/engine/OptionalJoin.cpp index dea5fcc259..8f009d963e 100644 --- a/src/engine/OptionalJoin.cpp +++ b/src/engine/OptionalJoin.cpp @@ -258,7 +258,7 @@ auto OptionalJoin::computeImplementationFromIdTables( -> Implementation { auto implementation = Implementation::NoUndef; auto anyIsUndefined = [](auto column) { - return std::ranges::any_of(column, &Id::isUndefined); + return ql::ranges::any_of(column, &Id::isUndefined); }; for (size_t i = 0; i < joinColumns.size(); ++i) { auto [leftCol, rightCol] = joinColumns.at(i); @@ -308,7 +308,7 @@ void OptionalJoin::optionalJoin( auto rightPermuted = right.asColumnSubsetView(joinColumnData.permutationRight()); - auto lessThanBoth = std::ranges::lexicographical_compare; + auto lessThanBoth = ql::ranges::lexicographical_compare; auto rowAdder = ad_utility::AddCombinedRowToIdTable( joinColumns.size(), leftPermuted, rightPermuted, std::move(*result), diff --git a/src/engine/OrderBy.cpp b/src/engine/OrderBy.cpp index 8ee3a9c687..5d999e62bc 100644 --- a/src/engine/OrderBy.cpp 
+++ b/src/engine/OrderBy.cpp @@ -25,7 +25,7 @@ OrderBy::OrderBy(QueryExecutionContext* qec, subtree_{std::move(subtree)}, sortIndices_{std::move(sortIndices)} { AD_CONTRACT_CHECK(!sortIndices_.empty()); - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( sortIndices_, [this](ColumnIndex index) { return index < getResultWidth(); }, ad_utility::first)); diff --git a/src/engine/PathSearch.cpp b/src/engine/PathSearch.cpp index 9291fba19a..0a60197341 100644 --- a/src/engine/PathSearch.cpp +++ b/src/engine/PathSearch.cpp @@ -4,7 +4,6 @@ #include "PathSearch.h" -#include #include #include #include @@ -13,6 +12,7 @@ #include #include +#include "backports/algorithm.h" #include "engine/CallFixedSize.h" #include "engine/QueryExecutionTree.h" #include "engine/VariableToColumnMap.h" @@ -31,7 +31,7 @@ BinSearchWrapper::BinSearchWrapper(const IdTable& table, size_t startCol, // _____________________________________________________________________________ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { auto startIds = table_.getColumn(startCol_); - auto range = std::ranges::equal_range(startIds, node); + auto range = ql::ranges::equal_range(startIds, node); auto startIndex = std::distance(startIds.begin(), range.begin()); std::vector edges; @@ -47,7 +47,7 @@ std::vector BinSearchWrapper::outgoingEdes(const Id node) const { std::vector BinSearchWrapper::getSources() const { auto startIds = table_.getColumn(startCol_); std::vector sources; - std::ranges::unique_copy(startIds, std::back_inserter(sources)); + ql::ranges::unique_copy(startIds, std::back_inserter(sources)); return sources; } diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index c9496fe958..7f22de2020 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -182,7 +182,7 @@ std::vector> QueryExecutionTree::getJoinColumns( } } - std::ranges::sort(jcs, std::ranges::lexicographical_compare); + 
ql::ranges::sort(jcs, ql::ranges::lexicographical_compare); return jcs; } @@ -219,7 +219,7 @@ auto QueryExecutionTree::getSortedSubtreesAndJoinColumns( const VariableToColumnMap::value_type& QueryExecutionTree::getVariableAndInfoByColumnIndex(ColumnIndex colIdx) const { const auto& varColMap = getVariableColumns(); - auto it = std::ranges::find_if(varColMap, [leftCol = colIdx](const auto& el) { + auto it = ql::ranges::find_if(varColMap, [leftCol = colIdx](const auto& el) { return el.second.columnIndex_ == leftCol; }); AD_CONTRACT_CHECK(it != varColMap.end()); diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index a01aaaece5..9e0cd56083 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -8,12 +8,12 @@ #include -#include #include #include #include #include +#include "backports/algorithm.h" #include "engine/Bind.h" #include "engine/CartesianProductJoin.h" #include "engine/CheckUsePatternTrick.h" @@ -131,7 +131,7 @@ std::vector QueryPlanner::createExecutionTrees( // this is handled correctly in all cases. 
bool doGroupBy = !pq._groupByVariables.empty() || patternTrickTuple.has_value() || - std::ranges::any_of(pq.getAliases(), [](const Alias& alias) { + ql::ranges::any_of(pq.getAliases(), [](const Alias& alias) { return alias._expression.containsAggregate(); }); @@ -786,32 +786,31 @@ auto QueryPlanner::seedWithScansAndText( continue; } - auto addIndexScan = [this, pushPlan, node, - &relevantGraphs = - activeDatasetClauses_.defaultGraphs_]( - Permutation::Enum permutation, - std::optional triple = - std::nullopt) { - if (!triple.has_value()) { - triple = node.triple_.getSimple(); - } + auto addIndexScan = + [this, pushPlan, node, + &relevantGraphs = activeDatasetClauses_.defaultGraphs_]( + Permutation::Enum permutation, + std::optional triple = std::nullopt) { + if (!triple.has_value()) { + triple = node.triple_.getSimple(); + } - // We are inside a `GRAPH ?var {...}` clause, so all index scans have - // to add the graph variable as an additional column. - auto& additionalColumns = triple.value().additionalScanColumns_; - AD_CORRECTNESS_CHECK(!ad_utility::contains( - additionalColumns | std::views::keys, ADDITIONAL_COLUMN_GRAPH_ID)); - if (activeGraphVariable_.has_value()) { - additionalColumns.emplace_back(ADDITIONAL_COLUMN_GRAPH_ID, - activeGraphVariable_.value()); - } + // We are inside a `GRAPH ?var {...}` clause, so all index scans have + // to add the graph variable as an additional column. + auto& additionalColumns = triple.value().additionalScanColumns_; + AD_CORRECTNESS_CHECK(!ad_utility::contains( + additionalColumns | ql::views::keys, ADDITIONAL_COLUMN_GRAPH_ID)); + if (activeGraphVariable_.has_value()) { + additionalColumns.emplace_back(ADDITIONAL_COLUMN_GRAPH_ID, + activeGraphVariable_.value()); + } - // TODO Handle the case, that the Graph variable is also used - // inside the `GRAPH` clause, e.g. by being used inside a triple. + // TODO Handle the case, that the Graph variable is also used + // inside the `GRAPH` clause, e.g. 
by being used inside a triple. - pushPlan(makeSubtreePlan( - _qec, permutation, std::move(triple.value()), relevantGraphs)); - }; + pushPlan(makeSubtreePlan( + _qec, permutation, std::move(triple.value()), relevantGraphs)); + }; auto addFilter = [&filters = result.filters_](SparqlFilter filter) { filters.push_back(std::move(filter)); @@ -1208,10 +1207,10 @@ void QueryPlanner::applyFiltersIfPossible( continue; } - if (std::ranges::all_of(filters[i].expression_.containedVariables(), - [&plan](const auto& variable) { - return plan._qet->isVariableCovered(*variable); - })) { + if (ql::ranges::all_of(filters[i].expression_.containedVariables(), + [&plan](const auto& variable) { + return plan._qet->isVariableCovered(*variable); + })) { // Apply this filter. SubtreePlan newPlan = makeSubtreePlan(_qec, plan._qet, filters[i].expression_); @@ -1294,12 +1293,12 @@ size_t QueryPlanner::findUniqueNodeIds( const std::vector& connectedComponent) { ad_utility::HashSet uniqueNodeIds; auto nodeIds = connectedComponent | - std::views::transform(&SubtreePlan::_idsOfIncludedNodes); + ql::views::transform(&SubtreePlan::_idsOfIncludedNodes); // Check that all the `_idsOfIncludedNodes` are one-hot encodings of a single // value, i.e. they have exactly one bit set. - AD_CORRECTNESS_CHECK(std::ranges::all_of( + AD_CORRECTNESS_CHECK(ql::ranges::all_of( nodeIds, [](auto nodeId) { return std::popcount(nodeId) == 1; })); - std::ranges::copy(nodeIds, std::inserter(uniqueNodeIds, uniqueNodeIds.end())); + ql::ranges::copy(nodeIds, std::inserter(uniqueNodeIds, uniqueNodeIds.end())); return uniqueNodeIds.size(); } @@ -1341,10 +1340,9 @@ size_t QueryPlanner::countSubgraphs( std::vector graph, size_t budget) { // Remove duplicate plans from `graph`. 
auto getId = [](const SubtreePlan* v) { return v->_idsOfIncludedNodes; }; - std::ranges::sort(graph, std::ranges::less{}, getId); - graph.erase( - std::ranges::unique(graph, std::ranges::equal_to{}, getId).begin(), - graph.end()); + ql::ranges::sort(graph, ql::ranges::less{}, getId); + graph.erase(std::ranges::unique(graph, ql::ranges::equal_to{}, getId).begin(), + graph.end()); // Qlever currently limits the number of triples etc. per group to be <= 64 // anyway, so we can simply assert here. @@ -1409,7 +1407,7 @@ vector> QueryPlanner::fillDpTab( const vector>& children) { auto [initialPlans, additionalFilters] = seedWithScansAndText(tg, children, textLimits); - std::ranges::move(additionalFilters, std::back_inserter(filters)); + ql::ranges::move(additionalFilters, std::back_inserter(filters)); if (filters.size() > 64) { AD_THROW("At most 64 filters allowed at the moment."); } @@ -1419,7 +1417,7 @@ vector> QueryPlanner::fillDpTab( components[componentIndices.at(i)].push_back(std::move(initialPlans.at(i))); } vector> lastDpRowFromComponents; - for (auto& component : components | std::views::values) { + for (auto& component : components | ql::views::values) { std::vector g; for (const auto& plan : component) { g.push_back(&plan); @@ -1461,9 +1459,9 @@ vector> QueryPlanner::fillDpTab( uint64_t nodes = 0; uint64_t filterIds = 0; uint64_t textLimitIds = 0; - std::ranges::for_each( + ql::ranges::for_each( lastDpRowFromComponents | - std::views::transform([this](auto& vec) -> decltype(auto) { + ql::views::transform([this](auto& vec) -> decltype(auto) { return vec.at(findCheapestExecutionTree(vec)); }), [&](SubtreePlan& plan) { @@ -1603,7 +1601,7 @@ vector QueryPlanner::TripleGraph::pickFilters( coveredVariables.insert(node._variables.begin(), node._variables.end()); } for (auto& f : origFilters) { - if (std::ranges::any_of( + if (ql::ranges::any_of( f.expression_.containedVariables(), [&](const auto* var) { return coveredVariables.contains(*var); })) { ret.push_back(f); 
@@ -1775,7 +1773,7 @@ size_t QueryPlanner::findCheapestExecutionTree( return aCost < bCost; } }; - return std::ranges::min_element(lastRow, compare) - lastRow.begin(); + return ql::ranges::min_element(lastRow, compare) - lastRow.begin(); }; // _________________________________________________________________________________ @@ -1788,7 +1786,7 @@ size_t QueryPlanner::findSmallestExecutionTree( }; return tie(a) < tie(b); }; - return std::ranges::min_element(lastRow, compare) - lastRow.begin(); + return ql::ranges::min_element(lastRow, compare) - lastRow.begin(); }; // _____________________________________________________________________________ @@ -2138,7 +2136,7 @@ void QueryPlanner::QueryGraph::setupGraph( ad_utility::HashMap> result; for (const auto& node : nodes_) { for (const auto& var : - node->plan_->_qet->getVariableColumns() | std::views::keys) { + node->plan_->_qet->getVariableColumns() | ql::views::keys) { result[var].push_back(node.get()); } } @@ -2150,8 +2148,8 @@ void QueryPlanner::QueryGraph::setupGraph( ad_utility::HashMap> adjacentNodes = [&varToNode]() { ad_utility::HashMap> result; - for (auto& nodesThatContainSameVar : varToNode | std::views::values) { - // TODO Use std::views::cartesian_product + for (auto& nodesThatContainSameVar : varToNode | ql::views::values) { + // TODO Use ql::views::cartesian_product for (auto* n1 : nodesThatContainSameVar) { for (auto* n2 : nodesThatContainSameVar) { if (n1 != n2) { @@ -2216,12 +2214,12 @@ void QueryPlanner::GraphPatternPlanner::visitGroupOptionalOrMinus( // Optionals that occur before any of their variables have been bound, // actually behave like ordinary (Group)GraphPatterns. 
- auto variables = candidates[0]._qet->getVariableColumns() | std::views::keys; + auto variables = candidates[0]._qet->getVariableColumns() | ql::views::keys; using enum SubtreePlan::Type; if (auto type = candidates[0].type; (type == OPTIONAL || type == MINUS) && - std::ranges::all_of(variables, [this](const Variable& var) { + ql::ranges::all_of(variables, [this](const Variable& var) { return !boundVariables_.contains(var); })) { // A MINUS clause that doesn't share any variable with the preceding @@ -2240,7 +2238,7 @@ void QueryPlanner::GraphPatternPlanner::visitGroupOptionalOrMinus( // All variables seen so far are considered bound and cannot appear as the // RHS of a BIND operation. This is also true for variables from OPTIONALs // and MINUS clauses (this used to be a bug in an old version of the code). - std::ranges::for_each( + ql::ranges::for_each( variables, [this](const Variable& var) { boundVariables_.insert(var); }); // If our input is not OPTIONAL and not a MINUS, this means that we can still @@ -2568,9 +2566,9 @@ void QueryPlanner::GraphPatternPlanner::visitSubquery( plan._qet->getRootOperation()->setSelectedVariablesForSubquery( selectedVariables); }; - std::ranges::for_each(candidatesForSubquery, setSelectedVariables); + ql::ranges::for_each(candidatesForSubquery, setSelectedVariables); // A subquery must also respect LIMIT and OFFSET clauses - std::ranges::for_each(candidatesForSubquery, [&](SubtreePlan& plan) { + ql::ranges::for_each(candidatesForSubquery, [&](SubtreePlan& plan) { plan._qet->getRootOperation()->setLimit(arg.get()._limitOffset); }); visitGroupOptionalOrMinus(std::move(candidatesForSubquery)); diff --git a/src/engine/Result.cpp b/src/engine/Result.cpp index bc731e2433..a1cfa10583 100644 --- a/src/engine/Result.cpp +++ b/src/engine/Result.cpp @@ -92,7 +92,7 @@ Result::Result(Generator idTables, std::vector sortedBy) // _____________________________________________________________________________ // Apply `LimitOffsetClause` to given 
`IdTable`. void resizeIdTable(IdTable& idTable, const LimitOffsetClause& limitOffset) { - std::ranges::for_each( + ql::ranges::for_each( idTable.getColumns(), [offset = limitOffset.actualOffset(idTable.numRows()), upperBound = @@ -178,10 +178,10 @@ void Result::assertThatLimitWasRespected(const LimitOffsetClause& limitOffset) { // _____________________________________________________________________________ void Result::checkDefinedness(const VariableToColumnMap& varColMap) { auto performCheck = [](const auto& map, IdTable& idTable) { - return std::ranges::all_of(map, [&](const auto& varAndCol) { + return ql::ranges::all_of(map, [&](const auto& varAndCol) { const auto& [columnIndex, mightContainUndef] = varAndCol.second; if (mightContainUndef == ColumnIndexAndTypeInfo::AlwaysDefined) { - return std::ranges::all_of(idTable.getColumn(columnIndex), [](Id id) { + return ql::ranges::all_of(idTable.getColumn(columnIndex), [](Id id) { return id.getDatatype() != Datatype::Undefined; }); } @@ -239,12 +239,12 @@ void Result::runOnNewChunkComputed( void Result::assertSortOrderIsRespected( const IdTable& idTable, const std::vector& sortedBy) { AD_CONTRACT_CHECK( - std::ranges::all_of(sortedBy, [&idTable](ColumnIndex colIndex) { + ql::ranges::all_of(sortedBy, [&idTable](ColumnIndex colIndex) { return colIndex < idTable.numColumns(); })); AD_EXPENSIVE_CHECK( - std::ranges::is_sorted(idTable, compareRowsBySortColumns(sortedBy))); + ql::ranges::is_sorted(idTable, compareRowsBySortColumns(sortedBy))); } // _____________________________________________________________________________ diff --git a/src/engine/RuntimeInformation.cpp b/src/engine/RuntimeInformation.cpp index 2e9abd05c1..f9f2d851c8 100644 --- a/src/engine/RuntimeInformation.cpp +++ b/src/engine/RuntimeInformation.cpp @@ -109,9 +109,9 @@ void RuntimeInformation::setColumnNames(const VariableToColumnMap& columnMap) { // Resize the `columnNames_` vector such that we can use the keys from // columnMap (which are not 
necessarily consecutive) as indexes. - ColumnIndex maxColumnIndex = std::ranges::max( - columnMap | std::views::values | - std::views::transform(&ColumnIndexAndTypeInfo::columnIndex_)); + ColumnIndex maxColumnIndex = ql::ranges::max( + columnMap | ql::views::values | + ql::views::transform(&ColumnIndexAndTypeInfo::columnIndex_)); columnNames_.resize(maxColumnIndex + 1); // Now copy the `variable, index` pairs from the map to the vector. If the @@ -145,7 +145,7 @@ std::chrono::microseconds RuntimeInformation::getOperationTime() const { // computing that child is *not* included in this operation's // `totalTime_`. That's why we skip such children in the following loop. auto timesOfChildren = - children_ | std::views::transform(&RuntimeInformation::totalTime_); + children_ | ql::views::transform(&RuntimeInformation::totalTime_); // Prevent "negative" computation times in case totalTime_ was not // computed for this yet. return std::max(0us, totalTime_ - std::reduce(timesOfChildren.begin(), diff --git a/src/engine/Service.cpp b/src/engine/Service.cpp index 8c946a2fb3..81df6be64c 100644 --- a/src/engine/Service.cpp +++ b/src/engine/Service.cpp @@ -175,9 +175,9 @@ ProtoResult Service::computeResultImpl([[maybe_unused]] bool requestLaziness) { // for the variables sent in the response as they're maybe not read before // the bindings. 
std::vector expVariableKeys; - std::ranges::transform(parsedServiceClause_.visibleVariables_, - std::back_inserter(expVariableKeys), - [](const Variable& v) { return v.name().substr(1); }); + ql::ranges::transform(parsedServiceClause_.visibleVariables_, + std::back_inserter(expVariableKeys), + [](const Variable& v) { return v.name().substr(1); }); auto body = ad_utility::LazyJsonParser::parse(std::move(response.body_), {"results", "bindings"}); @@ -569,7 +569,7 @@ void Service::precomputeSiblingResult(std::shared_ptr left, for (auto& pair : pairs) { co_yield pair; } - for (auto& pair : std::ranges::subrange{it, prevGenerator.end()}) { + for (auto& pair : ql::ranges::subrange{it, prevGenerator.end()}) { co_yield pair; } }; diff --git a/src/engine/SpatialJoinAlgorithms.cpp b/src/engine/SpatialJoinAlgorithms.cpp index f95efa7154..82d5102df6 100644 --- a/src/engine/SpatialJoinAlgorithms.cpp +++ b/src/engine/SpatialJoinAlgorithms.cpp @@ -407,7 +407,7 @@ bool SpatialJoinAlgorithms::isContainedInBoundingBoxes( const std::vector& boundingBox, Point point) const { convertToNormalCoordinates(point); - return std::ranges::any_of(boundingBox, [point](const Box& aBox) { + return ql::ranges::any_of(boundingBox, [point](const Box& aBox) { return boost::geometry::covered_by(point, aBox); }); } @@ -512,11 +512,11 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { std::vector bbox = computeBoundingBox(p); results.clear(); - std::ranges::for_each(bbox, [&](const Box& bbox) { + ql::ranges::for_each(bbox, [&](const Box& bbox) { rtree.query(bgi::intersects(bbox), std::back_inserter(results)); }); - std::ranges::for_each(results, [&](const Value& res) { + ql::ranges::for_each(results, [&](const Value& res) { size_t rowLeft = res.second; size_t rowRight = i; if (!leftResSmaller) { diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 6e141ad518..612780e28b 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ 
b/src/engine/TextIndexScanForWord.cpp @@ -22,7 +22,7 @@ ProtoResult TextIndexScanForWord::computeResult( IdTable smallIdTable{getExecutionContext()->getAllocator()}; smallIdTable.setNumColumns(1); smallIdTable.resize(idTable.numRows()); - std::ranges::copy(idTable.getColumn(0), smallIdTable.getColumn(0).begin()); + ql::ranges::copy(idTable.getColumn(0), smallIdTable.getColumn(0).begin()); return {std::move(smallIdTable), resultSortedOn(), LocalVocab{}}; } diff --git a/src/engine/TextLimit.cpp b/src/engine/TextLimit.cpp index 4125f676cd..6ec7b7868f 100644 --- a/src/engine/TextLimit.cpp +++ b/src/engine/TextLimit.cpp @@ -34,11 +34,11 @@ ProtoResult TextLimit::computeResult([[maybe_unused]] bool requestLaziness) { auto compareScores = [this](const auto& lhs, const auto& rhs) { size_t lhsScore = 0; size_t rhsScore = 0; - std::ranges::for_each(scoreColumns_, - [&lhs, &rhs, &lhsScore, &rhsScore](const auto& col) { - lhsScore += lhs[col].getInt(); - rhsScore += rhs[col].getInt(); - }); + ql::ranges::for_each(scoreColumns_, + [&lhs, &rhs, &lhsScore, &rhsScore](const auto& col) { + lhsScore += lhs[col].getInt(); + rhsScore += rhs[col].getInt(); + }); if (lhsScore > rhsScore) { return 1; } else if (lhsScore < rhsScore) { @@ -49,7 +49,7 @@ ProtoResult TextLimit::computeResult([[maybe_unused]] bool requestLaziness) { auto compareEntities = [this](const auto& lhs, const auto& rhs) { auto it = - std::ranges::find_if(entityColumns_, [&lhs, &rhs](const auto& col) { + ql::ranges::find_if(entityColumns_, [&lhs, &rhs](const auto& col) { return lhs[col] < rhs[col] || lhs[col] > rhs[col]; }); @@ -64,8 +64,8 @@ ProtoResult TextLimit::computeResult([[maybe_unused]] bool requestLaziness) { return 0; }; - std::ranges::sort(idTable, [this, compareScores, compareEntities]( - const auto& lhs, const auto& rhs) { + ql::ranges::sort(idTable, [this, compareScores, compareEntities]( + const auto& lhs, const auto& rhs) { return compareEntities(lhs, rhs) == 1 || (compareEntities(lhs, rhs) == 0 && 
(compareScores(lhs, rhs) == 1 || diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index 1db8a7eb0d..3d6ec38262 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -363,7 +363,7 @@ std::shared_ptr TransitivePathBase::bindLeftOrRightSide( maxDist_)); } - auto& p = *std::ranges::min_element( + auto& p = *ql::ranges::min_element( candidates, {}, [](const auto& tree) { return tree->getCostEstimate(); }); // Note: The `variable` in the following structured binding is `const`, even diff --git a/src/engine/TransitivePathBinSearch.h b/src/engine/TransitivePathBinSearch.h index 7928cba9c1..4973d9da5e 100644 --- a/src/engine/TransitivePathBinSearch.h +++ b/src/engine/TransitivePathBinSearch.h @@ -47,7 +47,7 @@ struct BinSearchMap { * startIds_ == node. */ auto successors(const Id node) const { - auto range = std::ranges::equal_range(startIds_, node); + auto range = ql::ranges::equal_range(startIds_, node); auto startIndex = std::distance(startIds_.begin(), range.begin()); diff --git a/src/engine/Union.cpp b/src/engine/Union.cpp index 0fac7b31f3..46e5d06f4a 100644 --- a/src/engine/Union.cpp +++ b/src/engine/Union.cpp @@ -42,7 +42,7 @@ Union::Union(QueryExecutionContext* qec, _columnOrigins[it.second.columnIndex_][1] = NO_COLUMN; } } - AD_CORRECTNESS_CHECK(std::ranges::all_of(_columnOrigins, [](const auto& el) { + AD_CORRECTNESS_CHECK(ql::ranges::all_of(_columnOrigins, [](const auto& el) { return el[0] != NO_COLUMN || el[1] != NO_COLUMN; })); } @@ -75,7 +75,7 @@ VariableToColumnMap Union::computeVariableToColumnMap() const { // A variable is only guaranteed to always be bound if it exists in all the // subtrees and if it is guaranteed to be bound in all the subtrees. 
auto mightContainUndef = [this](const Variable& var) { - return std::ranges::any_of( + return ql::ranges::any_of( _subtrees, [&](const std::shared_ptr& subtree) { const auto& varCols = subtree->getVariableColumns(); return !varCols.contains(var) || @@ -86,7 +86,7 @@ VariableToColumnMap Union::computeVariableToColumnMap() const { // Note: it is tempting to declare `nextColumnIndex` inside the lambda // `addVariableColumnIfNotExists`, but that doesn't work because - // `std::ranges::for_each` takes the lambda by value and creates a new + // `ql::ranges::for_each` takes the lambda by value and creates a new // variable at every invocation. size_t nextColumnIndex = 0; auto addVariableColumnIfNotExists = @@ -102,14 +102,13 @@ VariableToColumnMap Union::computeVariableToColumnMap() const { } }; - auto addVariablesForSubtree = - [&addVariableColumnIfNotExists](const auto& subtree) { - std::ranges::for_each( - copySortedByColumnIndex(subtree->getVariableColumns()), - addVariableColumnIfNotExists); - }; + auto addVariablesForSubtree = [&addVariableColumnIfNotExists]( + const auto& subtree) { + ql::ranges::for_each(copySortedByColumnIndex(subtree->getVariableColumns()), + addVariableColumnIfNotExists); + }; - std::ranges::for_each(_subtrees, addVariablesForSubtree); + ql::ranges::for_each(_subtrees, addVariablesForSubtree); return variableColumns; } @@ -203,7 +202,7 @@ IdTable Union::computeUnion( [this]() { checkCancellation(); }); } else { ad_utility::chunkedFill( - std::ranges::subrange{ + ql::ranges::subrange{ targetColumn.begin() + offset, targetColumn.begin() + offset + inputTable.size()}, Id::makeUndefined(), chunkSize, [this]() { checkCancellation(); }); diff --git a/src/engine/Values.cpp b/src/engine/Values.cpp index 181f2f4207..7a4535ceed 100644 --- a/src/engine/Values.cpp +++ b/src/engine/Values.cpp @@ -18,7 +18,7 @@ Values::Values(QueryExecutionContext* qec, SparqlValues parsedValues) : Operation(qec), parsedValues_(std::move(parsedValues)) { AD_CONTRACT_CHECK( - 
std::ranges::all_of(parsedValues_._values, [&](const auto& row) { + ql::ranges::all_of(parsedValues_._values, [&](const auto& row) { return row.size() == parsedValues_._variables.size(); })); } diff --git a/src/engine/VariableToColumnMap.cpp b/src/engine/VariableToColumnMap.cpp index 00eeb64f47..ff7f221a2b 100644 --- a/src/engine/VariableToColumnMap.cpp +++ b/src/engine/VariableToColumnMap.cpp @@ -11,8 +11,8 @@ std::vector> copySortedByColumnIndex(VariableToColumnMap map) { std::vector> result{ std::make_move_iterator(map.begin()), std::make_move_iterator(map.end())}; - std::ranges::sort(result, std::less<>{}, - [](const auto& pair) { return pair.second.columnIndex_; }); + ql::ranges::sort(result, std::less<>{}, + [](const auto& pair) { return pair.second.columnIndex_; }); return result; } @@ -34,7 +34,7 @@ VariableToColumnMap makeVarToColMapForJoinOperation( const auto& colIdxRight = columnIndexWithType.columnIndex_; // Figure out if the column (from the right operand) is a join column. auto joinColumnIt = - std::ranges::find(joinColumns, colIdxRight, ad_utility::second); + ql::ranges::find(joinColumns, colIdxRight, ad_utility::second); if (joinColumnIt != joinColumns.end()) { // For non-optional joins, a join column is `AlwaysDefined` if it is // always defined in ANY of the inputs. For optional joins a join column diff --git a/src/engine/VariableToColumnMap.h b/src/engine/VariableToColumnMap.h index 7b31b5222f..e0ad443baf 100644 --- a/src/engine/VariableToColumnMap.h +++ b/src/engine/VariableToColumnMap.h @@ -8,6 +8,10 @@ #include "parser/data/Variable.h" #include "util/HashMap.h" +// TODO We have a cyclic dependency between `Id.h` and +// `VariableToColumnMap.h`. +using ColumnIndex = uint64_t; + // Store an index of a column together with additional information about that // column which can be inferred from the `QueryExecutionTree` without actually // computing the result. 
diff --git a/src/engine/idTable/CompressedExternalIdTable.h b/src/engine/idTable/CompressedExternalIdTable.h index 962d18b6aa..12bc406e16 100644 --- a/src/engine/idTable/CompressedExternalIdTable.h +++ b/src/engine/idTable/CompressedExternalIdTable.h @@ -7,10 +7,10 @@ #include -#include #include #include +#include "backports/algorithm.h" #include "engine/CallFixedSize.h" #include "engine/idTable/IdTable.h" #include "util/AsyncStream.h" @@ -116,12 +116,12 @@ class CompressedExternalIdTableWriter { // fine-grained) but only once we have a reasonable abstraction for // parallelism. std::vector> compressColumFutures; - for (auto i : std::views::iota(0u, numColumns())) { + for (auto i : ql::views::iota(0u, numColumns())) { compressColumFutures.push_back( std::async(std::launch::async, [this, i, blockSize, &table]() { auto& blockMetadata = blocksPerColumn_.at(i); decltype(auto) column = table.getColumn(i); - // TODO Use `std::views::chunk` + // TODO Use `ql::views::chunkd` for (size_t lower = 0; lower < column.size(); lower += blockSize) { size_t upper = std::min(lower + blockSize, column.size()); auto thisBlockSizeUncompressed = (upper - lower) * sizeof(Id); @@ -151,7 +151,7 @@ class CompressedExternalIdTableWriter { file_.wlock()->flush(); std::vector>> result; result.reserve(startOfSingleIdTables_.size()); - for (auto i : std::views::iota(0u, startOfSingleIdTables_.size())) { + for (auto i : ql::views::iota(0u, startOfSingleIdTables_.size())) { result.push_back(makeGeneratorForIdTable(i)); } return result; @@ -164,7 +164,7 @@ class CompressedExternalIdTableWriter { file_.wlock()->flush(); std::vector(0))> result; result.reserve(startOfSingleIdTables_.size()); - for (auto i : std::views::iota(0u, startOfSingleIdTables_.size())) { + for (auto i : ql::views::iota(0u, startOfSingleIdTables_.size())) { result.push_back(makeGeneratorForRows(i)); } return result; @@ -173,8 +173,9 @@ class CompressedExternalIdTableWriter { template auto getGeneratorForAllRows() { // Note: As 
soon as we drop the support for GCC11 this can be - // `return getAllRowGenerators() | std::views::join; - return std::views::join(ad_utility::OwningView{getAllRowGenerators()}); + // `return getAllRowGenerators() | ql::views::join; + return ql::views::join( + ad_utility::OwningViewNoConst{getAllRowGenerators()}); } // Clear the underlying file and completely reset the data structure s.t. it @@ -189,7 +190,7 @@ class CompressedExternalIdTableWriter { file_.wlock()->close(); ad_utility::deleteFile(filename_); file_.wlock()->open(filename_, "w+"); - std::ranges::for_each(blocksPerColumn_, [](auto& block) { block.clear(); }); + ql::ranges::for_each(blocksPerColumn_, [](auto& block) { block.clear(); }); startOfSingleIdTables_.clear(); } @@ -197,7 +198,7 @@ class CompressedExternalIdTableWriter { // Get the row generator for a single IdTable, specified by the `index`. template auto makeGeneratorForRows(size_t index) { - return std::views::join( + return ql::views::join( ad_utility::OwningView{makeGeneratorForIdTable(index)}); } // Get the block generator for a single IdTable, specified by the `index`. 
@@ -255,7 +256,7 @@ class CompressedExternalIdTableWriter { blocksPerColumn_.at(0).at(blockIdx).uncompressedSize_ / sizeof(Id); block.resize(blockSize); std::vector> readColumnFutures; - for (auto i : std::views::iota(0u, numColumns())) { + for (auto i : ql::views::iota(0u, numColumns())) { readColumnFutures.push_back( std::async(std::launch::async, [&block, this, i, blockIdx]() { decltype(auto) col = block.getColumn(i); @@ -478,10 +479,10 @@ class CompressedExternalIdTable co_yield block; }(this->currentBlock_); auto rowView = - std::views::join(ad_utility::OwningView{std::move(generator)}); + ql::views::join(ad_utility::OwningView{std::move(generator)}); std::vector vec; vec.push_back(std::move(rowView)); - return std::views::join(ad_utility::OwningView(std::move(vec))); + return ql::views::join(ad_utility::OwningViewNoConst(std::move(vec))); } this->pushBlock(std::move(this->currentBlock_)); this->resetCurrentBlock(false); @@ -534,7 +535,7 @@ struct BlockSorter { #ifdef _PARALLEL_SORT ad_utility::parallel_sort(std::begin(block), std::end(block), comparator_); #else - std::ranges::sort(block, comparator_); + ql::ranges::sort(block, comparator_); #endif } }; @@ -612,7 +613,7 @@ class CompressedExternalIdTableSorter // one. Either this function or the following function must be called exactly // once. auto sortedView() { - return std::views::join(ad_utility::OwningView{getSortedBlocks()}); + return ql::views::join(ad_utility::OwningView{getSortedBlocks()}); } // Similar to `sortedView` (see above), but the elements are yielded in @@ -642,8 +643,8 @@ class CompressedExternalIdTableSorter // once. void pushBlock(const IdTableStatic<0>& block) override { AD_CONTRACT_CHECK(block.numColumns() == this->numColumns_); - std::ranges::for_each(block, - [ptr = this](const auto& row) { ptr->push(row); }); + ql::ranges::for_each(block, + [ptr = this](const auto& row) { ptr->push(row); }); } // The implementation of the type-erased interface. 
Get the sorted blocks as @@ -680,7 +681,7 @@ class CompressedExternalIdTableSorter co_yield blockAsStatic; } } else { - // TODO Use `std::views::chunk`. + // TODO Use `ql::views::chunkd`. for (size_t i = 0; i < block.numRows(); i += blocksizeOutput) { size_t upper = std::min(i + blocksizeOutput, block.numRows()); auto curBlock = IdTableStatic( @@ -749,7 +750,7 @@ class CompressedExternalIdTableSorter #ifdef _PARALLEL_SORT ad_utility::parallel_sort(block.begin(), block.end(), comparator_); #else - std::ranges::sort(block, comparator_); + ql::ranges::sort(block, comparator_); #endif } diff --git a/src/engine/idTable/IdTable.h b/src/engine/idTable/IdTable.h index fb28b4aa99..4ffe33138a 100644 --- a/src/engine/idTable/IdTable.h +++ b/src/engine/idTable/IdTable.h @@ -214,7 +214,7 @@ class IdTable { if (data().size() > numColumns_) { data().erase(data().begin() + numColumns_, data().end()); } - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( data(), [](const auto& column) { return column.empty(); })); } @@ -263,7 +263,7 @@ class IdTable { AD_CORRECTNESS_CHECK(numColumns == NumColumns); } AD_CORRECTNESS_CHECK(this->data().size() == numColumns_); - AD_CORRECTNESS_CHECK(std::ranges::all_of( + AD_CORRECTNESS_CHECK(ql::ranges::all_of( this->data(), [this](const auto& column) { return column.size() == numRows_; })); } @@ -382,8 +382,8 @@ class IdTable { // Note: The semantics of this function is similar to `std::vector::resize`. // To set the capacity, use the `reserve` function. void resize(size_t numRows) requires(!isView) { - std::ranges::for_each(data(), - [numRows](auto& column) { column.resize(numRows); }); + ql::ranges::for_each(data(), + [numRows](auto& column) { column.resize(numRows); }); numRows_ = numRows; } @@ -394,8 +394,8 @@ class IdTable { // of the next `numRows - size()` elements (via `insert` or `push_back`) can // be done in O(1) time without dynamic allocations. 
void reserve(size_t numRows) requires(!isView) { - std::ranges::for_each(data(), - [numRows](auto& column) { column.reserve(numRows); }); + ql::ranges::for_each(data(), + [numRows](auto& column) { column.reserve(numRows); }); } // Delete all the elements, but keep the allocated memory (`capacityRows_` @@ -403,14 +403,14 @@ class IdTable { // `shrinkToFit()` after calling `clear()` . void clear() requires(!isView) { numRows_ = 0; - std::ranges::for_each(data(), [](auto& column) { column.clear(); }); + ql::ranges::for_each(data(), [](auto& column) { column.clear(); }); } // Adjust the capacity to exactly match the size. This optimizes the memory // consumption of this table. This operation runs in O(size()), allocates // memory, and invalidates all iterators. void shrinkToFit() requires(!isView) { - std::ranges::for_each(data(), [](auto& column) { column.shrink_to_fit(); }); + ql::ranges::for_each(data(), [](auto& column) { column.shrink_to_fit(); }); } // Note: The following functions `emplace_back` and `push_back` all have the @@ -421,7 +421,7 @@ class IdTable { // Insert a new uninitialized row at the end. 
void emplace_back() requires(!isView) { - std::ranges::for_each(data(), [](auto& column) { column.emplace_back(); }); + ql::ranges::for_each(data(), [](auto& column) { column.emplace_back(); }); ++numRows_; } @@ -434,10 +434,10 @@ class IdTable { void push_back(const RowLike& newRow) requires(!isView) { AD_EXPENSIVE_CHECK(newRow.size() == numColumns()); ++numRows_; - std::ranges::for_each(ad_utility::integerRange(numColumns()), - [this, &newRow](auto i) { - data()[i].push_back(*(std::begin(newRow) + i)); - }); + ql::ranges::for_each(ad_utility::integerRange(numColumns()), + [this, &newRow](auto i) { + data()[i].push_back(*(std::begin(newRow) + i)); + }); } void push_back(const std::initializer_list& newRow) requires(!isView) { @@ -482,7 +482,7 @@ class IdTable { AD_CONTRACT_CHECK(newColumns.size() >= numColumns()); Data newStorage(std::make_move_iterator(newColumns.begin()), std::make_move_iterator(newColumns.begin() + numColumns())); - std::ranges::for_each( + ql::ranges::for_each( ad_utility::integerRange(numColumns()), [this, &newStorage](auto i) { newStorage[i].insert(newStorage[i].end(), data()[i].begin(), data()[i].end()); @@ -549,7 +549,7 @@ class IdTable { // the argument `columnIndices`. IdTable asColumnSubsetView( std::span columnIndices) const requires isDynamic { - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( columnIndices, [this](size_t idx) { return idx < numColumns(); })); ViewSpans viewSpans; viewSpans.reserve(columnIndices.size()); @@ -574,7 +574,7 @@ class IdTable { // First check that the `subset` is indeed a subset of the column // indices. 
std::vector check{subset.begin(), subset.end()}; - std::ranges::sort(check); + ql::ranges::sort(check); AD_CONTRACT_CHECK(std::unique(check.begin(), check.end()) == check.end()); AD_CONTRACT_CHECK(!subset.empty() && subset.back() < numColumns()); @@ -586,7 +586,7 @@ class IdTable { Data newData; newData.reserve(subset.size()); - std::ranges::for_each(subset, [this, &newData](ColumnIndex colIdx) { + ql::ranges::for_each(subset, [this, &newData](ColumnIndex colIdx) { newData.push_back(std::move(data().at(colIdx))); }); data() = std::move(newData); @@ -691,12 +691,12 @@ class IdTable { auto numInserted = end - begin; auto oldSize = size(); resize(numRows() + numInserted); - std::ranges::for_each( - ad_utility::integerRange(numColumns()), - [this, &table, oldSize, begin, numInserted](size_t i) { - std::ranges::copy(table.getColumn(i).subspan(begin, numInserted), - getColumn(i).begin() + oldSize); - }); + ql::ranges::for_each(ad_utility::integerRange(numColumns()), + [this, &table, oldSize, begin, numInserted](size_t i) { + ql::ranges::copy( + table.getColumn(i).subspan(begin, numInserted), + getColumn(i).begin() + oldSize); + }); } // Check whether two `IdTables` have the same content. Mostly used for unit @@ -710,7 +710,7 @@ class IdTable { } // TODO This can be implemented using `zip_view` and - // `std::ranges::all_of`. The iteration over the columns is cache-friendly. + // `ql::ranges::all_of`. The iteration over the columns is cache-friendly. 
const auto& cols = getColumns(); const auto& otherCols = other.getColumns(); for (size_t i = 0; i < numColumns(); ++i) { @@ -795,7 +795,7 @@ class IdTableStatic friend std::ostream& operator<<(std::ostream& os, const IdTableStatic& idTable) { os << "{ "; - std::ranges::copy( + ql::ranges::copy( idTable, std::ostream_iterator>(os, " ")); os << "}"; return os; diff --git a/src/engine/idTable/IdTableRow.h b/src/engine/idTable/IdTableRow.h index 4ab4c77776..23290d2305 100644 --- a/src/engine/idTable/IdTableRow.h +++ b/src/engine/idTable/IdTableRow.h @@ -91,7 +91,7 @@ class Row { explicit operator std::array() const requires(numStaticColumns != 0) { std::array result; - std::ranges::copy(*this, result.begin()); + ql::ranges::copy(*this, result.begin()); return result; } @@ -282,7 +282,7 @@ class RowReferenceImpl { explicit operator std::array() const requires(numStaticColumns != 0) { std::array result; - std::ranges::copy(*this, result.begin()); + ql::ranges::copy(*this, result.begin()); return result; } @@ -319,18 +319,19 @@ class RowReferenceImpl { // This strange overload needs to be declared to make `Row` a // `std::random_access_range` that can be used e.g. with - // `std::ranges::sort`. There is no need to define it, as it is only + // `ql::ranges::sort`. There is no need to define it, as it is only // needed to fulfill the concept `std::indirectly_writable`. For more // details on this "esoteric" overload see the notes at the end of // `https://en.cppreference.com/w/cpp/iterator/indirectly_writable` This& operator=(const Row& other) const&&; - protected: // No need to copy this internal type, but the implementation of the - // `RowReference` class below requires it, - // so the copy Constructor is protected. + // `RowReference` class and the `input_range` concept from `range-v3` + // require it. 
RowReferenceWithRestrictedAccess(const RowReferenceWithRestrictedAccess&) = default; + RowReferenceWithRestrictedAccess(RowReferenceWithRestrictedAccess&&) = + default; }; }; diff --git a/src/engine/sparqlExpressions/CountStarExpression.cpp b/src/engine/sparqlExpressions/CountStarExpression.cpp index e4abc2f20a..45cd94314f 100644 --- a/src/engine/sparqlExpressions/CountStarExpression.cpp +++ b/src/engine/sparqlExpressions/CountStarExpression.cpp @@ -37,22 +37,22 @@ ExpressionResult CountStarExpression::evaluate( // part of the DISTINCT computation. auto varToColNoInternalVariables = - ctx->_variableToColumnMap | std::views::filter([](const auto& varAndIdx) { + ctx->_variableToColumnMap | ql::views::filter([](const auto& varAndIdx) { return !varAndIdx.first.name().starts_with( QLEVER_INTERNAL_VARIABLE_PREFIX); }); - table.setNumColumns(std::ranges::distance(varToColNoInternalVariables)); + table.setNumColumns(ql::ranges::distance(varToColNoInternalVariables)); table.resize(ctx->size()); auto checkCancellation = [ctx]() { ctx->cancellationHandle_->throwIfCancelled(); }; size_t targetColIdx = 0; for (const auto& [sourceColIdx, _] : - varToColNoInternalVariables | std::views::values) { + varToColNoInternalVariables | ql::views::values) { const auto& sourceColumn = ctx->_inputTable.getColumn(sourceColIdx); - std::ranges::copy(sourceColumn.begin() + ctx->_beginIndex, - sourceColumn.begin() + ctx->_endIndex, - table.getColumn(targetColIdx).begin()); + ql::ranges::copy(sourceColumn.begin() + ctx->_beginIndex, + sourceColumn.begin() + ctx->_endIndex, + table.getColumn(targetColIdx).begin()); ++targetColIdx; checkCancellation(); } @@ -60,7 +60,7 @@ ExpressionResult CountStarExpression::evaluate( table.numRows(), table.numColumns(), ctx->deadline_, "Sort for COUNT(DISTINCT *)"); ad_utility::callFixedSize(table.numColumns(), [&table]() { - Engine::sort(&table, std::ranges::lexicographical_compare); + Engine::sort(&table, ql::ranges::lexicographical_compare); }); return 
Id::makeFromInt( static_cast(Engine::countDistinct(table, checkCancellation))); diff --git a/src/engine/sparqlExpressions/LiteralExpression.h b/src/engine/sparqlExpressions/LiteralExpression.h index 50e0de5fd3..4d9be1db5b 100644 --- a/src/engine/sparqlExpressions/LiteralExpression.h +++ b/src/engine/sparqlExpressions/LiteralExpression.h @@ -171,11 +171,11 @@ class LiteralExpression : public SparqlExpression { if (context->_groupedVariables.contains(variable) && !isInsideAggregate()) { const auto& table = context->_inputTable; auto constantValue = table.at(context->_beginIndex, column.value()); - AD_EXPENSIVE_CHECK((std::ranges::all_of( - table.begin() + context->_beginIndex, - table.begin() + context->_endIndex, [&](const auto& row) { - return row[column.value()] == constantValue; - }))); + AD_EXPENSIVE_CHECK(( + std::all_of(table.begin() + context->_beginIndex, + table.begin() + context->_endIndex, [&](const auto& row) { + return row[column.value()] == constantValue; + }))); return constantValue; } else { return variable; diff --git a/src/engine/sparqlExpressions/NaryExpressionImpl.h b/src/engine/sparqlExpressions/NaryExpressionImpl.h index 3f852b5f09..1061e64d0d 100644 --- a/src/engine/sparqlExpressions/NaryExpressionImpl.h +++ b/src/engine/sparqlExpressions/NaryExpressionImpl.h @@ -82,7 +82,7 @@ class NaryExpression : public SparqlExpression { using ResultType = typename decltype(resultGenerator)::value_type; VectorWithMemoryLimit result{context->_allocator}; result.reserve(targetSize); - std::ranges::move(resultGenerator, std::back_inserter(result)); + ql::ranges::move(resultGenerator, std::back_inserter(result)); if constexpr (resultIsConstant) { AD_CORRECTNESS_CHECK(result.size() == 1); @@ -186,7 +186,7 @@ requires(isOperation) [[nodiscard]] string NaryExpression::getCacheKey( const VariableToColumnMap& varColMap) const { string key = typeid(*this).name(); key += ad_utility::lazyStrJoin( - children_ | std::views::transform([&varColMap](const auto& child) { + 
children_ | ql::views::transform([&varColMap](const auto& child) { return child->getCacheKey(varColMap); }), ""); diff --git a/src/engine/sparqlExpressions/NumericBinaryExpressions.cpp b/src/engine/sparqlExpressions/NumericBinaryExpressions.cpp index e8bf9a450f..173ed08271 100644 --- a/src/engine/sparqlExpressions/NumericBinaryExpressions.cpp +++ b/src/engine/sparqlExpressions/NumericBinaryExpressions.cpp @@ -223,8 +223,8 @@ std::vector mergeChildrenForBinaryOpExpressionImpl( } } if constexpr (binOp == AND) { - std::ranges::move(itLeft, leftChild.end(), std::back_inserter(resPairs)); - std::ranges::move(itRight, rightChild.end(), std::back_inserter(resPairs)); + ql::ranges::move(itLeft, leftChild.end(), std::back_inserter(resPairs)); + ql::ranges::move(itRight, rightChild.end(), std::back_inserter(resPairs)); } pd::checkPropertiesForPrefilterConstruction(resPairs); return resPairs; diff --git a/src/engine/sparqlExpressions/NumericUnaryExpressions.cpp b/src/engine/sparqlExpressions/NumericUnaryExpressions.cpp index a9d46d645e..c97e7ce57e 100644 --- a/src/engine/sparqlExpressions/NumericUnaryExpressions.cpp +++ b/src/engine/sparqlExpressions/NumericUnaryExpressions.cpp @@ -68,8 +68,8 @@ class UnaryNegateExpressionImpl : public NaryExpression { auto child = this->getNthChild(0).value()->getPrefilterExpressionForMetadata( !isNegated); - std::ranges::for_each( - child | std::views::keys, + ql::ranges::for_each( + child | ql::views::keys, [](std::unique_ptr& expression) { expression = std::make_unique(std::move(expression)); diff --git a/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp b/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp index 93af26df7e..616d432429 100644 --- a/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp +++ b/src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp @@ -69,7 +69,7 @@ static void checkEvalRequirements(std::span input, throw std::runtime_error(errorMessage); }; // Check for duplicates. 
- if (auto it = std::ranges::adjacent_find(input); it != input.end()) { + if (auto it = ql::ranges::adjacent_find(input); it != input.end()) { throwRuntimeError("The provided data blocks must be unique."); } // Helper to check for fully sorted blocks. Return `true` if `b1 < b2` is @@ -91,7 +91,7 @@ static void checkEvalRequirements(std::span input, } return false; }; - if (!std::ranges::is_sorted(input, checkOrder)) { + if (!ql::ranges::is_sorted(input, checkOrder)) { throwRuntimeError("The blocks must be provided in sorted order."); } // Helper to check for column consistency. Returns `true` if the columns for @@ -103,7 +103,7 @@ static void checkEvalRequirements(std::span input, getMaskedTriple(b2.firstTriple_, evaluationColumn) || checkBlockIsInconsistent(b2, evaluationColumn); }; - if (auto it = std::ranges::adjacent_find(input, checkColumnConsistency); + if (auto it = ql::ranges::adjacent_find(input, checkColumnConsistency); it != input.end()) { throwRuntimeError( "The values in the columns up to the evaluation column must be " @@ -498,14 +498,14 @@ static std::unique_ptr makeMirroredExpression( //______________________________________________________________________________ void checkPropertiesForPrefilterConstruction( const std::vector& vec) { - auto viewVariable = vec | std::views::values; - if (!std::ranges::is_sorted(viewVariable, std::less<>{})) { + auto viewVariable = vec | ql::views::values; + if (!ql::ranges::is_sorted(viewVariable, std::less<>{})) { throw std::runtime_error( "The vector must contain the pairs in " "sorted order w.r.t. 
Variable value."); } - if (auto it = std::ranges::adjacent_find(viewVariable); - it != std::ranges::end(viewVariable)) { + if (auto it = ql::ranges::adjacent_find(viewVariable); + it != ql::ranges::end(viewVariable)) { throw std::runtime_error( "For each relevant Variable must exist exactly one " " pair."); diff --git a/src/engine/sparqlExpressions/RegexExpression.cpp b/src/engine/sparqlExpressions/RegexExpression.cpp index 695f12b947..02047297ad 100644 --- a/src/engine/sparqlExpressions/RegexExpression.cpp +++ b/src/engine/sparqlExpressions/RegexExpression.cpp @@ -247,7 +247,7 @@ ExpressionResult RegexExpression::evaluatePrefixRegex( result.reserve(resultSize); for (auto id : detail::makeGenerator(variable, resultSize, context)) { result.push_back(Id::makeFromBool( - std::ranges::any_of(lowerAndUpperIds, [&](const auto& lowerUpper) { + ql::ranges::any_of(lowerAndUpperIds, [&](const auto& lowerUpper) { return !valueIdComparators::compareByBits(id, lowerUpper.first) && valueIdComparators::compareByBits(id, lowerUpper.second); }))); @@ -270,7 +270,7 @@ ExpressionResult RegexExpression::evaluateGeneralCase( // `std::nullopt` for a row, the result is `UNDEF`. Otherwise, we have a // string and evaluate the regex on it. auto computeResult = [&](const ValueGetter& getter) { - std::ranges::for_each( + ql::ranges::for_each( detail::makeGenerator(AD_FWD(input), resultSize, context), [&getter, &context, &result, this](const auto& id) { auto str = getter(id, context); diff --git a/src/engine/sparqlExpressions/RelationalExpressions.cpp b/src/engine/sparqlExpressions/RelationalExpressions.cpp index 5bc36b528e..603e334c69 100644 --- a/src/engine/sparqlExpressions/RelationalExpressions.cpp +++ b/src/engine/sparqlExpressions/RelationalExpressions.cpp @@ -342,15 +342,15 @@ SparqlExpression::Estimates getEstimatesForFilterExpressionImpl( // filtering on the `LocalVocab`. // Check iff all the pairs `(children[0], someOtherChild)` can be evaluated // using binary search. 
- if (std::ranges::all_of(children | std::views::drop(1), - [&lhs = children.at(0), - &canBeEvaluatedWithBinarySearch](const auto& child) { - // The implementation automatically chooses the - // cheaper direction, so we can do the same when - // estimating the cost. - return canBeEvaluatedWithBinarySearch(lhs, child) || - canBeEvaluatedWithBinarySearch(child, lhs); - })) { + if (ql::ranges::all_of(children | ql::views::drop(1), + [&lhs = children.at(0), + &canBeEvaluatedWithBinarySearch](const auto& child) { + // The implementation automatically chooses the + // cheaper direction, so we can do the same when + // estimating the cost. + return canBeEvaluatedWithBinarySearch(lhs, child) || + canBeEvaluatedWithBinarySearch(child, lhs); + })) { // When evaluating via binary search, the only significant cost that occurs // is that of writing the output. costEstimate = sizeEstimate; @@ -385,7 +385,7 @@ ExpressionResult InExpression::evaluate( auto lhs = children_.at(0)->evaluate(context); ExpressionResult result{ad_utility::SetOfIntervals{}}; bool firstChild = true; - for (const auto& child : children_ | std::views::drop(1)) { + for (const auto& child : children_ | ql::views::drop(1)) { auto rhs = child->evaluate(context); auto evaluateEqualsExpression = [context](const auto& a, auto b) -> ExpressionResult { diff --git a/src/engine/sparqlExpressions/RelationalExpressions.h b/src/engine/sparqlExpressions/RelationalExpressions.h index d406afc936..45fc44f471 100644 --- a/src/engine/sparqlExpressions/RelationalExpressions.h +++ b/src/engine/sparqlExpressions/RelationalExpressions.h @@ -69,7 +69,7 @@ class InExpression : public SparqlExpression { explicit InExpression(SparqlExpression::Ptr lhs, Children children) { children_.reserve(children.size() + 1); children_.push_back(std::move(lhs)); - std::ranges::move(children, std::back_inserter(children_)); + ql::ranges::move(children, std::back_inserter(children_)); } ExpressionResult evaluate(EvaluationContext* context) const 
override; diff --git a/src/engine/sparqlExpressions/SetOfIntervals.cpp b/src/engine/sparqlExpressions/SetOfIntervals.cpp index 9db82c4fd9..6022a2f3a2 100644 --- a/src/engine/sparqlExpressions/SetOfIntervals.cpp +++ b/src/engine/sparqlExpressions/SetOfIntervals.cpp @@ -4,7 +4,7 @@ #include "SetOfIntervals.h" -#include +#include "backports/algorithm.h" namespace ad_utility { // ___________________________________________________________________________ diff --git a/src/engine/sparqlExpressions/SetOfIntervals.h b/src/engine/sparqlExpressions/SetOfIntervals.h index a899facbe4..4ca8bea107 100644 --- a/src/engine/sparqlExpressions/SetOfIntervals.h +++ b/src/engine/sparqlExpressions/SetOfIntervals.h @@ -8,6 +8,7 @@ #include #include +#include "backports/algorithm.h" #include "util/Exception.h" namespace ad_utility { @@ -77,7 +78,7 @@ struct SetOfIntervals { inline static std::vector toBitVector(const SetOfIntervals& a, size_t targetSize) { std::vector result(targetSize, false); - toBitVector(a, targetSize, std::ranges::begin(result)); + toBitVector(a, targetSize, ql::ranges::begin(result)); return result; } }; diff --git a/src/engine/sparqlExpressions/SparqlExpression.cpp b/src/engine/sparqlExpressions/SparqlExpression.cpp index 7e8fa1648c..b5ec3aa0f7 100644 --- a/src/engine/sparqlExpressions/SparqlExpression.cpp +++ b/src/engine/sparqlExpressions/SparqlExpression.cpp @@ -49,7 +49,7 @@ bool SparqlExpression::containsAggregate() const { return true; } - return std::ranges::any_of( + return ql::ranges::any_of( children(), [](const Ptr& child) { return child->containsAggregate(); }); } @@ -84,10 +84,9 @@ std::optional<::Variable> SparqlExpression::getVariableOrNullopt() const { // _____________________________________________________________________________ bool SparqlExpression::containsLangExpression() const { - return std::ranges::any_of(children(), - [](const SparqlExpression::Ptr& child) { - return child->containsLangExpression(); - }); + return 
ql::ranges::any_of(children(), [](const SparqlExpression::Ptr& child) { + return child->containsLangExpression(); + }); } // _____________________________________________________________________________ diff --git a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h index 7f5378d4ea..33b46914d0 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionGenerators.h +++ b/src/engine/sparqlExpressions/SparqlExpressionGenerators.h @@ -54,7 +54,7 @@ template requires(std::ranges::input_range) auto resultGenerator(T&& vector, size_t numItems, Transformation transformation = {}) { AD_CONTRACT_CHECK(numItems == vector.size()); - return ad_utility::allView(AD_FWD(vector)) | std::views::transform(std::move(transformation)); + return ad_utility::allView(AD_FWD(vector)) | ql::views::transform(std::move(transformation)); } template diff --git a/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp b/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp index 99d3f19e3f..0f6cf3ebb9 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp +++ b/src/engine/sparqlExpressions/SparqlExpressionPimpl.cpp @@ -73,7 +73,7 @@ void SparqlExpressionPimpl::setDescriptor(std::string descriptor) { // _____________________________________________________________________________ bool SparqlExpressionPimpl::isVariableContained( const Variable& variable) const { - return std::ranges::any_of( + return ql::ranges::any_of( containedVariables(), [&variable](const auto* varPtr) { return *varPtr == variable; }); } diff --git a/src/engine/sparqlExpressions/SparqlExpressionPimpl.h b/src/engine/sparqlExpressions/SparqlExpressionPimpl.h index 0b4b7b8f48..cd01eda9e5 100644 --- a/src/engine/sparqlExpressions/SparqlExpressionPimpl.h +++ b/src/engine/sparqlExpressions/SparqlExpressionPimpl.h @@ -44,7 +44,7 @@ class SparqlExpressionPimpl { // COUNT(?x) + ?m returns true if and only if ?m is in `groupedVariables`. 
[[nodiscard]] bool isAggregate( const ad_utility::HashSet& groupedVariables) const { - // TODO This can be std::ranges::all_of as soon as libc++ supports + // TODO This can be ql::ranges::all_of as soon as libc++ supports // it, or the combination of clang + libstdc++ + coroutines works. auto unaggregatedVariables = getUnaggregatedVariables(); for (const auto& var : unaggregatedVariables) { diff --git a/src/engine/sparqlExpressions/StringExpressions.cpp b/src/engine/sparqlExpressions/StringExpressions.cpp index f7e7d9bca7..3378ae7fdb 100644 --- a/src/engine/sparqlExpressions/StringExpressions.cpp +++ b/src/engine/sparqlExpressions/StringExpressions.cpp @@ -128,7 +128,7 @@ using IriOrUriExpression = NARY<1, FV>; [[maybe_unused]] auto strlen = [](std::string_view s) { // Count UTF-8 characters by skipping continuation bytes (those starting with // "10"). - auto utf8Len = std::ranges::count_if( + auto utf8Len = ql::ranges::count_if( s, [](char c) { return (static_cast(c) & 0xC0) != 0x80; }); return Id::makeFromInt(utf8Len); }; @@ -393,7 +393,7 @@ class ConcatExpression : public detail::VariadicExpression { // One of the previous children was not a constant, so we already // store a vector. auto& resultAsVector = std::get(result); - std::ranges::for_each(resultAsVector, [&](std::string& target) { + ql::ranges::for_each(resultAsVector, [&](std::string& target) { target.append(strFromConstant); }); } @@ -417,7 +417,7 @@ class ConcatExpression : public detail::VariadicExpression { // The `result` already is a vector, and the current child also returns // multiple results, so we do the `natural` way. auto& resultAsVec = std::get(result); - // TODO Use `std::views::zip` or `enumerate`. + // TODO Use `ql::views::zip` or `enumerate`. 
size_t i = 0; for (auto& el : gen) { if (auto str = StringValueGetter{}(std::move(el), ctx); @@ -430,7 +430,7 @@ class ConcatExpression : public detail::VariadicExpression { } ctx->cancellationHandle_->throwIfCancelled(); }; - std::ranges::for_each( + ql::ranges::for_each( childrenVec(), [&ctx, &visitSingleExpressionResult](const auto& child) { std::visit(visitSingleExpressionResult, child->evaluate(ctx)); }); @@ -443,8 +443,8 @@ class ConcatExpression : public detail::VariadicExpression { auto& stringVec = std::get(result); VectorWithMemoryLimit resultAsVec(ctx->_allocator); resultAsVec.reserve(stringVec.size()); - std::ranges::copy(stringVec | std::views::transform(toLiteral), - std::back_inserter(resultAsVec)); + ql::ranges::copy(stringVec | ql::views::transform(toLiteral), + std::back_inserter(resultAsVec)); return resultAsVec; } } diff --git a/src/engine/sparqlExpressions/VariadicExpression.h b/src/engine/sparqlExpressions/VariadicExpression.h index 8a7b81f479..3352c20d1f 100644 --- a/src/engine/sparqlExpressions/VariadicExpression.h +++ b/src/engine/sparqlExpressions/VariadicExpression.h @@ -34,7 +34,7 @@ class VariadicExpression : public SparqlExpression { std::string getCacheKey(const VariableToColumnMap& varColMap) const override { string key = typeid(*this).name(); auto childKeys = ad_utility::lazyStrJoin( - children_ | std::views::transform([&varColMap](const auto& childPtr) { + children_ | ql::views::transform([&varColMap](const auto& childPtr) { return childPtr->getCacheKey(varColMap); }), ", "); diff --git a/src/global/IdTriple.h b/src/global/IdTriple.h index b349743f9a..142dacab5b 100644 --- a/src/global/IdTriple.h +++ b/src/global/IdTriple.h @@ -30,8 +30,8 @@ struct IdTriple { friend std::ostream& operator<<(std::ostream& os, const IdTriple& triple) { os << "IdTriple("; - std::ranges::copy(triple.ids_, std::ostream_iterator(os, ", ")); - std::ranges::copy(triple.payload_, std::ostream_iterator(os, ", ")); + ql::ranges::copy(triple.ids_, 
std::ostream_iterator(os, ", ")); + ql::ranges::copy(triple.payload_, std::ostream_iterator(os, ", ")); os << ")"; return os; } diff --git a/src/global/SpecialIds.h b/src/global/SpecialIds.h index b844e11724..10ad76b144 100644 --- a/src/global/SpecialIds.h +++ b/src/global/SpecialIds.h @@ -32,13 +32,12 @@ inline const ad_utility::HashMap& specialIds() { // Perform the following checks: All the special IDs are unique, all of them // have the `Undefined` datatype, but none of them is equal to the "actual" // UNDEF value. - auto values = std::views::values(result); + auto values = ql::views::values(result); auto undefTypeButNotUndefValue = [](Id id) { return id != Id::makeUndefined() && id.getDatatype() == Datatype::Undefined; }; - AD_CORRECTNESS_CHECK( - std::ranges::all_of(values, undefTypeButNotUndefValue)); + AD_CORRECTNESS_CHECK(ql::ranges::all_of(values, undefTypeButNotUndefValue)); ad_utility::HashSet uniqueIds(values.begin(), values.end()); AD_CORRECTNESS_CHECK(uniqueIds.size() == result.size()); return result; diff --git a/src/global/ValueId.h b/src/global/ValueId.h index fe429b98fa..91930ff1f3 100644 --- a/src/global/ValueId.h +++ b/src/global/ValueId.h @@ -101,8 +101,8 @@ class ValueId { // Assert that the types in `stringTypes_` are directly adjacent. This is // required to make the comparison of IDs in `ValueIdComparators.h` work. 
- static constexpr Datatype maxStringType_ = std::ranges::max(stringTypes_); - static constexpr Datatype minStringType_ = std::ranges::min(stringTypes_); + static constexpr Datatype maxStringType_ = ql::ranges::max(stringTypes_); + static constexpr Datatype minStringType_ = ql::ranges::min(stringTypes_); static_assert(static_cast(maxStringType_) - static_cast(minStringType_) + 1 == stringTypes_.size()); diff --git a/src/global/ValueIdComparators.h b/src/global/ValueIdComparators.h index 0b23621c77..bfbf7bb551 100644 --- a/src/global/ValueIdComparators.h +++ b/src/global/ValueIdComparators.h @@ -8,6 +8,7 @@ #include #include "global/ValueId.h" +#include "util/Algorithm.h" #include "util/ComparisonWithNan.h" #include "util/OverloadCallOperator.h" diff --git a/src/index/CompressedRelation.cpp b/src/index/CompressedRelation.cpp index 05ce8a8a82..fc306e892f 100644 --- a/src/index/CompressedRelation.cpp +++ b/src/index/CompressedRelation.cpp @@ -26,7 +26,7 @@ using namespace std::chrono_literals; // A small helper function to obtain the begin and end iterator of a range static auto getBeginAndEnd(auto& range) { - return std::pair{std::ranges::begin(range), std::ranges::end(range)}; + return std::pair{ql::ranges::begin(range), ql::ranges::end(range)}; } // modify the `block` according to the `limitOffset`. 
Also modify the @@ -156,9 +156,9 @@ bool CompressedRelationReader::FilterDuplicatesAndGraphs:: if (!metadata.graphInfo_.has_value()) { return true; } - return !std::ranges::all_of( - metadata.graphInfo_.value(), - [&wantedGraphs = desiredGraphs_.value()](Id containedGraph) { + const auto& graphInfo = metadata.graphInfo_.value(); + return !ql::ranges::all_of( + graphInfo, [&wantedGraphs = desiredGraphs_.value()](Id containedGraph) { return wantedGraphs.contains(containedGraph); }); } @@ -184,7 +184,7 @@ bool CompressedRelationReader::FilterDuplicatesAndGraphs:: } else { AD_EXPENSIVE_CHECK( !desiredGraphs_.has_value() || - std::ranges::all_of(block, isDesiredGraphId(), graphIdFromRow)); + ql::ranges::all_of(block, isDesiredGraphId(), graphIdFromRow)); } return needsFilteringByGraph; } @@ -224,7 +224,7 @@ bool CompressedRelationReader::FilterDuplicatesAndGraphs::canBlockBeSkipped( return false; } const auto& containedGraphs = block.graphInfo_.value(); - return std::ranges::none_of( + return ql::ranges::none_of( desiredGraphs_.value(), [&containedGraphs](const auto& desiredGraph) { return ad_utility::contains(containedGraphs, desiredGraph); }); @@ -246,7 +246,7 @@ CompressedRelationReader::IdTableGenerator CompressedRelationReader::lazyScan( // Compute the sequence of relevant blocks. If the sequence is empty, there // is nothing to yield. auto relevantBlocks = getRelevantBlocks(scanSpec, blockMetadata); - if (std::ranges::empty(relevantBlocks)) { + if (ql::ranges::empty(relevantBlocks)) { co_return; } @@ -417,10 +417,10 @@ std::vector CompressedRelationReader::getBlocksForJoin( // `!lessThan(a,b) && !lessThan(b, a)` is not transitive. 
std::vector result; auto blockIsNeeded = [&joinColumn, &lessThan](const auto& block) { - return !std::ranges::equal_range(joinColumn, block, lessThan).empty(); + return !ql::ranges::equal_range(joinColumn, block, lessThan).empty(); }; - std::ranges::copy(relevantBlocks | std::views::filter(blockIsNeeded), - std::back_inserter(result)); + ql::ranges::copy(relevantBlocks | ql::views::filter(blockIsNeeded), + std::back_inserter(result)); // The following check is cheap as there are only few blocks. AD_CORRECTNESS_CHECK(std::ranges::unique(result).empty()); return result; @@ -446,21 +446,22 @@ CompressedRelationReader::getBlocksForJoin( // Transform all the relevant blocks from a `ScanSpecAndBlocksAndBounds` a // `BlockWithFirstAndLastId` struct (see above). - auto getBlocksWithFirstAndLastId = [&blockLessThanBlock]( - const ScanSpecAndBlocksAndBounds& - metadataAndBlocks) { - auto getSingleBlock = - [&metadataAndBlocks]( - const CompressedBlockMetadata& block) -> BlockWithFirstAndLastId { - return {block, + auto getBlocksWithFirstAndLastId = + [&blockLessThanBlock]( + const ScanSpecAndBlocksAndBounds& metadataAndBlocks) { + auto getSingleBlock = + [&metadataAndBlocks](const CompressedBlockMetadata& block) + -> BlockWithFirstAndLastId { + return { + block, getRelevantIdFromTriple(block.firstTriple_, metadataAndBlocks), getRelevantIdFromTriple(block.lastTriple_, metadataAndBlocks)}; - }; - auto result = std::views::transform( - getBlocksFromMetadata(metadataAndBlocks), getSingleBlock); - AD_CORRECTNESS_CHECK(std::ranges::is_sorted(result, blockLessThanBlock)); - return result; - }; + }; + auto result = ql::views::transform( + getBlocksFromMetadata(metadataAndBlocks), getSingleBlock); + AD_CORRECTNESS_CHECK(ql::ranges::is_sorted(result, blockLessThanBlock)); + return result; + }; auto blocksWithFirstAndLastId1 = getBlocksWithFirstAndLastId(metadataAndBlocks1); @@ -475,7 +476,7 @@ CompressedRelationReader::getBlocksForJoin( const auto& otherBlocks) { std::vector result; 
for (const auto& block : blocks) { - if (!std::ranges::equal_range(otherBlocks, block, blockLessThanBlock) + if (!ql::ranges::equal_range(otherBlocks, block, blockLessThanBlock) .empty()) { result.push_back(block.block_); } @@ -504,8 +505,8 @@ IdTable CompressedRelationReader::scan( // Compute an upper bound for the size and reserve enough space in the // result. auto relevantBlocks = getRelevantBlocks(scanSpec, blocks); - auto sizes = relevantBlocks | - std::views::transform(&CompressedBlockMetadata::numRows_); + auto sizes = + relevantBlocks | ql::views::transform(&CompressedBlockMetadata::numRows_); auto upperBoundSize = std::accumulate(sizes.begin(), sizes.end(), size_t{0}); if (limitOffset._limit.has_value()) { upperBoundSize = std::min(upperBoundSize, @@ -534,9 +535,9 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( // We first scan the complete block including ALL columns. std::vector allAdditionalColumns; - std::ranges::copy( - std::views::iota(ADDITIONAL_COLUMN_GRAPH_ID, - blockMetadata.offsetsAndCompressedSize_.size()), + ql::ranges::copy( + ql::views::iota(ADDITIONAL_COLUMN_GRAPH_ID, + blockMetadata.offsetsAndCompressedSize_.size()), std::back_inserter(allAdditionalColumns)); ScanSpecification specForAllColumns{std::nullopt, std::nullopt, @@ -578,7 +579,7 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( return; } const auto& column = block.getColumn(columnIdx); - auto matchingRange = std::ranges::equal_range( + auto matchingRange = ql::ranges::equal_range( column.begin() + beginIdx, column.begin() + endIdx, relevantId.value()); beginIdx = matchingRange.begin() - column.begin(); endIdx = matchingRange.end() - column.begin(); @@ -599,12 +600,12 @@ DecompressedBlock CompressedRelationReader::readPossiblyIncompleteBlock( size_t i = 0; const auto& columnIndices = scanConfig.scanColumns_; for (const auto& inputColIdx : - columnIndices | std::views::filter([&](const auto& idx) { + columnIndices | 
ql::views::filter([&](const auto& idx) { return !manuallyDeleteGraphColumn || idx != ADDITIONAL_COLUMN_GRAPH_ID; })) { const auto& inputCol = block.getColumn(inputColIdx); - std::ranges::copy(inputCol.begin() + beginIdx, inputCol.begin() + endIdx, - result.getColumn(i).begin()); + ql::ranges::copy(inputCol.begin() + beginIdx, inputCol.begin() + endIdx, + result.getColumn(i).begin()); ++i; } @@ -656,8 +657,8 @@ std::pair CompressedRelationReader::getResultSizeImpl( // First accumulate the complete blocks in the "middle" std::size_t inserted = 0; std::size_t deleted = 0; - std::ranges::for_each( - std::ranges::subrange{beginBlock, endBlock}, [&](const auto& block) { + ql::ranges::for_each( + ql::ranges::subrange{beginBlock, endBlock}, [&](const auto& block) { const auto [ins, del] = locatedTriplesPerBlock.numTriples(block.blockIndex_); if (!exactSize || (ins == 0 && del == 0)) { @@ -766,7 +767,7 @@ IdTable CompressedRelationReader::getDistinctColIdsAndCountsImpl( continue; } const auto& block = optionalBlock.value(); - // TODO: use `std::views::chunk_by`. + // TODO: use `ql::views::chunkd_by`. for (size_t j = 0; j < block.numRows(); ++j) { Id colId = block(j, 0); processColId(colId, 1); @@ -841,7 +842,7 @@ CompressedBlock CompressedRelationReader::readCompressedBlockFromFile( ColumnIndicesRef columnIndices) const { CompressedBlock compressedBuffer; compressedBuffer.resize(columnIndices.size()); - // TODO Use `std::views::zip` + // TODO Use `ql::views::zip` for (size_t i = 0; i < compressedBuffer.size(); ++i) { const auto& offset = blockMetaData.offsetsAndCompressedSize_.at(columnIndices[i]); @@ -944,9 +945,9 @@ static std::pair>> getGraphInfo( // Return the contained graphs, or `nullopt` if there are too many of them. 
auto graphInfo = [&block]() -> std::optional> { std::vector graphColumn; - std::ranges::copy(block->getColumn(ADDITIONAL_COLUMN_GRAPH_ID), - std::back_inserter(graphColumn)); - std::ranges::sort(graphColumn); + ql::ranges::copy(block->getColumn(ADDITIONAL_COLUMN_GRAPH_ID), + std::back_inserter(graphColumn)); + ql::ranges::sort(graphColumn); auto [endOfUnique, _] = std::ranges::unique(graphColumn); size_t numGraphs = endOfUnique - graphColumn.begin(); if (numGraphs > MAX_NUM_GRAPHS_STORED_IN_BLOCK_METADATA) { @@ -1030,7 +1031,7 @@ CompressedRelationReader::getRelevantBlocks( return blockA.lastTriple_ < blockB.firstTriple_; }; - return std::ranges::equal_range(blockMetadata, key, comp); + return ql::ranges::equal_range(blockMetadata, key, comp); } // _____________________________________________________________________________ @@ -1087,8 +1088,8 @@ std::vector CompressedRelationReader::prepareColumnIndices( ColumnIndicesRef additionalColumns) { std::vector result; result.reserve(baseColumns.size() + additionalColumns.size()); - std::ranges::copy(baseColumns, std::back_inserter(result)); - std::ranges::copy(additionalColumns, std::back_inserter(result)); + ql::ranges::copy(baseColumns, std::back_inserter(result)); + ql::ranges::copy(additionalColumns, std::back_inserter(result)); return result; } @@ -1109,7 +1110,7 @@ std::vector CompressedRelationReader::prepareColumnIndices( // ___________________________________________________________________________ std::pair CompressedRelationReader::prepareLocatedTriples( ColumnIndicesRef columns) { - AD_CORRECTNESS_CHECK(std::ranges::is_sorted(columns)); + AD_CORRECTNESS_CHECK(ql::ranges::is_sorted(columns)); // Compute number of columns that should be read (except the graph column // and any payload columns). size_t numScanColumns = [&]() -> size_t { @@ -1120,7 +1121,7 @@ std::pair CompressedRelationReader::prepareLocatedTriples( } }(); // Check if one of the columns is the graph column. 
- auto it = std::ranges::find(columns, ADDITIONAL_COLUMN_GRAPH_ID); + auto it = ql::ranges::find(columns, ADDITIONAL_COLUMN_GRAPH_ID); bool containsGraphId = it != columns.end(); if (containsGraphId) { AD_CORRECTNESS_CHECK(it - columns.begin() == @@ -1153,7 +1154,7 @@ CompressedRelationMetadata CompressedRelationWriter::addSmallRelation( smallRelationsBuffer_.resize(offsetInBlock + numRows); for (size_t i = 0; i < relation.numColumns(); ++i) { - std::ranges::copy( + ql::ranges::copy( relation.getColumn(i), smallRelationsBuffer_.getColumn(i).begin() + offsetInBlock); } @@ -1277,7 +1278,7 @@ CompressedRelationMetadata CompressedRelationWriter::addCompleteLargeRelation( Id col0Id, auto&& sortedBlocks) { DistinctIdCounter distinctCol1Counter; for (auto& block : sortedBlocks) { - std::ranges::for_each(block.getColumn(1), std::ref(distinctCol1Counter)); + ql::ranges::for_each(block.getColumn(1), std::ref(distinctCol1Counter)); addBlockForLargeRelation( col0Id, std::make_shared(std::move(block).toDynamic())); } @@ -1373,7 +1374,7 @@ auto CompressedRelationWriter::createPermutationPair( return std::tie(a[0], a[1], a[2], a[3]) < std::tie(b[0], b[1], b[2], b[3]); }; - std::ranges::sort(relation, compare); + ql::ranges::sort(relation, compare); AD_CORRECTNESS_CHECK(!relation.empty()); writer2.compressAndWriteBlock(relation.at(0, 0), relation.at(relation.size() - 1, 0), @@ -1558,7 +1559,7 @@ auto CompressedRelationReader::getScanConfig( return {0, false}; } auto idx = static_cast( - std::ranges::find(columnIndices, ADDITIONAL_COLUMN_GRAPH_ID) - + ql::ranges::find(columnIndices, ADDITIONAL_COLUMN_GRAPH_ID) - columnIndices.begin()); bool deleteColumn = false; if (idx == columnIndices.size()) { diff --git a/src/index/CompressedRelation.h b/src/index/CompressedRelation.h index 36cbf14af0..96fefef2be 100644 --- a/src/index/CompressedRelation.h +++ b/src/index/CompressedRelation.h @@ -4,10 +4,10 @@ #pragma once -#include #include #include +#include "backports/algorithm.h" #include 
"engine/idTable/IdTable.h" #include "global/Id.h" #include "index/ScanSpecification.h" @@ -299,7 +299,7 @@ class CompressedRelationWriter { std::vector getFinishedBlocks() && { finish(); auto blocks = std::move(*(blockBuffer_.wlock())); - std::ranges::sort( + ql::ranges::sort( blocks, {}, [](const CompressedBlockMetadataNoBlockIndex& bl) { return std::tie(bl.firstTriple_.col0Id_, bl.firstTriple_.col1Id_, bl.firstTriple_.col2Id_); diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index e6b3bcd555..8b69c8d363 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -21,7 +21,7 @@ LocatedTriples::iterator& DeltaTriples::LocatedTripleHandles::forPermutation( void DeltaTriples::clear() { triplesInserted_.clear(); triplesDeleted_.clear(); - std::ranges::for_each(locatedTriples(), &LocatedTriplesPerBlock::clear); + ql::ranges::for_each(locatedTriples(), &LocatedTriplesPerBlock::clear); } // ____________________________________________________________________________ @@ -133,9 +133,9 @@ void DeltaTriples::rewriteLocalVocabEntriesAndBlankNodes(Triples& triples) { }; // Convert all local vocab and blank node `Id`s in all `triples`. 
- std::ranges::for_each(triples, [&convertId](IdTriple<0>& triple) { - std::ranges::for_each(triple.ids_, convertId); - std::ranges::for_each(triple.payload_, convertId); + ql::ranges::for_each(triples, [&convertId](IdTriple<0>& triple) { + ql::ranges::for_each(triple.ids_, convertId); + ql::ranges::for_each(triple.payload_, convertId); }); } @@ -145,26 +145,25 @@ void DeltaTriples::modifyTriplesImpl(CancellationHandle cancellationHandle, TriplesToHandlesMap& targetMap, TriplesToHandlesMap& inverseMap) { rewriteLocalVocabEntriesAndBlankNodes(triples); - std::ranges::sort(triples); + ql::ranges::sort(triples); auto [first, last] = std::ranges::unique(triples); triples.erase(first, last); std::erase_if(triples, [&targetMap](const IdTriple<0>& triple) { return targetMap.contains(triple); }); - std::ranges::for_each(triples, - [this, &inverseMap](const IdTriple<0>& triple) { - auto handle = inverseMap.find(triple); - if (handle != inverseMap.end()) { - eraseTripleInAllPermutations(handle->second); - inverseMap.erase(triple); - } - }); + ql::ranges::for_each(triples, [this, &inverseMap](const IdTriple<0>& triple) { + auto handle = inverseMap.find(triple); + if (handle != inverseMap.end()) { + eraseTripleInAllPermutations(handle->second); + inverseMap.erase(triple); + } + }); std::vector handles = locateAndAddTriples(std::move(cancellationHandle), triples, shouldExist); AD_CORRECTNESS_CHECK(triples.size() == handles.size()); - // TODO: replace with std::views::zip in C++23 + // TODO: replace with ql::views::zip in C++23 for (size_t i = 0; i < triples.size(); i++) { targetMap.insert({triples[i], handles[i]}); } diff --git a/src/index/DocsDB.cpp b/src/index/DocsDB.cpp index 9a2b182735..cf353b264b 100644 --- a/src/index/DocsDB.cpp +++ b/src/index/DocsDB.cpp @@ -4,9 +4,8 @@ #include "DocsDB.h" -#include - #include "../global/Constants.h" +#include "backports/algorithm.h" // _____________________________________________________________________________ void DocsDB::init(const 
string& fileName) { diff --git a/src/index/IndexBuilderTypes.h b/src/index/IndexBuilderTypes.h index 173e323095..85f3f5d195 100644 --- a/src/index/IndexBuilderTypes.h +++ b/src/index/IndexBuilderTypes.h @@ -102,7 +102,7 @@ class MonotonicBuffer { // the buffer. std::string_view addString(std::string_view input) { auto ptr = charAllocator_->allocate(input.size()); - std::ranges::copy(input, ptr); + ql::ranges::copy(input, ptr); return {ptr, ptr + input.size()}; } }; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 58fe7e577c..bd46c81e53 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -8,13 +8,13 @@ #include -#include #include #include #include #include #include +#include "backports/algorithm.h" #include "engine/CallFixedSize.h" #include "index/FTSAlgorithms.h" #include "parser/ContextFileParser.h" @@ -724,9 +724,9 @@ IdTable IndexImpl::readWordCl( static_cast(tbmd._cl._startWordlist - tbmd._cl._startContextlist), &TextRecordIndex::make); idTable.resize(cids.size()); - std::ranges::transform(cids, idTable.getColumn(0).begin(), - &Id::makeFromTextRecordIndex); - std::ranges::transform( + ql::ranges::transform(cids, idTable.getColumn(0).begin(), + &Id::makeFromTextRecordIndex); + ql::ranges::transform( readFreqComprList( tbmd._cl._nofElements, tbmd._cl._startWordlist, static_cast(tbmd._cl._startScorelist - @@ -748,16 +748,16 @@ IdTable IndexImpl::readWordEntityCl( tbmd._entityCl._startContextlist), &TextRecordIndex::make); idTable.resize(cids.size()); - std::ranges::transform(cids, idTable.getColumn(0).begin(), - &Id::makeFromTextRecordIndex); - std::ranges::copy( + ql::ranges::transform(cids, idTable.getColumn(0).begin(), + &Id::makeFromTextRecordIndex); + ql::ranges::copy( readFreqComprList(tbmd._entityCl._nofElements, tbmd._entityCl._startWordlist, static_cast(tbmd._entityCl._startScorelist - tbmd._entityCl._startWordlist), &Id::fromBits), idTable.getColumn(1).begin()); - std::ranges::transform( + 
ql::ranges::transform( readFreqComprList( tbmd._entityCl._nofElements, tbmd._entityCl._startScorelist, static_cast(tbmd._entityCl._lastByte + 1 - @@ -961,7 +961,7 @@ size_t IndexImpl::getSizeEstimate(const string& words) const { } return 1 + optTbmd.value().tbmd_._entityCl._nofElements / 100; }; - return std::ranges::min(terms | std::views::transform(termToEstimate)); + return ql::ranges::min(terms | ql::views::transform(termToEstimate)); } // _____________________________________________________________________________ diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index c878e4365c..ed8a6dd526 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -6,7 +6,6 @@ #include "./IndexImpl.h" -#include #include #include #include @@ -16,6 +15,7 @@ #include "CompilationInfo.h" #include "Index.h" #include "absl/strings/str_join.h" +#include "backports/algorithm.h" #include "engine/AddCombinedRowToTable.h" #include "engine/CallFixedSize.h" #include "index/IndexFormatVersion.h" @@ -151,7 +151,7 @@ auto fixBlockAfterPatternJoin(auto block) { static constexpr auto permutation = makePermutationFirstThirdSwitched(); block.value().setColumnSubset(permutation); - std::ranges::for_each( + ql::ranges::for_each( block.value().getColumn(ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN), [](Id& id) { id = id.isUndefined() ? Id::makeFromInt(NO_PATTERN) : id; }); return std::move(block.value()).template toStatic<0>(); @@ -612,6 +612,8 @@ auto IndexImpl::convertPartialToGlobalIds( auto& result = *resultPtr; auto& internalResult = *internalTriplesPtr; auto triplesGenerator = data.getRows(); + // static_assert(!std::is_const_v); + // static_assert(std::is_const_v); auto it = triplesGenerator.begin(); using Buffer = IdTableStatic; struct Buffers { @@ -782,7 +784,7 @@ IndexImpl::createPermutationPairImpl(size_t numColumns, const string& fileName1, // blocks. 
auto liftCallback = [](auto callback) { return [callback](const auto& block) mutable { - std::ranges::for_each(block, callback); + ql::ranges::for_each(block, callback); }; }; auto callback1 = @@ -1323,7 +1325,7 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { turtleParserIntegerOverflowBehavior_ = TurtleParserIntegerOverflowBehavior::OverflowingToDouble; } else { - AD_CONTRACT_CHECK(std::ranges::find(allModes, value) == allModes.end()); + AD_CONTRACT_CHECK(ql::ranges::find(allModes, value) == allModes.end()); AD_LOG_ERROR << "Invalid value for " << key << std::endl; AD_LOG_INFO << "The currently supported values are " << absl::StrJoin(allModes, ",") << std::endl; diff --git a/src/index/IndexMetaData.h b/src/index/IndexMetaData.h index a3b4cdccd6..7b865fafa7 100644 --- a/src/index/IndexMetaData.h +++ b/src/index/IndexMetaData.h @@ -6,7 +6,6 @@ #include -#include #include #include #include @@ -14,6 +13,7 @@ #include #include +#include "backports/algorithm.h" #include "global/Id.h" #include "index/CompressedRelation.h" #include "index/MetaDataHandler.h" diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp index 05353324a5..c8d977f473 100644 --- a/src/index/LocatedTriples.cpp +++ b/src/index/LocatedTriples.cpp @@ -6,9 +6,8 @@ #include "index/LocatedTriples.h" -#include - #include "absl/strings/str_join.h" +#include "backports/algorithm.h" #include "index/CompressedRelation.h" #include "index/ConstantsIndexBuilding.h" #include "util/ChunkedForLoop.h" @@ -29,9 +28,9 @@ std::vector LocatedTriple::locateTriplesInPermutation( // that larger than or equal to the triple. See `LocatedTriples.h` for a // discussion of the corner cases. 
size_t blockIndex = - std::ranges::lower_bound(blockMetadata, triple.toPermutedTriple(), - std::less<>{}, - &CompressedBlockMetadata::lastTriple_) - + ql::ranges::lower_bound(blockMetadata, triple.toPermutedTriple(), + std::less<>{}, + &CompressedBlockMetadata::lastTriple_) - blockMetadata.begin(); out.emplace_back(blockIndex, triple, shouldExist); }, @@ -53,7 +52,7 @@ NumAddedAndDeleted LocatedTriplesPerBlock::numTriples(size_t blockIndex) const { } auto blockUpdateTriples = map_.at(blockIndex); - size_t countInserts = std::ranges::count_if( + size_t countInserts = ql::ranges::count_if( blockUpdateTriples, &LocatedTriple::shouldTripleExist_); return {countInserts, blockUpdateTriples.size() - countInserts}; } @@ -169,9 +168,9 @@ IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex, if (locatedTripleIt != locatedTriples.end()) { AD_CORRECTNESS_CHECK(rowIt == block.end()); - std::ranges::for_each( - std::ranges::subrange(locatedTripleIt, locatedTriples.end()) | - std::views::filter(&LocatedTriple::shouldTripleExist_), + ql::ranges::for_each( + ql::ranges::subrange(locatedTripleIt, locatedTriples.end()) | + ql::views::filter(&LocatedTriple::shouldTripleExist_), writeLocatedTripleToResult); } if (rowIt != block.end()) { @@ -290,7 +289,7 @@ static auto updateGraphMetadata(CompressedBlockMetadata& blockMetadata, // Sort the stored graphs. Note: this is currently not expected by the code // that uses the graph info, but makes testing much easier. 
- std::ranges::sort(graphs.value()); + ql::ranges::sort(graphs.value()); } // ____________________________________________________________________________ @@ -341,14 +340,14 @@ void LocatedTriplesPerBlock::updateAugmentedMetadata() { // ____________________________________________________________________________ std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts) { os << "{ "; - std::ranges::copy(lts, std::ostream_iterator(os, " ")); + ql::ranges::copy(lts, std::ostream_iterator(os, " ")); os << "}"; return os; } // ____________________________________________________________________________ std::ostream& operator<<(std::ostream& os, const std::vector>& v) { - std::ranges::copy(v, std::ostream_iterator>(os, ", ")); + ql::ranges::copy(v, std::ostream_iterator>(os, ", ")); return os; } @@ -362,7 +361,7 @@ bool LocatedTriplesPerBlock::isLocatedTriple(const IdTriple<0>& triple, return ad_utility::contains(lt, locatedTriple); }; - return std::ranges::any_of(map_, [&blockContains](auto& indexAndBlock) { + return ql::ranges::any_of(map_, [&blockContains](auto& indexAndBlock) { const auto& [index, block] = indexAndBlock; return blockContains(block, index); }); diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h index a424a9de63..0280ce8cc6 100644 --- a/src/index/LocatedTriples.h +++ b/src/index/LocatedTriples.h @@ -185,9 +185,9 @@ class LocatedTriplesPerBlock { const LocatedTriplesPerBlock& ltpb) { // Get the block indices in sorted order. 
std::vector blockIndices; - std::ranges::copy(ltpb.map_ | std::views::keys, - std::back_inserter(blockIndices)); - std::ranges::sort(blockIndices); + ql::ranges::copy(ltpb.map_ | ql::views::keys, + std::back_inserter(blockIndices)); + ql::ranges::sort(blockIndices); for (auto blockIndex : blockIndices) { os << "LTs in Block #" << blockIndex << ": " << ltpb.map_.at(blockIndex) << std::endl; diff --git a/src/index/PatternCreator.cpp b/src/index/PatternCreator.cpp index fecb4eb2be..03bf20544c 100644 --- a/src/index/PatternCreator.cpp +++ b/src/index/PatternCreator.cpp @@ -72,7 +72,7 @@ void PatternCreator::finishSubject(Id subject, const Pattern& pattern) { // Note: This has to be done for all triples, including those where the // subject has no pattern. auto curSubject = currentSubject_.value(); - std::ranges::for_each( + ql::ranges::for_each( tripleBuffer_, [this, patternId, &curSubject](const auto& t) { static_assert(NumColumnsIndexBuilding == 4, "The following lines have to be changed when additional " @@ -109,11 +109,11 @@ void PatternCreator::finish() { // TODO Use `ranges::to`. 
std::vector> orderedPatterns{ patternToIdAndCount_.begin(), patternToIdAndCount_.end()}; - std::ranges::sort(orderedPatterns, std::less<>{}, - [](const auto& a) { return a.second.patternId_; }); + ql::ranges::sort(orderedPatterns, std::less<>{}, + [](const auto& a) { return a.second.patternId_; }); CompactVectorOfStrings::Writer patternWriter{ std::move(patternSerializer_).file()}; - for (const auto& pattern : orderedPatterns | std::views::keys) { + for (const auto& pattern : orderedPatterns | ql::views::keys) { patternWriter.push(pattern.data(), pattern.size()); } patternWriter.finish(); diff --git a/src/index/PrefixHeuristic.cpp b/src/index/PrefixHeuristic.cpp index 2353362f9c..e42b75ccf7 100644 --- a/src/index/PrefixHeuristic.cpp +++ b/src/index/PrefixHeuristic.cpp @@ -4,7 +4,6 @@ #include "./PrefixHeuristic.h" -#include #include #include "../parser/RdfEscaping.h" @@ -13,6 +12,7 @@ #include "../util/File.h" #include "../util/Log.h" #include "../util/StringUtils.h" +#include "backports/algorithm.h" using std::string; diff --git a/src/index/StringSortComparator.h b/src/index/StringSortComparator.h index 9f583480a5..da0324ff5e 100644 --- a/src/index/StringSortComparator.h +++ b/src/index/StringSortComparator.h @@ -797,7 +797,7 @@ class TripleComponentComparator { auto alloc = std::pmr::polymorphic_allocator(allocator->resource()); auto ptr = alloc.allocate(s.size()); - std::ranges::copy(s, ptr); + ql::ranges::copy(s, ptr); return {ptr, ptr + s.size()}; }; LocaleManager::SortKeyView sortKey; diff --git a/src/index/StxxlSortFunctors.h b/src/index/StxxlSortFunctors.h index c9d939e2c1..5c7b839d6e 100644 --- a/src/index/StxxlSortFunctors.h +++ b/src/index/StxxlSortFunctors.h @@ -21,7 +21,7 @@ struct SortTriple { } constexpr auto compare = &Id::compareWithoutLocalVocab; // TODO The manual invoking is ugly, probably we could use - // `std::ranges::lexicographical_compare`, but we have to carefully measure + // `ql::ranges::lexicographical_compare`, but we have to 
carefully measure // that this change doesn't slow down the index build. auto c1 = std::invoke(compare, a[i0], b[i0]); if (c1 != 0) { diff --git a/src/index/Vocabulary.cpp b/src/index/Vocabulary.cpp index 9afc172f19..ab2cb52505 100644 --- a/src/index/Vocabulary.cpp +++ b/src/index/Vocabulary.cpp @@ -30,7 +30,7 @@ Vocabulary::PrefixRanges::PrefixRanges( template bool Vocabulary::PrefixRanges::contain( IndexT index) const { - return std::ranges::any_of( + return ql::ranges::any_of( ranges_, [index](const std::pair& range) { return range.first <= index && index < range.second; }); @@ -125,7 +125,7 @@ bool Vocabulary::shouldEntityBeExternalized( } // Otherwise, externalize if and only if there is a prefix match for one of // `externalizedPrefixes_`. - return std::ranges::any_of(externalizedPrefixes_, [&word](const auto& p) { + return ql::ranges::any_of(externalizedPrefixes_, [&word](const auto& p) { return word.starts_with(p); }); } diff --git a/src/index/Vocabulary.h b/src/index/Vocabulary.h index f2533873a4..3ae8a09da7 100644 --- a/src/index/Vocabulary.h +++ b/src/index/Vocabulary.h @@ -8,7 +8,6 @@ #pragma once -#include #include #include #include @@ -17,6 +16,7 @@ #include #include +#include "backports/algorithm.h" #include "global/Constants.h" #include "global/Id.h" #include "global/Pattern.h" diff --git a/src/index/VocabularyMergerImpl.h b/src/index/VocabularyMergerImpl.h index 31925ab2a0..2b8113daf5 100644 --- a/src/index/VocabularyMergerImpl.h +++ b/src/index/VocabularyMergerImpl.h @@ -94,7 +94,7 @@ auto VocabularyMerger::mergeVocabulary(const std::string& basename, 0.8 * memoryToUse, generators, lessThanForQueue); ad_utility::ProgressBar progressBar{metaData_.numWordsTotal(), "Words merged: "}; - for (QueueWord& currentWord : std::views::join(mergedWords)) { + for (QueueWord& currentWord : ql::views::join(mergedWords)) { // Accumulate the globally ordered queue words in a buffer. 
sortedBuffer.push_back(std::move(currentWord)); @@ -318,10 +318,10 @@ inline ItemVec vocabMapsToVector(ItemMapArray& map) { futures.push_back( std::async(std::launch::async, [&singleMap, &els, &offsets, i] { using T = ItemVec::value_type; - std::ranges::transform(singleMap.map_, els.begin() + offsets[i], - [](auto& el) -> T { - return {el.first, std::move(el.second)}; - }); + ql::ranges::transform(singleMap.map_, els.begin() + offsets[i], + [](auto& el) -> T { + return {el.first, std::move(el.second)}; + }); })); ++i; } @@ -339,13 +339,13 @@ void sortVocabVector(ItemVec* vecPtr, StringSortComparator comp, auto& els = *vecPtr; if constexpr (USE_PARALLEL_SORT) { if (doParallelSort) { - ad_utility::parallel_sort(std::ranges::begin(els), std::ranges::end(els), + ad_utility::parallel_sort(ql::ranges::begin(els), ql::ranges::end(els), comp, ad_utility::parallel_tag(10)); } else { - std::ranges::sort(els, comp); + ql::ranges::sort(els, comp); } } else { - std::ranges::sort(els, comp); + ql::ranges::sort(els, comp); (void)doParallelSort; // avoid compiler warning for unused value. 
} } diff --git a/src/index/vocabulary/CompressionWrappers.h b/src/index/vocabulary/CompressionWrappers.h index 21a21f908b..6005f731e7 100644 --- a/src/index/vocabulary/CompressionWrappers.h +++ b/src/index/vocabulary/CompressionWrappers.h @@ -110,7 +110,7 @@ struct PrefixCompressionWrapper : detail::DecoderMultiplexer { static BulkResult compressAll(const Strings& strings) { PrefixCompressor compressor; auto stringsCopy = strings; - std::ranges::sort(stringsCopy); + ql::ranges::sort(stringsCopy); auto prefixes = calculatePrefixes(stringsCopy, NUM_COMPRESSION_PREFIXES, 1, true); compressor.buildCodebook(prefixes); diff --git a/src/index/vocabulary/VocabularyBinarySearchMixin.h b/src/index/vocabulary/VocabularyBinarySearchMixin.h index 2d9f5b0c6e..ccc928a70e 100644 --- a/src/index/vocabulary/VocabularyBinarySearchMixin.h +++ b/src/index/vocabulary/VocabularyBinarySearchMixin.h @@ -4,11 +4,11 @@ #pragma once -#include #include #include #include +#include "backports/algorithm.h" #include "index/vocabulary/VocabularyTypes.h" #include "util/Algorithm.h" @@ -40,7 +40,7 @@ class VocabularyBinarySearchMixin { Idx endIdx = std::nullopt) const { auto [begin, end] = getIterators(beginIdx, endIdx); return impl().iteratorToWordAndIndex( - std::ranges::lower_bound(begin, end, word, comparator)); + ql::ranges::lower_bound(begin, end, word, comparator)); } // Return the first entry that is greater than `word`. 
The interface is the @@ -51,7 +51,7 @@ class VocabularyBinarySearchMixin { Idx endIdx = std::nullopt) const { auto [begin, end] = getIterators(beginIdx, endIdx); return impl().iteratorToWordAndIndex( - std::ranges::upper_bound(begin, end, word, comparator)); + ql::ranges::upper_bound(begin, end, word, comparator)); } // These functions are similar to `lower_bound` and `upper_bound` (see above), diff --git a/src/index/vocabulary/VocabularyInMemoryBinSearch.cpp b/src/index/vocabulary/VocabularyInMemoryBinSearch.cpp index 204bbfaeed..268cc33721 100644 --- a/src/index/vocabulary/VocabularyInMemoryBinSearch.cpp +++ b/src/index/vocabulary/VocabularyInMemoryBinSearch.cpp @@ -24,7 +24,7 @@ void VocabularyInMemoryBinSearch::open(const string& fileName) { // _____________________________________________________________________________ std::optional VocabularyInMemoryBinSearch::operator[]( uint64_t index) const { - auto it = std::ranges::lower_bound(indices_, index); + auto it = ql::ranges::lower_bound(indices_, index); if (it != indices_.end() && *it == index) { return words_[it - indices_.begin()]; } diff --git a/src/parser/LiteralOrIri.cpp b/src/parser/LiteralOrIri.cpp index 077b189c26..10d2ac1bbe 100644 --- a/src/parser/LiteralOrIri.cpp +++ b/src/parser/LiteralOrIri.cpp @@ -4,9 +4,9 @@ #include "parser/LiteralOrIri.h" -#include #include +#include "backports/algorithm.h" #include "index/IndexImpl.h" namespace ad_utility::triple_component { diff --git a/src/parser/ParsedQuery.cpp b/src/parser/ParsedQuery.cpp index a08fc09f30..71351b43d6 100644 --- a/src/parser/ParsedQuery.cpp +++ b/src/parser/ParsedQuery.cpp @@ -96,10 +96,10 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { const bool isExplicitGroupBy = !_groupByVariables.empty(); const bool isImplicitGroupBy = - std::ranges::any_of(getAliases(), - [](const Alias& alias) { - return alias._expression.containsAggregate(); - }) && + ql::ranges::any_of(getAliases(), + [](const Alias& alias) { + return 
alias._expression.containsAggregate(); + }) && !isExplicitGroupBy; const bool isGroupBy = isExplicitGroupBy || isImplicitGroupBy; using namespace std::string_literals; @@ -176,7 +176,7 @@ void ParsedQuery::addSolutionModifiers(SolutionModifiers modifiers) { // part of the group by statement. const auto& aliases = selectClause().getAliases(); for (const Variable& var : selectClause().getSelectedVariables()) { - if (auto it = std::ranges::find(aliases, var, &Alias::_target); + if (auto it = ql::ranges::find(aliases, var, &Alias::_target); it != aliases.end()) { const auto& alias = *it; auto relevantVariables = groupVariables; diff --git a/src/parser/RdfEscaping.cpp b/src/parser/RdfEscaping.cpp index fca4610347..58559f3eff 100644 --- a/src/parser/RdfEscaping.cpp +++ b/src/parser/RdfEscaping.cpp @@ -326,8 +326,8 @@ std::string normalizedContentFromLiteralOrIri(std::string&& input) { static NormalizedString toNormalizedString(std::string_view input) { NormalizedString normalizedString; normalizedString.resize(input.size()); - std::ranges::transform(input.begin(), input.end(), normalizedString.begin(), - [](char c) { return NormalizedChar{c}; }); + ql::ranges::transform(input.begin(), input.end(), normalizedString.begin(), + [](char c) { return NormalizedChar{c}; }); return normalizedString; } diff --git a/src/parser/RdfParser.cpp b/src/parser/RdfParser.cpp index 503919f372..8b3bf0681e 100644 --- a/src/parser/RdfParser.cpp +++ b/src/parser/RdfParser.cpp @@ -1040,8 +1040,8 @@ void RdfParallelParser::initialize(const string& filename) { this->prefixMap_ = std::move(declarationParser.getPrefixMap()); auto remainder = declarationParser.getUnparsedRemainder(); remainingBatchFromInitialization.reserve(remainder.size()); - std::ranges::copy(remainder, - std::back_inserter(remainingBatchFromInitialization)); + ql::ranges::copy(remainder, + std::back_inserter(remainingBatchFromInitialization)); } auto feedBatches = [this, firstBatch = std::move( diff --git 
a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 55fe72eff2..5629f1452c 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -504,8 +504,8 @@ ParsedQuery Visitor::visit(Parser::DeleteWhereContext* ctx) { ParsedQuery Visitor::visit(Parser::ModifyContext* ctx) { auto isVisibleIfVariable = [this](const TripleComponent& component) { if (component.isVariable()) { - return std::ranges::find(parsedQuery_.getVisibleVariables(), - component.getVariable()) != + return ql::ranges::find(parsedQuery_.getVisibleVariables(), + component.getVariable()) != parsedQuery_.getVisibleVariables().end(); } else { return true; @@ -643,8 +643,8 @@ vector Visitor::visit(Parser::QuadsContext* ctx) { ctx->triplesTemplate(), [this](Parser::TriplesTemplateContext* ctx) { return transformTriplesTemplate(ctx, std::monostate{}); }); - std::ranges::move(visitVector(ctx->quadsNotTriples()), - std::back_inserter(triplesWithGraph)); + ql::ranges::move(visitVector(ctx->quadsNotTriples()), + std::back_inserter(triplesWithGraph)); return ad_utility::flatten(std::move(triplesWithGraph)); } @@ -994,7 +994,7 @@ OrderClause Visitor::visit(Parser::OrderClauseContext* ctx) { auto isDescending = [](const auto& variant) { return std::visit([](const auto& k) { return k.isDescending_; }, variant); }; - if (std::ranges::any_of(orderKeys, isDescending)) { + if (ql::ranges::any_of(orderKeys, isDescending)) { reportError(ctx, "When using the `INTERNAL SORT BY` modifier, all sorted " "variables have to be ascending"); @@ -1560,7 +1560,7 @@ vector Visitor::visit( for (auto&& [predicate, object] : std::move(predicateObjectPairs)) { triples.emplace_back(subject, std::move(predicate), std::move(object)); } - std::ranges::copy(additionalTriples, std::back_inserter(triples)); + ql::ranges::copy(additionalTriples, std::back_inserter(triples)); for (const auto& triple : triples) { 
setMatchingWordAndScoreVisibleIfPresent(ctx, triple); } @@ -1599,8 +1599,8 @@ PathObjectPairsAndTriples Visitor::visit( vector pairsAndTriples = visitVector(ctx->tupleWithoutPath()); for (auto& [newPairs, newTriples] : pairsAndTriples) { - std::ranges::move(newPairs, std::back_inserter(pairs)); - std::ranges::move(newTriples, std::back_inserter(triples)); + ql::ranges::move(newPairs, std::back_inserter(pairs)); + ql::ranges::move(newTriples, std::back_inserter(triples)); } return result; } @@ -1656,16 +1656,16 @@ ObjectsAndPathTriples Visitor::visit(Parser::ObjectListPathContext* ctx) { auto objectAndTriplesVec = visitVector(ctx->objectPath()); // First collect all the objects. std::vector objects; - std::ranges::copy( - objectAndTriplesVec | std::views::transform(ad_utility::first), + ql::ranges::copy( + objectAndTriplesVec | ql::views::transform(ad_utility::first), std::back_inserter(objects)); // Collect all the triples. Node: `views::join` flattens the input. std::vector triples; - std::ranges::copy(objectAndTriplesVec | - std::views::transform(ad_utility::second) | - std::views::join, - std::back_inserter(triples)); + ql::ranges::copy(objectAndTriplesVec | + ql::views::transform(ad_utility::second) | + ql::views::join, + std::back_inserter(triples)); return {std::move(objects), std::move(triples)}; } @@ -2380,7 +2380,7 @@ ExpressionPtr Visitor::visit(Parser::AggregateContext* ctx) { std::string functionName = ad_utility::getLowercase(children.at(0)->getText()); - const bool distinct = std::ranges::any_of(children, [](auto* child) { + const bool distinct = ql::ranges::any_of(children, [](auto* child) { return ad_utility::getLowercase(child->getText()) == "distinct"; }); // the only case that there is no child expression is COUNT(*), so we can diff --git a/src/util/Algorithm.h b/src/util/Algorithm.h index 9a5beabb6a..c2229e0e82 100644 --- a/src/util/Algorithm.h +++ b/src/util/Algorithm.h @@ -6,12 +6,12 @@ #ifndef QLEVER_ALGORITHM_H #define QLEVER_ALGORITHM_H 
-#include #include #include #include #include +#include "backports/algorithm.h" #include "util/Exception.h" #include "util/Forward.h" #include "util/HashSet.h" @@ -34,8 +34,8 @@ constexpr bool contains(Container&& container, const T& element) { ad_utility::isSimilar) { return container.find(element) != container.npos; } else { - return std::ranges::find(std::begin(container), std::end(container), - element) != std::end(container); + return ql::ranges::find(std::begin(container), std::end(container), + element) != std::end(container); } } @@ -75,7 +75,7 @@ auto transform(Range&& input, F unaryOp) { unaryOp, *ad_utility::makeForwardingIterator(input.begin())))>; std::vector out; out.reserve(input.size()); - std::ranges::transform( + ql::ranges::transform( ad_utility::makeForwardingIterator(input.begin()), ad_utility::makeForwardingIterator(input.end()), std::back_inserter(out), unaryOp); @@ -95,7 +95,7 @@ std::vector> zipVectors(const std::vector& vectorA, std::vector> vectorsPairedUp{}; vectorsPairedUp.reserve(vectorA.size()); - std::ranges::transform( + ql::ranges::transform( vectorA, vectorB, std::back_inserter(vectorsPairedUp), [](const auto& a, const auto& b) { return std::make_pair(a, b); }); diff --git a/src/util/BatchedPipeline.h b/src/util/BatchedPipeline.h index dd5879e326..0bf1c09f2d 100644 --- a/src/util/BatchedPipeline.h +++ b/src/util/BatchedPipeline.h @@ -129,7 +129,7 @@ class Batcher { } res.isPipelineGood_ = true; res.content_.reserve(opt->size()); - std::ranges::move(*opt, std::back_inserter(res.content_)); + ql::ranges::move(*opt, std::back_inserter(res.content_)); return res; } else { res.isPipelineGood_ = true; diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp index 44295b3aeb..9ff9c45352 100644 --- a/src/util/BlankNodeManager.cpp +++ b/src/util/BlankNodeManager.cpp @@ -59,11 +59,11 @@ bool BlankNodeManager::LocalBlankNodeManager::containsBlankNodeIndex( return index >= block.startIdx_ && index < block.nextIdx_; }; - 
return std::ranges::any_of(*blocks_, containsIndex) || - std::ranges::any_of( + return ql::ranges::any_of(*blocks_, containsIndex) || + ql::ranges::any_of( otherBlocks_, [&](const std::shared_ptr>& blocks) { - return std::ranges::any_of(*blocks, containsIndex); + return ql::ranges::any_of(*blocks, containsIndex); }); } diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h index afdc748281..3ff7613768 100644 --- a/src/util/BlankNodeManager.h +++ b/src/util/BlankNodeManager.h @@ -97,7 +97,7 @@ class BlankNodeManager { if (l == nullptr) { continue; } - std::ranges::copy(l->otherBlocks_, inserter); + ql::ranges::copy(l->otherBlocks_, inserter); *inserter = l->blocks_; } } diff --git a/src/util/ChunkedForLoop.h b/src/util/ChunkedForLoop.h index 9569633e24..3e2dbdc4e8 100644 --- a/src/util/ChunkedForLoop.h +++ b/src/util/ChunkedForLoop.h @@ -5,10 +5,11 @@ #ifndef QLEVER_CHUNKEDFORLOOP_H #define QLEVER_CHUNKEDFORLOOP_H -#include #include #include +#include "backports/algorithm.h" + namespace ad_utility { namespace detail { @@ -69,7 +70,7 @@ template concept SizedInputRange = std::ranges::sized_range && std::ranges::input_range; -// Similar to `std::ranges::copy`, but invokes `chunkOperation` every +// Similar to `ql::ranges::copy`, but invokes `chunkOperation` every // `chunkSize` elements. (Round up to the next chunk size if the range size is // not a multiple of `chunkSize`.) 
template @@ -77,16 +78,16 @@ inline void chunkedCopy(R&& inputRange, O result, std::ranges::range_difference_t chunkSize, const std::invocable auto& chunkOperation) requires std::indirectly_copyable, O> { - auto begin = std::ranges::begin(inputRange); - auto end = std::ranges::end(inputRange); + auto begin = ql::ranges::begin(inputRange); + auto end = ql::ranges::end(inputRange); auto target = result; - while (std::ranges::distance(begin, end) >= chunkSize) { + while (ql::ranges::distance(begin, end) >= chunkSize) { auto start = begin; std::ranges::advance(begin, chunkSize); - target = std::ranges::copy(start, begin, target).out; + target = ql::ranges::copy(start, begin, target).out; chunkOperation(); } - std::ranges::copy(begin, end, target); + ql::ranges::copy(begin, end, target); chunkOperation(); } @@ -95,22 +96,22 @@ template concept SizedOutputRange = std::ranges::sized_range && std::ranges::output_range; -// Similar to `std::ranges::fill`, but invokes `chunkOperation` every +// Similar to `ql::ranges::fill`, but invokes `chunkOperation` every // `chunkSize` elements. (Round up to the next chunk size if the range size is // not a multiple of `chunkSize`.) 
template R> inline void chunkedFill(R&& outputRange, const T& value, std::ranges::range_difference_t chunkSize, const std::invocable auto& chunkOperation) { - auto begin = std::ranges::begin(outputRange); - auto end = std::ranges::end(outputRange); - while (std::ranges::distance(begin, end) >= chunkSize) { + auto begin = ql::ranges::begin(outputRange); + auto end = ql::ranges::end(outputRange); + while (ql::ranges::distance(begin, end) >= chunkSize) { auto start = begin; std::ranges::advance(begin, chunkSize); - std::ranges::fill(start, begin, value); + ql::ranges::fill(start, begin, value); chunkOperation(); } - std::ranges::fill(begin, end, value); + ql::ranges::fill(begin, end, value); chunkOperation(); } } // namespace ad_utility diff --git a/src/util/ConfigManager/ConfigManager.cpp b/src/util/ConfigManager/ConfigManager.cpp index 12d7db1313..9ff8c4d3d8 100644 --- a/src/util/ConfigManager/ConfigManager.cpp +++ b/src/util/ConfigManager/ConfigManager.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -23,6 +22,7 @@ #include #include +#include "backports/algorithm.h" #include "util/Algorithm.h" #include "util/ComparisonWithNan.h" #include "util/ConfigManager/ConfigExceptions.h" @@ -160,26 +160,26 @@ void ConfigManager::visitHashMapEntries(Visitor&& vis, bool sortByCreationOrder, using Pair = decltype(configurationOptions_)::value_type; // Check the hash map entries before using them. - std::ranges::for_each(configurationOptions_, [&pathPrefix](const Pair& pair) { + ql::ranges::for_each(configurationOptions_, [&pathPrefix](const Pair& pair) { const auto& [jsonPath, hashMapEntry] = pair; verifyHashMapEntry(absl::StrCat(pathPrefix, jsonPath), hashMapEntry); }); - // `std::reference_wrapper` works with `std::ranges::sort`. `const + // `std::reference_wrapper` works with `ql::ranges::sort`. `const // Pair&` does not. 
std::vector> hashMapEntries( configurationOptions_.begin(), configurationOptions_.end()); // Sort the collected `HashMapEntry`s, if wanted. if (sortByCreationOrder) { - std::ranges::sort(hashMapEntries, {}, [](const Pair& pair) { + ql::ranges::sort(hashMapEntries, {}, [](const Pair& pair) { const HashMapEntry& hashMapEntry = pair.second; return hashMapEntry.getInitializationId(); }); } // Call a wrapper for `vis` with the `HashMapEntry::visit` of every entry. - std::ranges::for_each(hashMapEntries, [&vis](const Pair& pair) { + ql::ranges::for_each(hashMapEntries, [&vis](const Pair& pair) { auto& [jsonPath, hashMapEntry] = pair; hashMapEntry.visit( [&jsonPath, &vis](auto& data) { std::invoke(vis, jsonPath, data); }); @@ -242,14 +242,13 @@ requires std::is_object_v auto ConfigManager::allHashMapEntries( hashMapEntry.getSubManager().value()->configurationOptions_, pathToCurrentEntry, predicate); allHashMapEntry.reserve(recursiveResults.size()); - std::ranges::move(std::move(recursiveResults), - std::back_inserter(allHashMapEntry)); + ql::ranges::move(std::move(recursiveResults), + std::back_inserter(allHashMapEntry)); } }; // Collect all the entries in the given `hashMap`. - std::ranges::for_each(hashMap, addHashMapEntryToCollectedOptions, - verifyEntry); + ql::ranges::for_each(hashMap, addHashMapEntryToCollectedOptions, verifyEntry); return allHashMapEntry; } @@ -299,7 +298,7 @@ std::string ConfigManager::createJsonPointerString( // We don't use a `lazyStrJoin` here, so that an empty `keys` produces an // empty string. - std::ranges::for_each( + ql::ranges::for_each( keys, [&escapeSpecialCharacters, &pointerString](std::string_view key) { pointerString << "/" << escapeSpecialCharacters(key); }); @@ -320,7 +319,7 @@ void ConfigManager::verifyPath(const std::vector& path) const { A string must be a valid `NAME` in the short hand. Otherwise, an option can't get accessed with the short hand. 
*/ - if (auto failedKey = std::ranges::find_if_not(path, isNameInShortHand); + if (auto failedKey = ql::ranges::find_if_not(path, isNameInShortHand); failedKey != path.end()) { /* One of the keys failed. `failedKey` is an iterator pointing to the key. @@ -346,8 +345,8 @@ void ConfigManager::verifyPath(const std::vector& path) const { - The path of an already exiting option/manager is a prefix of the new path. The reasons, why it's not allowed, are basically the same. */ - std::ranges::for_each( - std::views::keys(configurationOptions_), + ql::ranges::for_each( + ql::views::keys(configurationOptions_), [&path, this](std::string_view alreadyAddedPath) { const std::string pathAsJsonPointerString = createJsonPointerString(path); @@ -647,7 +646,7 @@ std::string ConfigManager::generateConfigurationDocDetailedList( if (const auto& validators = assignment.getEntriesUnderKey(key); !validators.empty()) { // Validators should be sorted by their creation order. - AD_CORRECTNESS_CHECK(std::ranges::is_sorted( + AD_CORRECTNESS_CHECK(ql::ranges::is_sorted( validators, {}, [](const ConfigOptionValidatorManager& validator) { return validator.getInitializationId(); })); @@ -729,11 +728,11 @@ auto ConfigManager::getValidatorAssignment() const // Assign to the configuration options. const auto& allValidators = validators(true); - std::ranges::for_each( - std::views::filter(allValidators, - [](const ConfigOptionValidatorManager& val) { - return val.configOptionToBeChecked().size() == 1; - }), + ql::ranges::for_each( + ql::views::filter(allValidators, + [](const ConfigOptionValidatorManager& val) { + return val.configOptionToBeChecked().size() == 1; + }), [&assignment](const ConfigOptionValidatorManager& val) { // The validator manager only has one element, so this should be okay. 
const ConfigOption& opt = **val.configOptionToBeChecked().begin(); @@ -752,18 +751,17 @@ auto ConfigManager::getValidatorAssignment() const *pair.second.getSubManager().value()); })}; allManager.emplace_back(*this); - std::ranges::for_each( - allManager, [&assignment](const ConfigManager& manager) { - std::ranges::for_each( - std::views::filter( - manager.validators_, - [](const auto& validator) { - return validator.configOptionToBeChecked().size() > 1; - }), - [&assignment, &manager](const auto& validator) { - assignment.addEntryUnderKey(manager, validator); - }); - }); + ql::ranges::for_each(allManager, [&assignment](const ConfigManager& manager) { + ql::ranges::for_each( + ql::views::filter(manager.validators_, + [](const auto& validator) { + return validator.configOptionToBeChecked().size() > + 1; + }), + [&assignment, &manager](const auto& validator) { + assignment.addEntryUnderKey(manager, validator); + }); + }); return assignment; } @@ -802,7 +800,7 @@ std::string ConfigManager::printConfigurationDoc(bool detailed) const { std::string ConfigManager::vectorOfKeysForJsonToString( const std::vector& keys) { std::ostringstream keysToString; - std::ranges::for_each(keys, [&keysToString](std::string_view key) { + ql::ranges::for_each(keys, [&keysToString](std::string_view key) { keysToString << "[" << key << "]"; }); return std::move(keysToString).str(); @@ -822,8 +820,8 @@ ConfigManager::validators(const bool sortByInitialization) const { allSubManager{allHashMapEntries( configurationOptions_, "", [](const HashMapEntry& entry) { return entry.holdsSubManager(); })}; - std::ranges::for_each( - std::views::values(allSubManager), + ql::ranges::for_each( + ql::views::values(allSubManager), [&allValidators](const ConfigManager::HashMapEntry& entry) { appendVector(allValidators, entry.getSubManager().value()->validators(false)); @@ -831,17 +829,17 @@ ConfigManager::validators(const bool sortByInitialization) const { // Sort the validators, if wanted. 
if (sortByInitialization) { - std::ranges::sort(allValidators, {}, - [](const ConfigOptionValidatorManager& validator) { - return validator.getInitializationId(); - }); + ql::ranges::sort(allValidators, {}, + [](const ConfigOptionValidatorManager& validator) { + return validator.getInitializationId(); + }); } return allValidators; } // ____________________________________________________________________________ void ConfigManager::verifyWithValidators() const { - std::ranges::for_each(validators(false), [](auto& validator) { + ql::ranges::for_each(validators(false), [](auto& validator) { validator.get().checkValidator(); }); }; @@ -850,8 +848,8 @@ void ConfigManager::verifyWithValidators() const { bool ConfigManager::containsOption(const ConfigOption& opt) const { const auto allOptions = configurationOptions(); return ad_utility::contains( - std::views::values(allOptions) | - std::views::transform( + ql::views::values(allOptions) | + ql::views::transform( [](const ConfigOption& option) { return &option; }), &opt); } diff --git a/src/util/ConfigManager/ConfigOption.cpp b/src/util/ConfigManager/ConfigOption.cpp index 26249649f7..4f6e1a9cd5 100644 --- a/src/util/ConfigManager/ConfigOption.cpp +++ b/src/util/ConfigManager/ConfigOption.cpp @@ -98,7 +98,7 @@ void ConfigOption::setValueWithJson(const nlohmann::json& json) { */ return j.is_array() && [&j, &isValueTypeSubType]( const std::vector&) { - return std::ranges::all_of(j, [&isValueTypeSubType](const auto& entry) { + return ql::ranges::all_of(j, [&isValueTypeSubType](const auto& entry) { return isValueTypeSubType.template operator()( entry, AD_FWD(isValueTypeSubType)); }); @@ -177,7 +177,7 @@ std::string ConfigOption::contentOfAvailableTypesToString( stream << "["; ad_utility::lazyStrJoin( &stream, - std::views::transform( + ql::views::transform( content, [&variantSubTypeToString](const VectorEntryType& entry) { return variantSubTypeToString(entry, variantSubTypeToString); diff --git a/src/util/ConstexprMap.h 
b/src/util/ConstexprMap.h index dfcda81d59..273332dd67 100644 --- a/src/util/ConstexprMap.h +++ b/src/util/ConstexprMap.h @@ -5,9 +5,10 @@ #ifndef QLEVER_CONSTEXPRMAP_H #define QLEVER_CONSTEXPRMAP_H -#include #include +#include "backports/algorithm.h" + namespace ad_utility { /// A const and constexpr map from `Key`s to `Value`s. diff --git a/src/util/ConstexprUtils.h b/src/util/ConstexprUtils.h index 12ece671fe..6661748788 100644 --- a/src/util/ConstexprUtils.h +++ b/src/util/ConstexprUtils.h @@ -7,6 +7,7 @@ #include #include +#include "backports/algorithm.h" #include "util/Exception.h" #include "util/Forward.h" #include "util/TypeTraits.h" @@ -169,7 +170,7 @@ template constexpr std::array integerToArray(Int value, Int numValues) { std::array res; - for (auto& el : res | std::views::reverse) { + for (auto& el : res | ql::views::reverse) { el = value % numValues; value /= numValues; } diff --git a/src/util/FsstCompressor.h b/src/util/FsstCompressor.h index 1e673f33ae..b5c54c4d4d 100644 --- a/src/util/FsstCompressor.h +++ b/src/util/FsstCompressor.h @@ -95,7 +95,7 @@ class FsstRepeatedDecoder { nextInput = result; }; - std::ranges::for_each(std::views::reverse(decoders_), decompressSingle); + ql::ranges::for_each(ql::views::reverse(decoders_), decompressSingle); return result; } // Allow this type to be trivially serializable, diff --git a/src/util/Generator.h b/src/util/Generator.h index bd5f0e32de..604ba7e7c3 100644 --- a/src/util/Generator.h +++ b/src/util/Generator.h @@ -12,6 +12,7 @@ #include #include +#include "backports/algorithm.h" #include "util/Exception.h" #include "util/TypeTraits.h" @@ -115,7 +116,7 @@ class generator_promise { struct generator_sentinel {}; -template +template class generator_iterator { using promise_type = generator_promise; using coroutine_handle = std::coroutine_handle; @@ -180,7 +181,9 @@ template class [[nodiscard]] generator { public: using promise_type = detail::generator_promise; - using iterator = detail::generator_iterator; + 
using iterator = detail::generator_iterator; + // TODO Check if this fixes anything wrt ::ranges + // using const_iterator = detail::generator_iterator; using value_type = typename iterator::value_type; generator() noexcept : m_coroutine(nullptr) {} @@ -213,10 +216,21 @@ class [[nodiscard]] generator { return iterator{m_coroutine}; } + /* + iterator begin() const; + detail::generator_sentinel end() const; + */ + detail::generator_sentinel end() noexcept { return detail::generator_sentinel{}; } + /* + // Not defined and not useful, but required for range-v3 + const_iterator begin() const; + const_iterator end() const; + */ + void swap(generator& other) noexcept { std::swap(m_coroutine, other.m_coroutine); } diff --git a/src/util/JoinAlgorithms/FindUndefRanges.h b/src/util/JoinAlgorithms/FindUndefRanges.h index bab300a763..7b3f3296cb 100644 --- a/src/util/JoinAlgorithms/FindUndefRanges.h +++ b/src/util/JoinAlgorithms/FindUndefRanges.h @@ -40,9 +40,9 @@ auto findSmallerUndefRangesForRowsWithoutUndef( using Row = typename std::iterator_traits::value_type; assert(row.size() == (*begin).size()); assert( - std::ranges::is_sorted(begin, end, std::ranges::lexicographical_compare)); - assert((std::ranges::all_of( - row, [](Id id) { return id != Id::makeUndefined(); }))); + ql::ranges::is_sorted(begin, end, ql::ranges::lexicographical_compare)); + assert((ql::ranges::all_of(row, + [](Id id) { return id != Id::makeUndefined(); }))); size_t numJoinColumns = row.size(); // TODO This can be done without copying. 
Row rowLower = row; @@ -56,7 +56,7 @@ auto findSmallerUndefRangesForRowsWithoutUndef( } auto [begOfUndef, endOfUndef] = std::equal_range( - begin, end, rowLower, std::ranges::lexicographical_compare); + begin, end, rowLower, ql::ranges::lexicographical_compare); for (auto it = begOfUndef; it != endOfUndef; ++it) { co_yield it; } @@ -80,7 +80,7 @@ auto findSmallerUndefRangesForRowsWithUndefInLastColumns( assert(row.size() == (*begin).size()); assert(numJoinColumns >= numLastUndefined); assert( - std::ranges::is_sorted(begin, end, std::ranges::lexicographical_compare)); + ql::ranges::is_sorted(begin, end, ql::ranges::lexicographical_compare)); const size_t numDefinedColumns = numJoinColumns - numLastUndefined; for (size_t i = 0; i < numDefinedColumns; ++i) { assert(row[i] != Id::makeUndefined()); @@ -107,11 +107,11 @@ auto findSmallerUndefRangesForRowsWithUndefInLastColumns( } auto begOfUndef = std::lower_bound(begin, end, rowLower, - std::ranges::lexicographical_compare); + ql::ranges::lexicographical_compare); rowLower[numDefinedColumns - 1] = Id::fromBits(rowLower[numDefinedColumns - 1].getBits() + 1); auto endOfUndef = std::lower_bound(begin, end, rowLower, - std::ranges::lexicographical_compare); + ql::ranges::lexicographical_compare); for (; begOfUndef != endOfUndef; ++begOfUndef) { resultMightBeUnsorted = true; co_yield begOfUndef; @@ -127,12 +127,12 @@ auto findSmallerUndefRangesArbitrary(const auto& row, It begin, It end, -> cppcoro::generator { assert(row.size() == (*begin).size()); assert( - std::ranges::is_sorted(begin, end, std::ranges::lexicographical_compare)); + ql::ranges::is_sorted(begin, end, ql::ranges::lexicographical_compare)); // To only get smaller entries, we first find a suitable upper bound in the // input range. We use `std::lower_bound` because the input row itself is not // a valid match. 
- end = std::lower_bound(begin, end, row, std::ranges::lexicographical_compare); + end = std::lower_bound(begin, end, row, ql::ranges::lexicographical_compare); const size_t numJoinColumns = row.size(); auto isCompatible = [&](const auto& otherRow) { @@ -171,8 +171,8 @@ auto findSmallerUndefRanges(const auto& row, It begin, It end, -> cppcoro::generator { size_t numLastUndefined = 0; assert(row.size() > 0); - auto it = std::ranges::rbegin(row); - auto rend = std::ranges::rend(row); + auto it = ql::ranges::rbegin(row); + auto rend = ql::ranges::rend(row); for (; it < rend; ++it) { if (*it != Id::makeUndefined()) { break; diff --git a/src/util/JoinAlgorithms/JoinAlgorithms.h b/src/util/JoinAlgorithms/JoinAlgorithms.h index 7231f9396f..36af501651 100644 --- a/src/util/JoinAlgorithms/JoinAlgorithms.h +++ b/src/util/JoinAlgorithms/JoinAlgorithms.h @@ -4,11 +4,11 @@ #pragma once -#include #include #include #include +#include "backports/algorithm.h" #include "engine/idTable/IdTable.h" #include "global/Id.h" #include "util/Generator.h" @@ -160,7 +160,7 @@ template ) { return row != Id::makeUndefined(); } else { - return (std::ranges::none_of( + return (ql::ranges::none_of( row, [](Id id) { return id == Id::makeUndefined(); })); } }; @@ -525,7 +525,7 @@ void specialOptionalJoin( // TODO We could probably also apply this optimization if both // inputs contain UNDEF values only in the last column, and possibly // also not only for `OPTIONAL` joins. - auto endOfUndef = std::ranges::find_if_not(leftSub, &Id::isUndefined); + auto endOfUndef = ql::ranges::find_if_not(leftSub, &Id::isUndefined); auto findSmallerUndefRangeLeft = [leftSub, endOfUndef](auto&&...) -> cppcoro::generator { @@ -596,22 +596,22 @@ class BlockAndSubrange { bool empty() const { return subrange_.second == subrange_.first; } - // Return the currently specified subrange as a `std::ranges::subrange` + // Return the currently specified subrange as a `ql::ranges::subrange` // object. 
auto subrange() { - return std::ranges::subrange{fullBlock().begin() + subrange_.first, - fullBlock().begin() + subrange_.second}; + return ql::ranges::subrange{fullBlock().begin() + subrange_.first, + fullBlock().begin() + subrange_.second}; } // The const overload of the `subrange` method (see above). auto subrange() const { - return std::ranges::subrange{fullBlock().begin() + subrange_.first, - fullBlock().begin() + subrange_.second}; + return ql::ranges::subrange{fullBlock().begin() + subrange_.first, + fullBlock().begin() + subrange_.second}; } // Get a view that iterates over all the indices that belong to the subrange. auto getIndexRange() const { - return std::views::iota(subrange_.first, subrange_.second); + return ql::views::iota(subrange_.first, subrange_.second); } Range getIndices() const { return subrange_; } @@ -649,7 +649,7 @@ class BlockAndSubrange { // Overload of `setSubrange` for an actual subrange object. template void setSubrange(const Subrange& subrange) { - setSubrange(std::ranges::begin(subrange), std::ranges::end(subrange)); + setSubrange(ql::ranges::begin(subrange), ql::ranges::end(subrange)); } }; @@ -683,7 +683,7 @@ JoinSide(It, End, const Projection&) -> JoinSide; // keeping them valid until the join is completed. 
template auto makeJoinSide(Blocks& blocks, const auto& projection) { - return JoinSide{std::ranges::begin(blocks), std::ranges::end(blocks), + return JoinSide{ql::ranges::begin(blocks), ql::ranges::end(blocks), projection}; } @@ -799,14 +799,14 @@ struct BlockZipperJoinImpl { auto& end = side.end_; for (size_t numBlocksRead = 0; it != end && numBlocksRead < 3; ++it, ++numBlocksRead) { - if (std::ranges::empty(*it)) { + if (ql::ranges::empty(*it)) { continue; } if (!eq((*it)[0], currentEl)) { AD_CORRECTNESS_CHECK(lessThan_(currentEl, (*it)[0])); return true; } - AD_CORRECTNESS_CHECK(std::ranges::is_sorted(*it, lessThan_)); + AD_CORRECTNESS_CHECK(ql::ranges::is_sorted(*it, lessThan_)); side.currentBlocks_.emplace_back(std::move(*it)); } return it == end; @@ -850,7 +850,7 @@ struct BlockZipperJoinImpl { // Delete the part from the last block that is `<= lastProcessedElement`. decltype(auto) remainingBlock = blocks.at(0).subrange(); - auto beginningOfUnjoined = std::ranges::upper_bound( + auto beginningOfUnjoined = ql::ranges::upper_bound( remainingBlock, lastProcessedElement, lessThan_); blocks.at(0).setSubrange(beginningOfUnjoined, remainingBlock.end()); if (blocks.at(0).empty()) { @@ -867,7 +867,9 @@ struct BlockZipperJoinImpl { const ProjectedEl& currentEl) { AD_CORRECTNESS_CHECK(!currentBlocks.empty()); const auto& first = currentBlocks.at(0); - auto it = std::ranges::lower_bound(first.subrange(), currentEl, lessThan_); + // TODO ql::ranges::lower_bound doesn't work here. + auto it = std::lower_bound(first.subrange().begin(), first.subrange().end(), + currentEl, lessThan_); return std::tuple{std::ref(first.fullBlock()), first.subrange(), it}; } @@ -883,7 +885,7 @@ struct BlockZipperJoinImpl { // blocks on the right and add them to the result. void addCartesianProduct(const LeftBlocks& blocksLeft, const RightBlocks& blocksRight) { - // TODO use `std::views::cartesian_product`. + // TODO use `ql::views::cartesian_product`. 
for (const auto& lBlock : blocksLeft) { for (const auto& rBlock : blocksRight) { compatibleRowAction_.setInput(lBlock.fullBlock(), rBlock.fullBlock()); @@ -903,10 +905,10 @@ struct BlockZipperJoinImpl { const LeftBlocks& blocksLeft, const RightBlocks& blocksRight) { if constexpr (DoOptionalJoin) { if (!hasUndef(rightSide_) && - std::ranges::all_of( - blocksRight | std::views::transform( + ql::ranges::all_of( + blocksRight | ql::views::transform( [](const auto& inp) { return inp.subrange(); }), - std::ranges::empty)) { + ql::ranges::empty)) { for (const auto& lBlock : blocksLeft) { compatibleRowAction_.setOnlyLeftInputForOptionalJoin( lBlock.fullBlock()); @@ -938,8 +940,11 @@ struct BlockZipperJoinImpl { return result; } auto& last = result.back(); - last.setSubrange( - std::ranges::equal_range(last.subrange(), currentEl, lessThan_)); + // TODO `ql::ranges::equal_range` doesn't work here for some + // reason. + auto [begin, end] = std::equal_range( + last.subrange().begin(), last.subrange().end(), currentEl, lessThan_); + last.setSubrange(begin, end); return result; } @@ -1035,16 +1040,16 @@ struct BlockZipperJoinImpl { // yields iterators to the individual undefined values. 
if constexpr (potentiallyHasUndef) { [[maybe_unused]] auto res = zipperJoinWithUndef( - std::ranges::subrange{subrangeLeft.begin(), currentElItL}, - std::ranges::subrange{subrangeRight.begin(), currentElItR}, lessThan_, + ql::ranges::subrange{subrangeLeft.begin(), currentElItL}, + ql::ranges::subrange{subrangeRight.begin(), currentElItR}, lessThan_, addRowIndex, findUndefValues(fullBlockLeft, fullBlockRight, begL, begR), findUndefValues(fullBlockLeft, fullBlockRight, begL, begR), addNotFoundRowIndex); } else { [[maybe_unused]] auto res = zipperJoinWithUndef( - std::ranges::subrange{subrangeLeft.begin(), currentElItL}, - std::ranges::subrange{subrangeRight.begin(), currentElItR}, lessThan_, + ql::ranges::subrange{subrangeLeft.begin(), currentElItL}, + ql::ranges::subrange{subrangeRight.begin(), currentElItR}, lessThan_, addRowIndex, noop, noop, addNotFoundRowIndex); } compatibleRowAction_.flush(); @@ -1063,7 +1068,7 @@ struct BlockZipperJoinImpl { while (targetBuffer.empty() && it != end) { auto& el = *it; if (!el.empty()) { - AD_CORRECTNESS_CHECK(std::ranges::is_sorted(el, lessThan_)); + AD_CORRECTNESS_CHECK(ql::ranges::is_sorted(el, lessThan_)); targetBuffer.emplace_back(std::move(el)); } ++it; @@ -1245,7 +1250,7 @@ struct BlockZipperJoinImpl { // The reference of `it` is there on purpose. 
for (auto& it = side.it_; it != side.end_; ++it) { auto& el = *it; - if (std::ranges::empty(el) || !isUndefined_(el.front())) { + if (ql::ranges::empty(el) || !isUndefined_(el.front())) { return; } bool endIsUndefined = isUndefined_(el.back()); @@ -1253,11 +1258,12 @@ struct BlockZipperJoinImpl { if (!endIsUndefined) { auto& lastUndefinedBlock = side.undefBlocks_.back(); side.currentBlocks_.push_back(lastUndefinedBlock); - auto subrange = std::ranges::equal_range( - lastUndefinedBlock.subrange(), - lastUndefinedBlock.subrange().front(), lessThan_); - size_t undefCount = std::ranges::size(subrange); - lastUndefinedBlock.setSubrange(std::move(subrange)); + // TODO ql::ranges::equal_range doesn't work for some reason. + decltype(auto) subrange = lastUndefinedBlock.subrange(); + auto [begin, end] = std::equal_range(subrange.begin(), subrange.end(), + subrange.front(), lessThan_); + size_t undefCount = std::distance(begin, end); + lastUndefinedBlock.setSubrange(begin, end); auto& firstDefinedBlock = side.currentBlocks_.back(); firstDefinedBlock.setSubrange( firstDefinedBlock.fullBlock().begin() + undefCount, diff --git a/src/util/MemorySize/MemorySize.h b/src/util/MemorySize/MemorySize.h index 59d7e47a19..92e2f01465 100644 --- a/src/util/MemorySize/MemorySize.h +++ b/src/util/MemorySize/MemorySize.h @@ -7,7 +7,6 @@ #include -#include #include #include #include @@ -17,6 +16,7 @@ #include #include +#include "backports/algorithm.h" #include "util/ConstexprMap.h" #include "util/ConstexprUtils.h" #include "util/Exception.h" diff --git a/src/util/ParallelMultiwayMerge.h b/src/util/ParallelMultiwayMerge.h index 21f4d85d08..27db2e4e69 100644 --- a/src/util/ParallelMultiwayMerge.h +++ b/src/util/ParallelMultiwayMerge.h @@ -71,7 +71,7 @@ cppcoro::generator> lazyBinaryMerge( // Turn the ranges into `(iterator, end)` pairs. 
auto makeItPair = [](auto& range) { - return std::pair{std::ranges::begin(range), std::ranges::end(range)}; + return std::pair{ql::ranges::begin(range), ql::ranges::end(range)}; }; auto it1 = makeItPair(range1); @@ -126,7 +126,7 @@ cppcoro::generator> lazyBinaryMerge( auto yieldRemainder = [&buffer, &isBufferLargeEnough, &clearBuffer, &pushToBuffer](auto& itPair) -> cppcoro::generator> { - for (auto& el : std::ranges::subrange(itPair.first, itPair.second)) { + for (auto& el : ql::ranges::subrange(itPair.first, itPair.second)) { pushToBuffer(el); if (isBufferLargeEnough()) { co_yield buffer; @@ -194,20 +194,19 @@ cppcoro::generator> parallelMultiwayMergeImpl( maxMemPerNode, blocksize, moveIf(rangeOfRanges[0]), moveIf(rangeOfRanges[1]), comparison); } else { - size_t size = std::ranges::size(rangeOfRanges); + size_t size = ql::ranges::size(rangeOfRanges); size_t split = size / 2; auto beg = rangeOfRanges.begin(); auto splitIt = beg + split; auto end = rangeOfRanges.end(); auto join = [](auto&& view) { - return std::views::join(ad_utility::OwningView{AD_FWD(view)}); + return ql::views::join(ad_utility::OwningView{AD_FWD(view)}); }; auto parallelMerge = [join, blocksize, comparison, maxMemPerNode]( auto it, auto end) { return join(parallelMultiwayMergeImpl( - maxMemPerNode, blocksize, std::ranges::subrange{it, end}, - comparison)); + maxMemPerNode, blocksize, ql::ranges::subrange{it, end}, comparison)); }; return ad_utility::streams::runStreamAsync( @@ -233,7 +232,7 @@ cppcoro::generator> parallelMultiwayMerge( size_t blocksize = 100) { // There is one suboperation per input in the recursion tree, so we have to // divide the memory limit. 
- auto maxMemPerNode = memoryLimit / std::ranges::size(rangeOfRanges); + auto maxMemPerNode = memoryLimit / ql::ranges::size(rangeOfRanges); return detail::parallelMultiwayMergeImpl( maxMemPerNode, blocksize, AD_FWD(rangeOfRanges), std::move(comparison)); } diff --git a/src/util/PriorityQueue.h b/src/util/PriorityQueue.h index bbf4359be6..fd78cc63ca 100644 --- a/src/util/PriorityQueue.h +++ b/src/util/PriorityQueue.h @@ -16,7 +16,6 @@ #pragma once -#include #include #include #include @@ -25,6 +24,7 @@ #include "./Exception.h" #include "./HashMap.h" #include "./Log.h" +#include "backports/algorithm.h" namespace ad_utility { using std::make_shared; diff --git a/src/util/Random.h b/src/util/Random.h index 0be93b0f81..b5af0d3d66 100644 --- a/src/util/Random.h +++ b/src/util/Random.h @@ -6,7 +6,6 @@ #pragma once -#include #include #include #include @@ -15,6 +14,7 @@ #include #include +#include "backports/algorithm.h" #include "global/TypedIndex.h" namespace ad_utility { diff --git a/src/util/Serializer/ByteBufferSerializer.h b/src/util/Serializer/ByteBufferSerializer.h index 6ab5f80ad1..263621437d 100644 --- a/src/util/Serializer/ByteBufferSerializer.h +++ b/src/util/Serializer/ByteBufferSerializer.h @@ -5,12 +5,12 @@ #ifndef QLEVER_BYTEBUFFERSERIALIZER_H #define QLEVER_BYTEBUFFERSERIALIZER_H -#include #include #include #include "../Exception.h" #include "./Serializer.h" +#include "backports/algorithm.h" namespace ad_utility::serialization { /** diff --git a/src/util/Simple8bCode.h b/src/util/Simple8bCode.h index ff13bfb8e9..8af2011776 100644 --- a/src/util/Simple8bCode.h +++ b/src/util/Simple8bCode.h @@ -6,7 +6,7 @@ #include #include -#include +#include "backports/algorithm.h" namespace ad_utility { diff --git a/src/util/StringUtils.cpp b/src/util/StringUtils.cpp index 5ea3aaa85e..bf3aafe7b7 100644 --- a/src/util/StringUtils.cpp +++ b/src/util/StringUtils.cpp @@ -5,9 +5,15 @@ #include "util/StringUtils.h" +#include #include #include +#include "util/Algorithm.h" 
+#include "util/Exception.h" +#include "util/Forward.h" +#include "util/StringUtilsImpl.h" + namespace ad_utility { // ____________________________________________________________________________ string_view commonPrefix(string_view a, const string_view b) { @@ -55,10 +61,10 @@ bool isLanguageMatch(string& languageTag, string& languageRange) { if (languageRange.ends_with("*")) { languageRange.pop_back(); } - std::ranges::transform(languageTag, std::begin(languageTag), - [](unsigned char c) { return std::tolower(c); }); - std::ranges::transform(languageRange, std::begin(languageRange), - [](unsigned char c) { return std::tolower(c); }); + ql::ranges::transform(languageTag, std::begin(languageTag), + [](unsigned char c) { return std::tolower(c); }); + ql::ranges::transform(languageRange, std::begin(languageRange), + [](unsigned char c) { return std::tolower(c); }); return languageTag.compare(0, languageRange.length(), languageRange) == 0; } } @@ -192,5 +198,4 @@ std::string addIndentation(std::string_view str, absl::StrReplaceAll(str, {{"\n", absl::StrCat("\n", indentationSymbol)}})); } - } // namespace ad_utility diff --git a/src/util/StringUtils.h b/src/util/StringUtils.h index cd0fcaf250..493e7759cc 100644 --- a/src/util/StringUtils.h +++ b/src/util/StringUtils.h @@ -9,7 +9,7 @@ #include -#include "util/Algorithm.h" +#include "backports/algorithm.h" #include "util/Concepts.h" #include "util/ConstexprSmallString.h" #include "util/CtreHelpers.h" @@ -137,6 +137,12 @@ number. @param str The input string. @param separatorSymbol What symbol to put between groups of thousands. + +Note: To avoid cyclic dependencies, this function is defined in a separate file +`StringUtilsImpl.h`. This file is then included in the `StringUtils.cpp` with an +explicit instantiation for the default template argument `.`. The tests include +the impl file directly to exhaustively test the behavior for other template +arguments. 
*/ template std::string insertThousandSeparator(const std::string_view str, @@ -201,11 +207,11 @@ void lazyStrJoin(std::ostream* stream, Range&& r, std::string_view separator) { // Add the remaining entries. ++begin; - std::ranges::for_each(begin, end, - [&stream, &separator](const auto& listItem) { - *stream << separator << listItem; - }, - {}); + ql::ranges::for_each(begin, end, + [&stream, &separator](const auto& listItem) { + *stream << separator << listItem; + }, + {}); } // _________________________________________________________________________ @@ -218,88 +224,6 @@ std::string lazyStrJoin(Range&& r, std::string_view separator) { return std::move(stream).str(); } -// ___________________________________________________________________________ -template -std::string insertThousandSeparator(const std::string_view str, - const char separatorSymbol) { - static const auto isDigit = [](const char c) { - // `char` is ASCII. So the number symbols are the codes from 48 to 57. - return '0' <= c && c <= '9'; - }; - AD_CONTRACT_CHECK(!isDigit(separatorSymbol) && - !isDigit(floatingPointSignifier)); - - /* - Create a `ctll::fixed_string` of `floatingPointSignifier`, that can be used - inside regex character classes, without being confused with one of the - reserved characters. - */ - static constexpr auto adjustFloatingPointSignifierForRegex = []() { - constexpr ctll::fixed_string floatingPointSignifierAsFixedString( - {floatingPointSignifier, '\0'}); - - // Inside a regex character class are fewer reserved character. - if constexpr (contains(R"--(^-[]\)--", floatingPointSignifier)) { - return "\\" + floatingPointSignifierAsFixedString; - } else { - return floatingPointSignifierAsFixedString; - } - }; - - /* - As string view doesn't support the option to insert new values between old - values, so we create a new string in the wanted format. - */ - std::ostringstream ostream; - - /* - Insert separator into the given string and add it into the `ostream`. 
Ignores - content of the given string, just works based on length. - */ - auto insertSeparator = [&separatorSymbol, - &ostream](const std::string_view s) { - // Nothing to do, if the string is to short. - AD_CORRECTNESS_CHECK(s.length() > 3); - - /* - For walking over the string view. - Our initialization value skips the leading digits, so that only the digits - remain, where we have to put the separator in front of every three chars. - */ - size_t currentIdx{s.length() % 3 == 0 ? 3 : s.length() % 3}; - ostream << s.substr(0, currentIdx); - for (; currentIdx < s.length(); currentIdx += 3) { - ostream << separatorSymbol << s.substr(currentIdx, 3); - } - }; - - /* - The pattern finds any digit sequence, that is long enough for inserting - thousand separators and is not the fractual part of a floating point. - */ - static constexpr ctll::fixed_string regexPatDigitSequence{ - "(?:^|[^\\d" + adjustFloatingPointSignifierForRegex() + - "])(?\\d{4,})"}; - auto parseIterator = std::begin(str); - std::ranges::for_each( - ctre::range(str), - [&parseIterator, &ostream, &insertSeparator](const auto& match) { - /* - The digit sequence, that must be transformed. Note: The string view - iterators point to entries in the `str` string. - */ - const std::string_view& digitSequence{match.template get<"digit">()}; - - // Insert the transformed digit sequence, and the string between it and - // the `parseIterator`, into the stream. - ostream << std::string_view(parseIterator, std::begin(digitSequence)); - insertSeparator(digitSequence); - parseIterator = std::end(digitSequence); - }); - ostream << std::string_view(std::move(parseIterator), std::end(str)); - return ostream.str(); -} - // The implementation of `constexprStrCat` below. namespace detail::constexpr_str_cat_impl { // We currently have a fixed upper bound of 100 characters on the inputs. @@ -308,8 +232,8 @@ namespace detail::constexpr_str_cat_impl { // more complicated. 
using ConstexprString = ad_utility::ConstexprSmallString<100>; -// Concatenate the elements of `arr` into a single array with an additional zero -// byte at the end. `sz` must be the sum of the sizes in `arr`, else the +// Concatenate the elements of `arr` into a single array with an additional +// zero byte at the end. `sz` must be the sum of the sizes in `arr`, else the // behavior is undefined. template constexpr std::array catImpl( @@ -324,8 +248,8 @@ constexpr std::array catImpl( } return buf; }; -// Concatenate the `strings` into a single `std::array` with an additional -// zero byte at the end. +// Concatenate the `strings` into a single `std::array` with an +// additional zero byte at the end. template constexpr auto constexprStrCatBufferImpl() { constexpr size_t sz = (size_t{0} + ... + strings.size()); @@ -344,8 +268,8 @@ constexpr inline auto constexprStrCatBufferVar = // Return the concatenation of the `strings` as a `string_view`. Can be // evaluated at compile time. The buffer that backs the returned `string_view` -// will be zero-terminated, so it is safe to pass pointers into the result into -// legacy C-APIs. +// will be zero-terminated, so it is safe to pass pointers into the result +// into legacy C-APIs. template constexpr std::string_view constexprStrCat() { const auto& b = diff --git a/src/util/StringUtilsImpl.h b/src/util/StringUtilsImpl.h new file mode 100644 index 0000000000..87db3aa645 --- /dev/null +++ b/src/util/StringUtilsImpl.h @@ -0,0 +1,97 @@ +// Copyright 2023, University of Freiburg, Chair of Algorithms and Data +// Structures. 
+// Authors: Andre Schlegel (schlegea@informatik.uni-freiburg.de) +// Johannes Kalmbach, kalmbach@cs.uni-freiburg.de + +#pragma once + +#include "util/Algorithm.h" +#include "util/Exception.h" +#include "util/StringUtils.h" + +namespace ad_utility { +// _____________________________________________________________________________ +template +std::string insertThousandSeparator(const std::string_view str, + const char separatorSymbol) { + static const auto isDigit = [](const char c) { + // `char` is ASCII. So the number symbols are the codes from 48 to 57. + return '0' <= c && c <= '9'; + }; + AD_CONTRACT_CHECK(!isDigit(separatorSymbol) && + !isDigit(floatingPointSignifier)); + + /* + Create a `ctll::fixed_string` of `floatingPointSignifier`, that can be used + inside regex character classes, without being confused with one of the + reserved characters. + */ + static constexpr auto adjustFloatingPointSignifierForRegex = []() { + constexpr ctll::fixed_string floatingPointSignifierAsFixedString( + {floatingPointSignifier, '\0'}); + + // Inside a regex character class are fewer reserved character. + if constexpr (contains(R"--(^-[]\)--", floatingPointSignifier)) { + return "\\" + floatingPointSignifierAsFixedString; + } else { + return floatingPointSignifierAsFixedString; + } + }; + + /* + As string view doesn't support the option to insert new values between old + values, so we create a new string in the wanted format. + */ + std::ostringstream ostream; + + /* + Insert separator into the given string and add it into the `ostream`. + Ignores content of the given string, just works based on length. + */ + auto insertSeparator = [&separatorSymbol, + &ostream](const std::string_view s) { + // Nothing to do, if the string is to short. + AD_CORRECTNESS_CHECK(s.length() > 3); + + /* + For walking over the string view. + Our initialization value skips the leading digits, so that only the digits + remain, where we have to put the separator in front of every three chars. 
+ */ + size_t currentIdx{s.length() % 3 == 0 ? 3 : s.length() % 3}; + ostream << s.substr(0, currentIdx); + for (; currentIdx < s.length(); currentIdx += 3) { + ostream << separatorSymbol << s.substr(currentIdx, 3); + } + }; + + /* + The pattern finds any digit sequence, that is long enough for inserting + thousand separators and is not the fractual part of a floating point. + */ + static constexpr ctll::fixed_string regexPatDigitSequence{ + "(?:^|[^\\d" + adjustFloatingPointSignifierForRegex() + + "])(?\\d{4,})"}; + auto parseIterator = std::begin(str); + ql::ranges::for_each( + ctre::range(str), + [&parseIterator, &ostream, &insertSeparator](const auto& match) { + /* + The digit sequence, that must be transformed. Note: The string view + iterators point to entries in the `str` string. + */ + const std::string_view& digitSequence{match.template get<"digit">()}; + + // Insert the transformed digit sequence, and the string between it + // and the `parseIterator`, into the stream. + ostream << std::string_view(parseIterator, std::begin(digitSequence)); + insertSeparator(digitSequence); + parseIterator = std::end(digitSequence); + }); + ostream << std::string_view(std::move(parseIterator), std::end(str)); + return ostream.str(); +} + +template std::string insertThousandSeparator<'.'>(const std::string_view str, + const char separatorSymbol); +} // namespace ad_utility diff --git a/src/util/TaskQueue.h b/src/util/TaskQueue.h index 878f13c70e..c18b0a4e8d 100644 --- a/src/util/TaskQueue.h +++ b/src/util/TaskQueue.h @@ -145,7 +145,7 @@ class TaskQueue { // that set `startedFinishing_` from false to true. void finishImpl() { queuedTasks_.finish(); - std::ranges::for_each(threads_, [](auto& thread) { + ql::ranges::for_each(threads_, [](auto& thread) { // If `finish` was called from inside the queue, the calling thread cannot // join itself. 
AD_CORRECTNESS_CHECK(thread.joinable()); diff --git a/src/util/ThreadSafeQueue.h b/src/util/ThreadSafeQueue.h index f24e2717f1..6c6828d411 100644 --- a/src/util/ThreadSafeQueue.h +++ b/src/util/ThreadSafeQueue.h @@ -274,7 +274,7 @@ cppcoro::generator queueManager(size_t queueSize, std::vector threads; std::atomic numUnfinishedThreads{static_cast(numThreads)}; absl::Cleanup queueFinisher{[&queue] { queue.finish(); }}; - for ([[maybe_unused]] auto i : std::views::iota(0u, numThreads)) { + for ([[maybe_unused]] auto i : ql::views::iota(0u, numThreads)) { threads.emplace_back( detail::makeQueueTask(queue, producer, numUnfinishedThreads)); } diff --git a/src/util/Views.h b/src/util/Views.h index 8a075c179e..4666a28228 100644 --- a/src/util/Views.h +++ b/src/util/Views.h @@ -8,6 +8,8 @@ #include #include +#include "backports/algorithm.h" +#include "backports/concepts.h" #include "util/Generator.h" #include "util/Log.h" @@ -83,8 +85,8 @@ cppcoro::generator uniqueView(SortedView view) { // Takes a view of blocks and yields the elements of the same view, but removes // consecutive duplicates inside the blocks and across block boundaries. template >> + typename ValueType = ql::ranges::range_value_t< + ql::ranges::range_value_t>> cppcoro::generator uniqueBlockView( SortedBlockView view) { size_t numInputs = 0; @@ -97,7 +99,7 @@ cppcoro::generator uniqueBlockView( } numInputs += block.size(); auto beg = lastValueFromPreviousBlock - ? std::ranges::find_if( + ? ql::ranges::find_if( block, [&p = lastValueFromPreviousBlock.value()]( const auto& el) { return el != p; }) : block.begin(); @@ -113,11 +115,14 @@ cppcoro::generator uniqueBlockView( } // A view that owns its underlying storage. It is a replacement for -// `std::ranges::owning_view` which is not yet supported by `GCC 11`. The -// implementation is taken from libstdc++-13. 
-template -requires std::movable class OwningView - : public std::ranges::view_interface> { +// `std::ranges::owning_view` which is not yet supported by `GCC 11` and +// `range-v3`. The implementation is taken from libstdc++-13. The additional +// optional `supportsConst` argument explicitly disables const iteration for +// this view when set to false, see `OwningViewNoConst` below for details. +CPP_template(typename UnderlyingRange, bool supportConst = true)( + requires ql::ranges::range CPP_and + ql::concepts::movable) class OwningView + : public ql::ranges::view_interface> { private: UnderlyingRange underlyingRange_ = UnderlyingRange(); @@ -145,93 +150,107 @@ requires std::movable class OwningView return std::move(underlyingRange_); } - constexpr std::ranges::iterator_t begin() { - return std::ranges::begin(underlyingRange_); + constexpr ql::ranges::iterator_t begin() { + return ql::ranges::begin(underlyingRange_); } - constexpr std::ranges::sentinel_t end() { - return std::ranges::end(underlyingRange_); + constexpr ql::ranges::sentinel_t end() { + return ql::ranges::end(underlyingRange_); } constexpr auto begin() const - requires std::ranges::range { - return std::ranges::begin(underlyingRange_); + requires(supportConst && ql::ranges::range) { + return ql::ranges::begin(underlyingRange_); } - constexpr auto end() const requires std::ranges::range - { - return std::ranges::end(underlyingRange_); + constexpr auto end() const + requires(supportConst && ql::ranges::range) { + return ql::ranges::end(underlyingRange_); } constexpr bool empty() - requires requires { std::ranges::empty(underlyingRange_); } { - return std::ranges::empty(underlyingRange_); + requires requires { ql::ranges::empty(underlyingRange_); } { + return ql::ranges::empty(underlyingRange_); } constexpr bool empty() const - requires requires { std::ranges::empty(underlyingRange_); } { - return std::ranges::empty(underlyingRange_); + requires requires { ql::ranges::empty(underlyingRange_); } { + 
return ql::ranges::empty(underlyingRange_); } - constexpr auto size() requires std::ranges::sized_range { - return std::ranges::size(underlyingRange_); + constexpr auto size() requires ql::ranges::sized_range { + return ql::ranges::size(underlyingRange_); } constexpr auto size() const - requires std::ranges::sized_range { - return std::ranges::size(underlyingRange_); + requires ql::ranges::sized_range { + return ql::ranges::size(underlyingRange_); } - constexpr auto data() requires std::ranges::contiguous_range - { - return std::ranges::data(underlyingRange_); + constexpr auto data() requires ql::ranges::contiguous_range { + return ql::ranges::data(underlyingRange_); } constexpr auto data() const - requires std::ranges::contiguous_range { - return std::ranges::data(underlyingRange_); + requires ql::ranges::contiguous_range { + return ql::ranges::data(underlyingRange_); } }; +// Like `OwningView` above, but the const overloads to `begin()` and `end()` do +// not exist. This is currently used in the `CompressedExternalIdTable.h`, where +// have a deeply nested stack of views, one of which is `OnwingView` +// which doesn't properly propagate the possibility of const iteration in +// range-v3`. +template +struct OwningViewNoConst : OwningView { + using OwningView::OwningView; +}; + +template +OwningViewNoConst(T&&) -> OwningViewNoConst; + // Helper concept for `ad_utility::allView`. namespace detail { template -concept can_ref_view = - requires(Range&& range) { std::ranges::ref_view{AD_FWD(range)}; }; -} +CPP_requires(can_ref_view, + requires(Range&& range)(ql::ranges::ref_view{AD_FWD(range)})); +template +CPP_concept can_ref_view = CPP_requires_ref(can_ref_view, Range); +} // namespace detail -// A simple drop-in replacement for `std::views::all` which is required because -// GCC 11 doesn't support `std::owning_view` (see above). As soon as we don't -// support GCC 11 anymore, we can throw out those implementations. 
+// A simple drop-in replacement for `ql::views::all` which is required because +// GCC 11 and range-v3 currently don't support `std::owning_view` (see above). +// As soon as we don't support GCC 11 anymore, we can throw out those +// implementations. template constexpr auto allView(Range&& range) { if constexpr (std::ranges::view>) { return AD_FWD(range); } else if constexpr (detail::can_ref_view) { - return std::ranges::ref_view{AD_FWD(range)}; + return ql::ranges::ref_view{AD_FWD(range)}; } else { return ad_utility::OwningView{AD_FWD(range)}; } } // Returns a view that contains all the values in `[0, upperBound)`, similar to -// Python's `range` function. Avoids the common pitfall in `std::views::iota` +// Python's `range` function. Avoids the common pitfall in `ql::views::iota` // that the count variable is only derived from the first argument. For example, -// `std::views::iota(0, size_t(INT_MAX) + 1)` leads to undefined behavior +// `ql::views::iota(0, size_t(INT_MAX) + 1)` leads to undefined behavior // because of an integer overflow, but `ad_utility::integerRange(size_t(INT_MAX) // + 1)` is perfectly safe and behaves as expected. template auto integerRange(Int upperBound) { - return std::views::iota(Int{0}, upperBound); + return ql::views::iota(Int{0}, upperBound); } // The implementation of `inPlaceTransformView`, see below for details. namespace detail { -template > - Transformation> -requires std::ranges::view +template > + Transformation> +requires(ql::ranges::view && ql::ranges::input_range) auto inPlaceTransformViewImpl(Range range, Transformation transformation) { // Take a range and yield pairs of [pointerToElementOfRange, // boolThatIsInitiallyFalse]. The bool is yielded as a reference and if its @@ -251,7 +270,7 @@ auto inPlaceTransformViewImpl(Range range, Transformation transformation) { // Lift the transformation to work on the result of `makePtrAndBool` and to // only apply the transformation once for each element. 
- // Note: This works because `std::views::transform` calls the transformation + // Note: This works because `ql::views::transform` calls the transformation // each time an iterator is dereferenced, so the following lambda is called // multiple times for the same element if the same iterator is dereferenced // multiple times and we therefore have to remember whether the transformation @@ -268,31 +287,35 @@ auto inPlaceTransformViewImpl(Range range, Transformation transformation) { }; // Combine everything to the actual result range. - return std::views::transform( + return ql::views::transform( ad_utility::OwningView{makeElementPtrAndBool(std::move(range))}, actualTransformation); } } // namespace detail -// Similar to `std::views::transform` but for transformation functions that +// Similar to `ql::views::transform` but for transformation functions that // transform a value in place. The result is always only an input range, // independent of the actual range category of the input. -template > - Transformation> -auto inPlaceTransformView(Range&& range, Transformation transformation) { - return detail::inPlaceTransformViewImpl(std::views::all(AD_FWD(range)), +CPP_template(typename Range, typename Transformation)( + requires ql::ranges::input_range CPP_and + ad_utility::InvocableWithExactReturnType< + Transformation, void, + ql::ranges::range_reference_t< + Range>>) auto inPlaceTransformView(Range&& range, + Transformation + transformation) { + return detail::inPlaceTransformViewImpl(ql::views::all(AD_FWD(range)), std::move(transformation)); } /// Create a generator the consumes the input generator until it finds the given /// separator and the yields spans of the chunks of data received inbetween. 
-template -inline cppcoro::generator> reChunkAtSeparator( - Range generator, ElementType separator) { +CPP_template(typename Range, typename ElementType)( + requires ql::ranges::input_range) inline cppcoro:: + generator> reChunkAtSeparator( + Range generator, ElementType separator) { std::vector buffer; - for (std::ranges::input_range auto chunk : generator) { + for (QL_OPT_CONCEPT(ql::ranges::input_range) auto const& chunk : generator) { for (ElementType c : chunk) { if (c == separator) { co_yield std::span{buffer.data(), buffer.size()}; @@ -310,7 +333,14 @@ inline cppcoro::generator> reChunkAtSeparator( } // namespace ad_utility // Enabling of "borrowed" ranges for `OwningView`. +#ifdef QLEVER_CPP_17 +template +inline constexpr bool ::ranges::enable_borrowed_range< + ad_utility::OwningView> = enable_borrowed_range; + +#else template inline constexpr bool std::ranges::enable_borrowed_range> = std::ranges::enable_borrowed_range; +#endif diff --git a/src/util/http/MediaTypes.cpp b/src/util/http/MediaTypes.cpp index 096cb3a3c1..d91e7584df 100644 --- a/src/util/http/MediaTypes.cpp +++ b/src/util/http/MediaTypes.cpp @@ -151,12 +151,12 @@ std::optional getMediaTypeFromAcceptHeader( return detail::SUPPORTED_MEDIA_TYPES.at(0); } else if constexpr (ad_utility::isSimilar< T, MediaTypeWithQuality::TypeWithWildcard>) { - auto it = std::ranges::find_if( + auto it = ql::ranges::find_if( detail::SUPPORTED_MEDIA_TYPES, [&part](const auto& el) { return getType(el) == part._type; }); return it == detail::SUPPORTED_MEDIA_TYPES.end() ? noValue : *it; } else if constexpr (ad_utility::isSimilar) { - auto it = std::ranges::find(detail::SUPPORTED_MEDIA_TYPES, part); + auto it = ql::ranges::find(detail::SUPPORTED_MEDIA_TYPES, part); return it != detail::SUPPORTED_MEDIA_TYPES.end() ? 
part : noValue; } else { static_assert(ad_utility::alwaysFalse); @@ -178,7 +178,7 @@ std::optional getMediaTypeFromAcceptHeader( std::string getErrorMessageForSupportedMediaTypes() { return "Currently the following media types are supported: " + lazyStrJoin( - detail::SUPPORTED_MEDIA_TYPES | std::views::transform(toString), + detail::SUPPORTED_MEDIA_TYPES | ql::views::transform(toString), ", "); } diff --git a/test/AddCombinedRowToTableTest.cpp b/test/AddCombinedRowToTableTest.cpp index c4a6b1c874..2c409a830b 100644 --- a/test/AddCombinedRowToTableTest.cpp +++ b/test/AddCombinedRowToTableTest.cpp @@ -11,7 +11,7 @@ namespace { static constexpr auto U = Id::makeUndefined(); void testWithAllBuffersizes(const auto& testFunction) { - for (auto bufferSize : std::views::iota(0, 10)) { + for (auto bufferSize : ql::views::iota(0, 10)) { testFunction(bufferSize); } testFunction(100'000); diff --git a/test/AlgorithmTest.cpp b/test/AlgorithmTest.cpp index 485f6930cc..187e0aa82e 100644 --- a/test/AlgorithmTest.cpp +++ b/test/AlgorithmTest.cpp @@ -16,8 +16,8 @@ using namespace ad_utility; TEST(Algorithm, Contains) { std::vector v{1, 42, 5, 3}; ASSERT_TRUE( - std::ranges::all_of(v, [&v](const auto& el) { return contains(v, el); })); - ASSERT_TRUE(std::ranges::none_of( + ql::ranges::all_of(v, [&v](const auto& el) { return contains(v, el); })); + ASSERT_TRUE(ql::ranges::none_of( std::vector{ 28, 2, @@ -29,30 +29,30 @@ TEST(Algorithm, Contains) { StringLike s{"hal"}; { std::vector substrings{"h", "a", "l", "ha", "al", "hal"}; - ASSERT_TRUE(std::ranges::all_of( + ASSERT_TRUE(ql::ranges::all_of( substrings, [&s](const auto& el) { return contains(s, el); })); std::vector noSubstrings{"x", "hl", "hel"}; // codespell-ignore - ASSERT_TRUE(std::ranges::none_of( + ASSERT_TRUE(ql::ranges::none_of( noSubstrings, [&s](const auto& el) { return contains(s, el); })); } { std::vector substrings{"h", "a", "l", "ha", "al", "hal"}; - ASSERT_TRUE(std::ranges::all_of( + ASSERT_TRUE(ql::ranges::all_of( 
substrings, [&s](const auto& el) { return contains(s, el); })); std::vector noSubstrings{"x", "hl", "hel"}; // codespell-ignore - ASSERT_TRUE(std::ranges::none_of( + ASSERT_TRUE(ql::ranges::none_of( noSubstrings, [&s](const auto& el) { return contains(s, el); })); } std::vector subchars{'h', 'a', 'l'}; - ASSERT_TRUE(std::ranges::all_of( + ASSERT_TRUE(ql::ranges::all_of( subchars, [&s](const auto& el) { return contains(s, el); })); std::vector noSubchars{'i', 'b', 'm'}; - ASSERT_TRUE(std::ranges::none_of( + ASSERT_TRUE(ql::ranges::none_of( noSubchars, [&s](const auto& el) { return contains(s, el); })); }; testStringLike.template operator()(); @@ -104,7 +104,7 @@ TEST(Algorithm, Transform) { ASSERT_EQ((std::vector{"hix", "byex", "whyx"}), v3); ASSERT_EQ(3u, v.size()); // The individual elements of `v` were moved from. - ASSERT_TRUE(std::ranges::all_of(v, &std::string::empty)); + ASSERT_TRUE(ql::ranges::all_of(v, &std::string::empty)); } // _____________________________________________________________________________ @@ -114,8 +114,8 @@ TEST(Algorithm, Flatten) { ASSERT_EQ((std::vector{"hi", "bye", "why", "me"}), v3); ASSERT_EQ(3u, v.size()); // The individual elements of `v` were moved from. 
- ASSERT_TRUE(std::ranges::all_of(v, [](const auto& inner) { - return std::ranges::all_of(inner, &std::string::empty); + ASSERT_TRUE(ql::ranges::all_of(v, [](const auto& inner) { + return ql::ranges::all_of(inner, &std::string::empty); })); } @@ -168,8 +168,8 @@ TEST(AlgorithmTest, transformArray) { TEST(AlgorithmTest, lowerUpperBoundIterator) { std::vector input; FastRandomIntGenerator randomGenerator; - std::ranges::generate_n(std::back_inserter(input), 1000, - std::ref(randomGenerator)); + ql::ranges::generate_n(std::back_inserter(input), 1000, + std::ref(randomGenerator)); auto compForLowerBound = [](auto iterator, size_t value) { return *iterator < value; @@ -180,9 +180,9 @@ TEST(AlgorithmTest, lowerUpperBoundIterator) { for (auto value : input) { EXPECT_EQ(ad_utility::lower_bound_iterator(input.begin(), input.end(), value, compForLowerBound), - std::ranges::lower_bound(input, value)); + ql::ranges::lower_bound(input, value)); EXPECT_EQ(ad_utility::upper_bound_iterator(input.begin(), input.end(), value, compForUpperBound), - std::ranges::upper_bound(input, value)); + ql::ranges::upper_bound(input, value)); } } diff --git a/test/AsyncStreamTest.cpp b/test/AsyncStreamTest.cpp index d0fe3addf1..dfd2ac9af1 100644 --- a/test/AsyncStreamTest.cpp +++ b/test/AsyncStreamTest.cpp @@ -49,6 +49,6 @@ TEST(AsyncStream, EnsureBuffersArePassedCorrectly) { const std::vector testData{"Abc", "Def", "Ghi"}; auto generator = runStreamAsync(testData, 2); - ASSERT_TRUE(std::ranges::equal(testData.begin(), testData.end(), - generator.begin(), generator.end())); + ASSERT_TRUE(ql::ranges::equal(testData.begin(), testData.end(), + generator.begin(), generator.end())); } diff --git a/test/BenchmarkMeasurementContainerTest.cpp b/test/BenchmarkMeasurementContainerTest.cpp index a500c3df2c..dc3c9aaeb7 100644 --- a/test/BenchmarkMeasurementContainerTest.cpp +++ b/test/BenchmarkMeasurementContainerTest.cpp @@ -158,7 +158,7 @@ TEST(BenchmarkMeasurementContainerTest, ResultTable) { on creation, 
because you can't add columns after creation and a table without columns is quite the stupid idea. Additionally, operations on such an empty table can create segmentation faults. The string conversion of `Result` - uses `std::ranges::max`, which really doesn't play well with empty vectors. + uses `ql::ranges::max`, which really doesn't play well with empty vectors. */ ASSERT_ANY_THROW(ResultTable("1 by 0 table", {"Test"}, {})); @@ -352,11 +352,11 @@ TEST(BenchmarkMeasurementContainerTest, ResultGroupDeleteMember) { group.deleteMeasurement(*entryToDelete); group.deleteTable(*tableToDelete); auto getAddressOfObject = [](const auto& obj) { return obj.get(); }; - ASSERT_TRUE(std::ranges::find(group.resultEntries_, entryToDelete, - getAddressOfObject) == + ASSERT_TRUE(ql::ranges::find(group.resultEntries_, entryToDelete, + getAddressOfObject) == std::end(group.resultEntries_)); - ASSERT_TRUE(std::ranges::find(group.resultTables_, tableToDelete, - getAddressOfObject) == + ASSERT_TRUE(ql::ranges::find(group.resultTables_, tableToDelete, + getAddressOfObject) == std::end(group.resultTables_)); // Test, if trying to delete a non-existent member results in an error. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9dd3a733a9..c260f34d4e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -103,6 +103,7 @@ endfunction() add_subdirectory(engine) add_subdirectory(parser) add_subdirectory(index) +add_subdirectory(backports) addLinkAndDiscoverTest(ValueIdComparatorsTest util) diff --git a/test/CallFixedSizeTest.cpp b/test/CallFixedSizeTest.cpp index 52b865b27a..ba6e82c255 100644 --- a/test/CallFixedSizeTest.cpp +++ b/test/CallFixedSizeTest.cpp @@ -175,7 +175,7 @@ TEST(CallFixedSize, CallFixedSize2) { } }; // TODO the ranges of the loop can be greatly simplified - // using `std::views::iota`, but views don't work yet on clang. + // using `ql::views::iota`, but views don't work yet on clang. 
// TODO We can then also setup a lambda that does the loop, // going from 4*4 to just 4 lines of calling code. for (int i = 0; i <= m; ++i) { diff --git a/test/CompactStringVectorTest.cpp b/test/CompactStringVectorTest.cpp index e66d9b875f..f551fc54bd 100644 --- a/test/CompactStringVectorTest.cpp +++ b/test/CompactStringVectorTest.cpp @@ -50,7 +50,7 @@ TEST(CompactVectorOfStrings, Iterator) { s.build(input); auto it = s.begin(); - using std::ranges::equal; + using ql::ranges::equal; ASSERT_TRUE(equal(input[0], *it)); ASSERT_TRUE(equal(input[0], *it++)); ASSERT_TRUE(equal(input[1], *it)); diff --git a/test/ComparisonWithNanTest.cpp b/test/ComparisonWithNanTest.cpp index 6458de6d17..5cde3fc249 100644 --- a/test/ComparisonWithNanTest.cpp +++ b/test/ComparisonWithNanTest.cpp @@ -4,9 +4,9 @@ #include -#include #include +#include "backports/algorithm.h" #include "util/ComparisonWithNan.h" namespace { @@ -25,7 +25,7 @@ auto gt = ad_utility::makeComparatorForNans(std::greater{}); TEST(ComparisonWithNan, Sorting) { std::vector input{NaN, 3.0, -3.0, NaN, negInf, NaN, inf}; std::vector expected{negInf, -3.0, 3.0, inf, NaN, NaN, NaN}; - std::ranges::sort(input, lt); + ql::ranges::sort(input, lt); ASSERT_EQ(input.size(), expected.size()); for (size_t i = 0; i < input.size(); ++i) { auto a = input[i]; diff --git a/test/CompressedRelationsTest.cpp b/test/CompressedRelationsTest.cpp index e9ab85df14..3166664941 100644 --- a/test/CompressedRelationsTest.cpp +++ b/test/CompressedRelationsTest.cpp @@ -44,7 +44,7 @@ size_t getNumColumns(const std::vector& input) { return 2; } auto result = input.at(0).size(); - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( input, [result](const auto& vec) { return vec.size() == result; })); return result; } @@ -54,7 +54,7 @@ size_t getNumColumns(const std::vector& vec) { return 2; } auto result = getNumColumns(vec.at(0).col1And2_); - AD_CONTRACT_CHECK(std::ranges::all_of(vec, [&result](const auto& relation) { + 
AD_CONTRACT_CHECK(ql::ranges::all_of(vec, [&result](const auto& relation) { return getNumColumns(relation.col1And2_) == result; })); return result; @@ -107,13 +107,12 @@ compressedRelationTestWriteCompressedRelations( auto inputs, std::string filename, ad_utility::MemorySize blocksize) { // First check the invariants of the `inputs`. They must be sorted by the // `col0_` and for each of the `inputs` the `col1And2_` must also be sorted. - AD_CONTRACT_CHECK(std::ranges::is_sorted( + AD_CONTRACT_CHECK(ql::ranges::is_sorted( inputs, {}, [](const RelationInput& r) { return r.col0_; })); - AD_CONTRACT_CHECK(std::ranges::all_of(inputs, [](const RelationInput& r) { - return std::ranges::is_sorted( - r.col1And2_, [](const auto& a, const auto& b) { - return std::ranges::lexicographical_compare(a, b); - }); + AD_CONTRACT_CHECK(ql::ranges::all_of(inputs, [](const RelationInput& r) { + return ql::ranges::is_sorted(r.col1And2_, [](const auto& a, const auto& b) { + return ql::ranges::lexicographical_compare(a, b); + }); })); // First create the on-disk permutation. @@ -142,7 +141,7 @@ compressedRelationTestWriteCompressedRelations( }; for (const auto& arr : input.col1And2_) { std::vector row{V(input.col0_)}; - std::ranges::transform(arr, std::back_inserter(row), V); + ql::ranges::transform(arr, std::back_inserter(row), V); buffer.push_back(row); if (buffer.numRows() > writer.blocksize()) { addBlock(); @@ -283,14 +282,14 @@ void testCompressedRelations(const auto& inputsOriginalBeforeCopy, std::make_shared>(); // Check the contents of the metadata. - // TODO `std::ranges::to`. + // TODO `ql::ranges::to`. 
std::vector additionalColumns; - std::ranges::copy(std::views::iota(3ul, getNumColumns(inputs) + 1), - std::back_inserter(additionalColumns)); + ql::ranges::copy(ql::views::iota(3ul, getNumColumns(inputs) + 1), + std::back_inserter(additionalColumns)); auto getMetadata = [&, &metaData = metaData](size_t i) { Id col0 = V(inputs[i].col0_); - auto it = std::ranges::lower_bound(metaData, col0, {}, - &CompressedRelationMetadata::col0Id_); + auto it = ql::ranges::lower_bound(metaData, col0, {}, + &CompressedRelationMetadata::col0Id_); if (it != metaData.end() && it->col0Id_ == col0) { return *it; } diff --git a/test/ConfigManagerTest.cpp b/test/ConfigManagerTest.cpp index 0d90283201..315e02d7a3 100644 --- a/test/ConfigManagerTest.cpp +++ b/test/ConfigManagerTest.cpp @@ -435,7 +435,7 @@ TEST(ConfigManagerTest, ParseConfigWithSubManager) { const std::vector>& wantedValues) { m.parseConfig(j); - std::ranges::for_each( + ql::ranges::for_each( wantedValues, [](const std::pair& wantedValue) -> void { ASSERT_EQ(*wantedValue.first, wantedValue.second); }); @@ -2131,7 +2131,7 @@ TEST(ConfigManagerTest, ContainsOption) { auto checkContainmentStatus = [](const ConfigManager& m, const ContainmentStatusVector& optionsAndWantedStatus) { - std::ranges::for_each( + ql::ranges::for_each( optionsAndWantedStatus, [&m](const ContainmentStatusVector::value_type& p) { if (p.second) { @@ -2308,7 +2308,7 @@ TEST(ConfigManagerTest, ValidatorsSorting) { // For generating better messages, when failing a test. auto trace{generateLocationTrace(l, "checkOrder")}; - ASSERT_TRUE(std::ranges::equal( + ASSERT_TRUE(ql::ranges::equal( manager.validators(true), order.validators_, {}, [](const ConfigOptionValidatorManager& validatorManager) { return validatorManager.getDescription(); @@ -2434,9 +2434,9 @@ TEST(ConfigManagerTest, ConfigurationDocValidatorAssignment) { std::pair>>& pairVector) { // Simply insert all the entries. 
- std::ranges::for_each(pairVector, [&assignment](const auto& pair) { + ql::ranges::for_each(pairVector, [&assignment](const auto& pair) { const auto& [key, validatorVector] = pair; - std::ranges::for_each( + ql::ranges::for_each( validatorVector, [&assignment, &key](const ConfigOptionValidatorManager& validator) { @@ -2460,16 +2460,16 @@ TEST(ConfigManagerTest, ConfigurationDocValidatorAssignment) { ad_utility::source_location::current()) { // For generating better messages, when failing a test. auto trace{generateLocationTrace(l, "testPairVector")}; - std::ranges::for_each(pairVector, [&assignment](const auto& pair) { + ql::ranges::for_each(pairVector, [&assignment](const auto& pair) { const auto& [key, expectedValidatorVector] = pair; // Are the entries under `key` the objects in the expected vector? auto toPointer = [](const ConfigOptionValidatorManager& x) { return &x; }; - ASSERT_TRUE(std::ranges::equal(assignment.getEntriesUnderKey(key), - expectedValidatorVector, {}, toPointer, - toPointer)); + ASSERT_TRUE(ql::ranges::equal(assignment.getEntriesUnderKey(key), + expectedValidatorVector, {}, toPointer, + toPointer)); }); }; diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp index 10a4792d29..88ec0c76e5 100644 --- a/test/DeltaTriplesTest.cpp +++ b/test/DeltaTriplesTest.cpp @@ -47,7 +47,7 @@ class DeltaTriplesTest : public ::testing::Test { std::vector makeTurtleTriples( const std::vector& turtles) { RdfStringParser> parser; - std::ranges::for_each(turtles, [&parser](const std::string& turtle) { + ql::ranges::for_each(turtles, [&parser](const std::string& turtle) { parser.parseUtf8String(turtle); }); AD_CONTRACT_CHECK(parser.getTriples().size() == turtles.size()); diff --git a/test/FindUndefRangesTest.cpp b/test/FindUndefRangesTest.cpp index a3f3ee269f..a63afc8d5d 100644 --- a/test/FindUndefRangesTest.cpp +++ b/test/FindUndefRangesTest.cpp @@ -65,7 +65,7 @@ void testSmallerUndefRangesForRowsWithoutUndef( const std::vector& positions, 
source_location l = source_location::current()) { auto t = generateLocationTrace(l); - ASSERT_TRUE(std::ranges::is_sorted(range)); + ASSERT_TRUE(ql::ranges::is_sorted(range)); std::vector foundPositions; // TODO also actually test the bool; [[maybe_unused]] bool outOfOrder; @@ -112,7 +112,7 @@ void testSmallerUndefRangesForRowsWithUndefInLastColumns( const std::vector& positions, source_location l = source_location::current()) { auto t = generateLocationTrace(l); - ASSERT_TRUE(std::ranges::is_sorted(range)); + ASSERT_TRUE(ql::ranges::is_sorted(range)); std::vector foundPositions; // TODO also actually test the bool; [[maybe_unused]] bool outOfOrder; diff --git a/test/GeoPointTest.cpp b/test/GeoPointTest.cpp index 3d5dc60b7f..a9b39e0f87 100644 --- a/test/GeoPointTest.cpp +++ b/test/GeoPointTest.cpp @@ -11,6 +11,7 @@ #include "parser/GeoPoint.h" #include "util/GTestHelpers.h" #include "util/GeoSparqlHelpers.h" +#include "util/HashSet.h" // _____________________________________________________________________________ TEST(GeoPoint, GeoPoint) { diff --git a/test/GroupByTest.cpp b/test/GroupByTest.cpp index a2c28a1b5e..0c2e314e46 100644 --- a/test/GroupByTest.cpp +++ b/test/GroupByTest.cpp @@ -1207,10 +1207,10 @@ TEST_F(GroupByOptimizations, hashMapOptimizationMinMaxSumIntegers) { auto unsignedLongToValueId = [](unsigned long value) { return ValueId::makeFromInt(static_cast(value)); }; - std::ranges::transform(firstColumn.begin(), firstColumn.end(), - firstTableColumn.begin(), unsignedLongToValueId); - std::ranges::transform(secondColumn.begin(), secondColumn.end(), - secondTableColumn.begin(), unsignedLongToValueId); + ql::ranges::transform(firstColumn.begin(), firstColumn.end(), + firstTableColumn.begin(), unsignedLongToValueId); + ql::ranges::transform(secondColumn.begin(), secondColumn.end(), + secondTableColumn.begin(), unsignedLongToValueId); auto values = ad_utility::makeExecutionTree( qec, std::move(testTable), variables, false); diff --git a/test/HttpTest.cpp 
b/test/HttpTest.cpp index 05e3e7cf7b..12e5233c7e 100644 --- a/test/HttpTest.cpp +++ b/test/HttpTest.cpp @@ -22,7 +22,7 @@ namespace { /// Join all of the bytes into a big string. std::string toString(cppcoro::generator> generator) { std::string result; - for (std::byte byte : generator | std::ranges::views::join) { + for (std::byte byte : generator | ql::ranges::views::join) { result.push_back(static_cast(byte)); } return result; diff --git a/test/IdTableHelpersTest.cpp b/test/IdTableHelpersTest.cpp index b6cbcebbd5..842cc1ea32 100644 --- a/test/IdTableHelpersTest.cpp +++ b/test/IdTableHelpersTest.cpp @@ -11,6 +11,7 @@ #include #include "./util/IdTableHelpers.h" +#include "backports/algorithm.h" #include "engine/idTable/IdTable.h" #include "global/ValueId.h" #include "util/Algorithm.h" @@ -29,30 +30,30 @@ elements will not be ignored. @param setToCalculateFor The container to calculate all sub-sets for. Will only be read. */ -template >> -std::vector> calculateAllSubSets(R&& setToCalculateFor) { +CPP_template(typename R, + typename E = std::iter_value_t>)( + requires ql::ranges::forward_range) + std::vector> calculateAllSubSets(R&& setToCalculateFor) { // Getting rid of duplicated elements. std::vector> calculatedSubSets; // There will be exactly $setToCalculateFor.size()^2$ items added. calculatedSubSets.reserve( - ad_utility::pow(2, std::ranges::size(setToCalculateFor))); + ad_utility::pow(2, ql::ranges::size(setToCalculateFor))); // The empty set is always a sub-set. calculatedSubSets.push_back({}); // Calculate all sub-sets. 
- std::ranges::for_each( - setToCalculateFor, [&calculatedSubSets](const E& entry) { - ad_utility::appendVector( - calculatedSubSets, - ad_utility::transform(calculatedSubSets, - [&entry](std::vector subSet) { - subSet.push_back(entry); - return subSet; - })); - }); + ql::ranges::for_each(setToCalculateFor, [&calculatedSubSets](const E& entry) { + ad_utility::appendVector( + calculatedSubSets, + ad_utility::transform(calculatedSubSets, + [&entry](std::vector subSet) { + subSet.push_back(entry); + return subSet; + })); + }); return calculatedSubSets; } @@ -64,8 +65,8 @@ TEST(IdTableHelpersHelpersTest, calculateAllSubSets) { std::vector> result{calculateAllSubSets(input)}; // For comparison, we have to sort both vectors. - std::ranges::sort(expectedOutput, std::ranges::lexicographical_compare); - std::ranges::sort(result, std::ranges::lexicographical_compare); + ql::ranges::sort(expectedOutput, ql::ranges::lexicographical_compare); + ql::ranges::sort(result, ql::ranges::lexicographical_compare); ASSERT_EQ(expectedOutput, result); }; @@ -96,8 +97,8 @@ void generalIdTableCheck(const IdTable& table, ASSERT_EQ(table.numColumns(), expectedNumberOfColumns); if (allEntriesWereSet) { - ASSERT_TRUE(std::ranges::all_of(table, [](const auto& row) { - return std::ranges::all_of(row, [](const ValueId& entry) { + ASSERT_TRUE(ql::ranges::all_of(table, [](const auto& row) { + return ql::ranges::all_of(row, [](const ValueId& entry) { return ad_utility::testing::VocabId(0) <= entry && entry <= ad_utility::testing::VocabId(ValueId::maxIndex); }); @@ -136,7 +137,7 @@ TEST(IdTableHelpersTest, createRandomlyFilledIdTableWithoutGenerators) { // Checks, if all entries of are within a given inclusive range. 
auto checkColumn = [](const IdTable& table, const size_t& columnNumber, const size_t& lowerBound, const size_t& upperBound) { - ASSERT_TRUE(std::ranges::all_of( + ASSERT_TRUE(ql::ranges::all_of( table.getColumn(columnNumber), [&lowerBound, &upperBound](const ValueId& entry) { return ad_utility::testing::VocabId(lowerBound) <= entry && @@ -165,7 +166,7 @@ TEST(IdTableHelpersTest, createRandomlyFilledIdTableWithoutGenerators) { `JoinColumnAndBounds`, in the case of generating tables with 40 rows and 10 columns. */ - std::ranges::for_each( + ql::ranges::for_each( calculateAllSubSets(std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), [&checkColumn, &result](const std::vector& joinColumns) { result = createRandomlyFilledIdTable( @@ -177,10 +178,10 @@ TEST(IdTableHelpersTest, createRandomlyFilledIdTableWithoutGenerators) { generalIdTableCheck(result, 40, 10, true); // Are the join columns like we wanted them? - std::ranges::for_each(joinColumns, - [&result, &checkColumn](const size_t& jc) { - checkColumn(result, jc, jc * 10, jc * 10 + 9); - }); + ql::ranges::for_each(joinColumns, + [&result, &checkColumn](const size_t& jc) { + checkColumn(result, jc, jc * 10, jc * 10 + 9); + }); }); } @@ -229,7 +230,7 @@ TEST(IdTableHelpersTest, createRandomlyFilledIdTableWithGenerators) { // Exhaustive test, if the creation of a randomly filled table works, // regardless of the amount of join columns and their position. - std::ranges::for_each( + ql::ranges::for_each( calculateAllSubSets(std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), [&createCountUpGenerator, &compareColumnsWithVectors](const std::vector& joinColumns) { @@ -247,7 +248,7 @@ TEST(IdTableHelpersTest, createRandomlyFilledIdTableWithGenerators) { // have the correct content. 
generalIdTableCheck(resultMultiGenerator, 10, 10, true); generalIdTableCheck(resultSingleGenerator, 10, 10, true); - std::ranges::for_each( + ql::ranges::for_each( joinColumns, [&resultMultiGenerator, &resultSingleGenerator, &joinColumns, &compareColumnsWithVectors](const size_t& num) { @@ -255,7 +256,7 @@ TEST(IdTableHelpersTest, createRandomlyFilledIdTableWithGenerators) { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); const size_t indexOfTheColumn = - std::ranges::find(joinColumns, num) - joinColumns.begin(); + ql::ranges::find(joinColumns, num) - joinColumns.begin(); compareColumnsWithVectors( resultSingleGenerator, num, {indexOfTheColumn, indexOfTheColumn + joinColumns.size(), @@ -294,7 +295,7 @@ TEST(IdTableHelpersTest, generateIdTable) { std::vector row(width); // Fill the row. - std::ranges::fill(row, ad_utility::testing::VocabId(i)); + ql::ranges::fill(row, ad_utility::testing::VocabId(i)); i++; return row; @@ -312,7 +313,7 @@ TEST(IdTableHelpersTest, generateIdTable) { std::vector row(i < 3 ? 5 : 20); // Fill the row. - std::ranges::fill(row, ad_utility::testing::VocabId(4)); + ql::ranges::fill(row, ad_utility::testing::VocabId(4)); i++; return row; @@ -322,7 +323,7 @@ TEST(IdTableHelpersTest, generateIdTable) { IdTable table{generateIdTable(5, 5, createCountUpGenerator(5))}; generalIdTableCheck(table, 5, 5, true); for (size_t row = 0; row < 5; row++) { - ASSERT_TRUE(std::ranges::all_of(table[row], [&row](const auto& entry) { + ASSERT_TRUE(ql::ranges::all_of(table[row], [&row](const auto& entry) { return entry == ad_utility::testing::VocabId(row); })); } @@ -337,7 +338,7 @@ TEST(IdTableHelpersTest, randomSeed) { constexpr size_t NUM_ROWS = 100; constexpr size_t NUM_COLUMNS = 200; - std::ranges::for_each( + ql::ranges::for_each( createArrayOfRandomSeeds<5>(), [](const ad_utility::RandomSeed seed) { // Simply generate and compare. 
ASSERT_EQ( diff --git a/test/IdTableTest.cpp b/test/IdTableTest.cpp index 483f3bcb92..441048a54e 100644 --- a/test/IdTableTest.cpp +++ b/test/IdTableTest.cpp @@ -164,7 +164,7 @@ TEST(IdTable, rowIterators) { ASSERT_FALSE( std::is_sorted(std::as_const(row).begin(), std::as_const(row).end())); - std::ranges::sort(row.begin(), row.end()); + ql::ranges::sort(row.begin(), row.end()); ASSERT_EQ(-1, row[0]); ASSERT_EQ(0, row[1]); ASSERT_EQ(1, row[2]); @@ -203,8 +203,8 @@ TEST(IdTable, rowIterators) { std::sort(std::move(row).begin(), std::move(row).end()); // The following calls all would not compile: // std::sort(row.begin(), row.end()); - // std::ranges::sort(row); - // std::ranges::sort(std::move(row)); + // ql::ranges::sort(row); + // ql::ranges::sort(std::move(row)); ASSERT_EQ(-1, row[0]); ASSERT_EQ(0, row[1]); ASSERT_EQ(1, row[2]); @@ -570,8 +570,7 @@ TEST(IdTable, sortTest) { // Now try the actual sort test = orig.clone(); - std::ranges::sort(test, std::less<>{}, - [](const auto& row) { return row[0]; }); + ql::ranges::sort(test, std::less<>{}, [](const auto& row) { return row[0]; }); // The sorted order of the orig tables should be: // 3, 2, 0, 4, 5, 1 @@ -1100,8 +1099,8 @@ TEST(IdTable, shrinkToFit) { TEST(IdTable, staticAsserts) { static_assert(std::is_trivially_copyable_v::iterator>); static_assert(std::is_trivially_copyable_v::const_iterator>); - static_assert(std::ranges::random_access_range); - static_assert(std::ranges::random_access_range>); + static_assert(ql::ranges::random_access_range); + static_assert(ql::ranges::random_access_range>); } TEST(IdTable, constructorsAreSfinaeFriendly) { diff --git a/test/JoinAlgorithmsTest.cpp b/test/JoinAlgorithmsTest.cpp index 1829de1519..6996191e59 100644 --- a/test/JoinAlgorithmsTest.cpp +++ b/test/JoinAlgorithmsTest.cpp @@ -72,7 +72,7 @@ void testJoin(const NestedBlock& a, const NestedBlock& b, JoinResult expected, zipperJoinForBlocksWithoutUndef(a, b, compare, adder); } // The result must be sorted on the first 
column - EXPECT_TRUE(std::ranges::is_sorted(result, std::less<>{}, ad_utility::first)); + EXPECT_TRUE(ql::ranges::is_sorted(result, std::less<>{}, ad_utility::first)); // The exact order of the elements with the same first column is not important // and depends on implementation details. We therefore do not enforce it here. EXPECT_THAT(result, ::testing::UnorderedElementsAreArray(expected)); @@ -89,7 +89,7 @@ void testJoin(const NestedBlock& a, const NestedBlock& b, JoinResult expected, auto adder = makeRowAdder(result); zipperJoinForBlocksWithoutUndef(b, a, compare, adder); EXPECT_TRUE( - std::ranges::is_sorted(result, std::less<>{}, ad_utility::first)); + ql::ranges::is_sorted(result, std::less<>{}, ad_utility::first)); EXPECT_THAT(result, ::testing::UnorderedElementsAreArray(expected)); } } diff --git a/test/JoinTest.cpp b/test/JoinTest.cpp index d5a0117078..6ab090214f 100644 --- a/test/JoinTest.cpp +++ b/test/JoinTest.cpp @@ -97,14 +97,14 @@ void runTestCasesForAllJoinAlgorithms( // For sorting IdTableAndJoinColumn by their join column. auto sortByJoinColumn = [](IdTableAndJoinColumn& idTableAndJC) { - std::ranges::sort(idTableAndJC.idTable, {}, - [&idTableAndJC](const auto& row) { - return row[idTableAndJC.joinColumn]; - }); + ql::ranges::sort(idTableAndJC.idTable, {}, + [&idTableAndJC](const auto& row) { + return row[idTableAndJC.joinColumn]; + }); }; // Random shuffle both tables, run hashJoin, check result. - std::ranges::for_each(testSet, [](JoinTestCase& testCase) { + ql::ranges::for_each(testSet, [](JoinTestCase& testCase) { randomShuffle(testCase.leftInput.idTable.begin(), testCase.leftInput.idTable.end()); randomShuffle(testCase.rightInput.idTable.begin(), @@ -115,7 +115,7 @@ void runTestCasesForAllJoinAlgorithms( // Sort the larger table by join column, run hashJoin, check result (this time // it's sorted). 
- std::ranges::for_each(testSet, [&sortByJoinColumn](JoinTestCase& testCase) { + ql::ranges::for_each(testSet, [&sortByJoinColumn](JoinTestCase& testCase) { IdTableAndJoinColumn& largerInputTable = (testCase.leftInput.idTable.size() >= testCase.rightInput.idTable.size()) @@ -128,7 +128,7 @@ void runTestCasesForAllJoinAlgorithms( // Sort both tables, run merge join and hash join, check result. (Which has to // be sorted.) - std::ranges::for_each(testSet, [&sortByJoinColumn](JoinTestCase& testCase) { + ql::ranges::for_each(testSet, [&sortByJoinColumn](JoinTestCase& testCase) { sortByJoinColumn(testCase.leftInput); sortByJoinColumn(testCase.rightInput); testCase.resultMustBeSortedByJoinColumn = true; @@ -213,7 +213,7 @@ std::vector createJoinTestSet() { IdTable createIdTableOfSizeWithValue(size_t size, Id value) { IdTable idTable{1, ad_utility::testing::makeAllocator()}; idTable.resize(size); - std::ranges::fill(idTable.getColumn(0), value); + ql::ranges::fill(idTable.getColumn(0), value); return idTable; } } // namespace diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index c192a8b35f..369b61f713 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -187,7 +187,7 @@ TEST(LocalVocab, merge) { auto id2 = vocE.getBlankNodeIndex(&bnm); auto vocabs3 = std::vector{&std::as_const(localVocabMerged2), &std::as_const(vocF)}; - vocE.mergeWith(vocabs3 | std::views::transform( + vocE.mergeWith(vocabs3 | ql::views::transform( [](const LocalVocab* l) -> const LocalVocab& { return *l; })); @@ -219,8 +219,8 @@ TEST(LocalVocab, propagation) { return LiteralOrIri::literalWithoutQuotes(word); } }; - std::ranges::transform(expectedWordsAsStrings, - std::back_inserter(expectedWords), toLitOrIri); + ql::ranges::transform(expectedWordsAsStrings, + std::back_inserter(expectedWords), toLitOrIri); std::shared_ptr resultTable = operation.getResult(); ASSERT_TRUE(resultTable) << "Operation: " << operation.getDescriptor() << std::endl; @@ -228,7 +228,7 @@ 
TEST(LocalVocab, propagation) { resultTable->localVocab().getAllWordsForTesting(); // We currently allow the local vocab to have multiple IDs for the same // word, so we have to deduplicate first. - std::ranges::sort(localVocabWords); + ql::ranges::sort(localVocabWords); localVocabWords.erase(std::ranges::unique(localVocabWords).begin(), localVocabWords.end()); ASSERT_THAT(localVocabWords, diff --git a/test/LocatedTriplesTest.cpp b/test/LocatedTriplesTest.cpp index 4a46644aff..9fb9933ef5 100644 --- a/test/LocatedTriplesTest.cpp +++ b/test/LocatedTriplesTest.cpp @@ -20,7 +20,7 @@ int g = 123948; void addGraphColumn(IdTable& block) { block.addEmptyColumn(); - std::ranges::fill(block.getColumn(block.numColumns() - 1), V(g)); + ql::ranges::fill(block.getColumn(block.numColumns() - 1), V(g)); } auto IT = [](const auto& c1, const auto& c2, const auto& c3, int graph = g) { diff --git a/test/MemorySizeTest.cpp b/test/MemorySizeTest.cpp index 22b2c614c7..cde91e5c35 100644 --- a/test/MemorySizeTest.cpp +++ b/test/MemorySizeTest.cpp @@ -207,7 +207,7 @@ TEST(MemorySize, AsString) { ASSERT_STREQ(stream.str().c_str(), testCase.stringRepresentation_.data()); }; - std::ranges::for_each(generalAsStringTestCases(), doTest); + ql::ranges::for_each(generalAsStringTestCases(), doTest); // Check, if it always uses the right unit. doTest({99'999_B, "99999 B"}); @@ -232,7 +232,7 @@ TEST(MemorySize, Parse) { }; // General testing. - std::ranges::for_each(generalAsStringTestCases(), doTest); + ql::ranges::for_each(generalAsStringTestCases(), doTest); // Does `Byte` only work with whole, positive numbers? doExceptionTest("-46 B"); @@ -240,21 +240,21 @@ TEST(MemorySize, Parse) { doExceptionTest("-4.2 B"); // Nothing should work with negative numbers. 
- std::ranges::for_each(generalAsStringTestCases(), doExceptionTest, - [](const MemorySizeAndStringRepresentation& testCase) { - return absl::StrCat("-", - testCase.stringRepresentation_); - }); + ql::ranges::for_each(generalAsStringTestCases(), doExceptionTest, + [](const MemorySizeAndStringRepresentation& testCase) { + return absl::StrCat("-", + testCase.stringRepresentation_); + }); // Byte sizes can only be set with `B`. - std::ranges::for_each(std::vector{"42 BYTE", "42 BYTe", "42 BYtE", "42 BYte", - "42 ByTE", "42 ByTe", "42 BytE", "42 Byte", - "42 bYTE", "42 bYTe", "42 bYtE", "42 bYte", - "42 byTE", "42 byTe", "42 bytE", "42 byte"}, - doExceptionTest); + ql::ranges::for_each(std::vector{"42 BYTE", "42 BYTe", "42 BYtE", "42 BYte", + "42 ByTE", "42 ByTe", "42 BytE", "42 Byte", + "42 bYTE", "42 bYTe", "42 bYtE", "42 bYte", + "42 byTE", "42 byTe", "42 bytE", "42 byte"}, + doExceptionTest); // Is our grammar truly case insensitive? - std::ranges::for_each( + ql::ranges::for_each( std::vector{{42_B, "42 B"}, {42_B, "42 b"}, {42_kB, "42 KB"}, @@ -277,7 +277,7 @@ TEST(MemorySize, Parse) { // Does our short hand (memory unit without the `B` at the end) work? And is // it case insensitive? - std::ranges::for_each( + ql::ranges::for_each( std::vector{{42_kB, "42 K"}, {42_kB, "42 k"}, {42_MB, "42 M"}, @@ -289,7 +289,7 @@ TEST(MemorySize, Parse) { doTest); // Check if whitespace between unit and amount is truly optional - std::ranges::for_each( + ql::ranges::for_each( std::vector{{42_B, "42B"}, {42_B, "42b"}, {42_kB, "42KB"}, @@ -310,7 +310,7 @@ TEST(MemorySize, Parse) { {42_TB, "42tb"}}, doTest); - std::ranges::for_each( + ql::ranges::for_each( std::vector{{42_kB, "42K"}, {42_kB, "42k"}, {42_MB, "42M"}, @@ -322,13 +322,13 @@ TEST(MemorySize, Parse) { doTest); // Test if multiple spaces are fine too - std::ranges::for_each( + ql::ranges::for_each( std::vector{{42_kB, "42 K"}, {42_kB, "42 k"}}, doTest); // We only take memory units up to `TB`. Not further. 
- std::ranges::for_each(std::vector{"42 P", "42 PB"}, doExceptionTest); + ql::ranges::for_each(std::vector{"42 P", "42 PB"}, doExceptionTest); } TEST(MemorySize, ArithmeticOperators) { diff --git a/test/OrderByTest.cpp b/test/OrderByTest.cpp index cef7d1f1e4..046f55d1f6 100644 --- a/test/OrderByTest.cpp +++ b/test/OrderByTest.cpp @@ -61,9 +61,9 @@ void testOrderBy(IdTable input, const IdTable& expected, // Apply the current permutation of the `sortColumns` to `expected` and // `input`. for (size_t i = 0; i < sortColumns.size(); ++i) { - std::ranges::copy(input.getColumn(i), - permutedInput.getColumn(sortColumns[i].first).begin()); - std::ranges::copy( + ql::ranges::copy(input.getColumn(i), + permutedInput.getColumn(sortColumns[i].first).begin()); + ql::ranges::copy( expected.getColumn(i), permutedExpected.getColumn(sortColumns[i].first).begin()); // Also put the information which columns are descending into the correct @@ -192,7 +192,7 @@ TEST(OrderBy, mixedDatatypes) { testOrderBy(makeIdTableFromVector(input), makeIdTableFromVector(expected), {false}); - std::ranges::reverse(expected); + ql::ranges::reverse(expected); testOrderBy(makeIdTableFromVector(input), makeIdTableFromVector(expected), {true}); } diff --git a/test/ParallelMultiwayMergeTest.cpp b/test/ParallelMultiwayMergeTest.cpp index 95c416ef90..2174a9ec35 100644 --- a/test/ParallelMultiwayMergeTest.cpp +++ b/test/ParallelMultiwayMergeTest.cpp @@ -15,10 +15,10 @@ using namespace ad_utility::memory_literals; // Join a range of ranges into a single vector, e.g. `array> // -> vector`. 
auto join = [](Range&& range) { - std::vector>> + std::vector>> result; - auto view = std::views::join(ad_utility::OwningView{AD_FWD(range)}); - std::ranges::copy(view, std::back_inserter(result)); + auto view = ql::views::join(ad_utility::OwningView{AD_FWD(range)}); + ql::ranges::copy(view, std::back_inserter(result)); return result; }; @@ -34,22 +34,22 @@ void testRandomInts() { numRowsGen = ad_utility::SlowRandomIntGenerator( minVecSize, maxVecSize)]() mutable { std::vector res(numRowsGen()); - std::ranges::generate(res, gen); - std::ranges::sort(res); + ql::ranges::generate(res, gen); + ql::ranges::sort(res); return res; }; std::vector> input(numVecs); - std::ranges::generate(input, generateRandomVec); + ql::ranges::generate(input, generateRandomVec); auto expected = join(std::vector>{input}); - std::ranges::sort(expected); + ql::ranges::sort(expected); std::vector result; - std::ranges::copy(std::views::join(ad_utility::OwningView{ - ad_utility::parallelMultiwayMerge( - 1_GB, input, std::less<>{}, blocksize)}), - std::back_inserter(result)); + ql::ranges::copy(ql::views::join(ad_utility::OwningView{ + ad_utility::parallelMultiwayMerge( + 1_GB, input, std::less<>{}, blocksize)}), + std::back_inserter(result)); EXPECT_THAT(result, ::testing::ElementsAreArray(expected)); } diff --git a/test/PrefilterExpressionIndexTest.cpp b/test/PrefilterExpressionIndexTest.cpp index 2b31304d4f..5eeb1baaca 100644 --- a/test/PrefilterExpressionIndexTest.cpp +++ b/test/PrefilterExpressionIndexTest.cpp @@ -218,7 +218,7 @@ class PrefilterExpressionOnMetadataTest : public ::testing::Test { std::vector expectedAdjusted; // This is for convenience, we automatically insert all mixed and possibly // incomplete blocks which must be always returned. - std::ranges::set_union( + ql::ranges::set_union( expected, useBlocksIncomplete ? 
mixedAndIncompleteBlocks : mixedBlocks, std::back_inserter(expectedAdjusted), [](const BlockMetadata& b1, const BlockMetadata& b2) { diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index 48fb419571..ea22a00431 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -357,8 +357,8 @@ static constexpr auto GroupBy = // TODO Also test the aliases. auto aliasesToStrings = [](const std::vector& aliases) { std::vector result; - std::ranges::transform(aliases, std::back_inserter(result), - &Alias::getDescriptor); + ql::ranges::transform(aliases, std::back_inserter(result), + &Alias::getDescriptor); return result; }; diff --git a/test/RandomTest.cpp b/test/RandomTest.cpp index fdcae374e8..034d7ac4c1 100644 --- a/test/RandomTest.cpp +++ b/test/RandomTest.cpp @@ -51,7 +51,7 @@ void testSeed( // For every seed test, if the random number generators return the same // numbers. - std::ranges::for_each( + ql::ranges::for_each( createArrayOfRandomSeeds(), [&randomNumberGeneratorFactory](const RandomSeed seed) { // What type of generator does the factory create? @@ -62,7 +62,7 @@ void testSeed( // The generators, that should create the same numbers. std::array generators; - std::ranges::generate( + ql::ranges::generate( generators, [&randomNumberGeneratorFactory, &seed]() { return std::invoke(randomNumberGeneratorFactory, seed); }); @@ -71,10 +71,10 @@ void testSeed( for (size_t numCall = 0; numCall < NUM_RANDOM_NUMBER; numCall++) { const NumberType expectedNumber = std::invoke(generators.front()); - std::ranges::for_each(std::views::drop(generators, 1), - [&expectedNumber](GeneratorType& g) { - ASSERT_EQ(std::invoke(g), expectedNumber); - }); + ql::ranges::for_each(ql::views::drop(generators, 1), + [&expectedNumber](GeneratorType& g) { + ASSERT_EQ(std::invoke(g), expectedNumber); + }); } }); } @@ -115,8 +115,8 @@ void testSeedWithRange( // For generating better messages, when failing a test. 
auto trace{generateLocationTrace(l, "testSeedWithRange")}; - std::ranges::for_each(ranges, [&randomNumberGeneratorFactory]( - const NumericalRange& r) { + ql::ranges::for_each(ranges, [&randomNumberGeneratorFactory]( + const NumericalRange& r) { testSeed([&r, &randomNumberGeneratorFactory](RandomSeed seed) { return std::invoke(randomNumberGeneratorFactory, r.minimum_, r.maximum_, seed); @@ -147,7 +147,7 @@ void testRange( constexpr size_t NUM_RANDOM_NUMBER = 500; static_assert(NUM_RANDOM_NUMBER > 1); - std::ranges::for_each(ranges, [](const NumericalRange& r) { + ql::ranges::for_each(ranges, [](const NumericalRange& r) { Generator generator(r.minimum_, r.maximum_); const auto& generatedNumber = std::invoke(generator); ASSERT_LE(generatedNumber, r.maximum_); @@ -244,25 +244,24 @@ TEST(RandomShuffleTest, Seed) { For every random seed test, if the shuffled array is the same, if given identical input and seed. */ - std::ranges::for_each( + ql::ranges::for_each( createArrayOfRandomSeeds(), [](const RandomSeed seed) { std::array, NUM_SHUFFLED_ARRAY> inputArrays{}; // Fill the first input array with random values, then copy it into the // other 'slots'. - std::ranges::generate(inputArrays.front(), - FastRandomIntGenerator{}); - std::ranges::fill(std::views::drop(inputArrays, 1), - inputArrays.front()); + ql::ranges::generate(inputArrays.front(), + FastRandomIntGenerator{}); + ql::ranges::fill(ql::views::drop(inputArrays, 1), inputArrays.front()); // Shuffle and compare, if they are all the same. 
- std::ranges::for_each( + ql::ranges::for_each( inputArrays, [&seed](std::array& inputArray) { randomShuffle(inputArray.begin(), inputArray.end(), seed); }); - std::ranges::for_each( - std::views::drop(inputArrays, 1), + ql::ranges::for_each( + ql::views::drop(inputArrays, 1), [&inputArrays](const std::array& inputArray) { ASSERT_EQ(inputArrays.front(), inputArray); }); diff --git a/test/RdfParserTest.cpp b/test/RdfParserTest.cpp index 6969961003..bbde3375a5 100644 --- a/test/RdfParserTest.cpp +++ b/test/RdfParserTest.cpp @@ -1132,7 +1132,7 @@ TEST(RdfParserTest, multifileParser) { Parser p{specs}; std::vector result; while (auto batch = p.getBatch()) { - std::ranges::copy(batch.value(), std::back_inserter(result)); + ql::ranges::copy(batch.value(), std::back_inserter(result)); } EXPECT_THAT(result, ::testing::UnorderedElementsAreArray(expected)); }; diff --git a/test/RelationalExpressionTest.cpp b/test/RelationalExpressionTest.cpp index 7c53af843b..50e369109f 100644 --- a/test/RelationalExpressionTest.cpp +++ b/test/RelationalExpressionTest.cpp @@ -169,7 +169,7 @@ auto expectUndefined = [](const SparqlExpression& expression, AD_CORRECTNESS_CHECK( (std::holds_alternative>(result))); const auto& vec = std::get>(result); - EXPECT_TRUE(std::ranges::all_of( + EXPECT_TRUE(ql::ranges::all_of( vec, [](Id id) { return id == Id::makeUndefined(); })); } }; @@ -679,8 +679,8 @@ void testWithExplicitResult(auto leftValue, auto rightValue, source_location l = source_location::current()) { auto t = generateLocationTrace(l); std::vector expected; - std::ranges::transform(expectedAsBool, std::back_inserter(expected), - Id::makeFromBool); + ql::ranges::transform(expectedAsBool, std::back_inserter(expected), + Id::makeFromBool); testWithExplicitIdResult(std::move(leftValue), std::move(rightValue), expected); diff --git a/test/ResultTableColumnOperationsTest.cpp b/test/ResultTableColumnOperationsTest.cpp index a04ac895ff..9fc246c548 100644 --- 
a/test/ResultTableColumnOperationsTest.cpp +++ b/test/ResultTableColumnOperationsTest.cpp @@ -317,7 +317,7 @@ TEST(ResultTableColumnOperations, calculateSpeedupOfColumn) { }; // Test things for a range of speedups. - std::ranges::for_each( + ql::ranges::for_each( std::array{2.f, 16.f, 73.696f, 4.2f}, [&fillColumnsForSpeedup](const float wantedSpeedup, ad_utility::source_location l = diff --git a/test/SortTest.cpp b/test/SortTest.cpp index 9461576a45..ab51138815 100644 --- a/test/SortTest.cpp +++ b/test/SortTest.cpp @@ -54,10 +54,10 @@ void testSort(IdTable input, const IdTable& expected, // Apply the current permutation of the `sortColumns` to `expected` and // `input`. for (size_t i = 0; i < sortColumns.size(); ++i) { - std::ranges::copy(input.getColumn(sortColumns[i]), - permutedInput.getColumn(i).begin()); - std::ranges::copy(expected.getColumn(sortColumns[i]), - permutedExpected.getColumn(i).begin()); + ql::ranges::copy(input.getColumn(sortColumns[i]), + permutedInput.getColumn(i).begin()); + ql::ranges::copy(expected.getColumn(sortColumns[i]), + permutedExpected.getColumn(i).begin()); } for (size_t i = 0; i < 5; ++i) { diff --git a/test/StringUtilsTest.cpp b/test/StringUtilsTest.cpp index e448aff7e1..b7c26fd5e6 100644 --- a/test/StringUtilsTest.cpp +++ b/test/StringUtilsTest.cpp @@ -17,6 +17,7 @@ #include "util/Forward.h" #include "util/Generator.h" #include "util/StringUtils.h" +#include "util/StringUtilsImpl.h" using ad_utility::constantTimeEquals; using ad_utility::constexprStrCat; @@ -109,22 +110,22 @@ TEST(StringUtilsTest, listToString) { multiValueVector, " -> "); /* - `std::ranges::views` can cause dangling pointers, if a `std::identity` is + `ql::ranges::views` can cause dangling pointers, if a `std::identity` is called with one, that returns r-values. */ /* - TODO Do a test, where the `std::views::transform` uses an r-value vector, + TODO Do a test, where the `ql::views::transform` uses an r-value vector, once we no longer support `gcc-11`. 
The compiler has a bug, where it doesn't allow that code, even though it's correct. */ - auto plus10View = std::views::transform( + auto plus10View = ql::views::transform( multiValueVector, [](const int& num) -> int { return num + 10; }); doTestForAllOverloads("50,51,52,53", plus10View, plus10View, ","); - auto identityView = std::views::transform(multiValueVector, std::identity{}); + auto identityView = ql::views::transform(multiValueVector, std::identity{}); doTestForAllOverloads("40,41,42,43", identityView, identityView, ","); - // Test, that uses an actual `std::ranges::input_range`. That is, a range who + // Test, that uses an actual `ql::ranges::input_range`. That is, a range who // doesn't know it's own size and can only be iterated once. // Returns the content of a given vector, element by element. diff --git a/test/ThreadSafeQueueTest.cpp b/test/ThreadSafeQueueTest.cpp index 055c01f4db..2588ce5551 100644 --- a/test/ThreadSafeQueueTest.cpp +++ b/test/ThreadSafeQueueTest.cpp @@ -146,7 +146,7 @@ TEST(ThreadSafeQueue, Concurrency) { // order, for the `ThreadSafeQueue` the order is unspecified and we only // check the content. if (ad_utility::isInstantiation) { - std::ranges::sort(result); + ql::ranges::sort(result); } EXPECT_THAT(result, ::testing::ElementsAreArray( std::views::iota(0UL, numValues * numThreads))); @@ -253,13 +253,13 @@ TEST(ThreadSafeQueue, DisablePush) { if (ad_utility::similarToInstantiation) { // When terminating early, we cannot actually say much about the result, // other than that it contains no duplicate values - std::ranges::sort(result); + ql::ranges::sort(result); EXPECT_TRUE(std::unique(result.begin(), result.end()) == result.end()); } else { // For the ordered queue we have the guarantee that all the pushed values // were in order. 
EXPECT_THAT(result, - ::testing::ElementsAreArray(std::views::iota(0U, 400U))); + ::testing::ElementsAreArray(ql::views::iota(0U, 400U))); } }; runWithBothQueueTypes(runTest); @@ -309,7 +309,7 @@ TEST(ThreadSafeQueue, SafeExceptionHandling) { // 1. Queue, 2. WorkerThreads, 3. `Cleanup` that finishes the queue. absl::Cleanup cleanup{[&queue] { queue.finish(); }}; - for ([[maybe_unused]] auto i : std::views::iota(0u, numValues)) { + for ([[maybe_unused]] auto i : ql::views::iota(0u, numValues)) { auto opt = queue.pop(); if (!opt) { return; @@ -400,7 +400,7 @@ TEST(ThreadSafeQueue, queueManager) { // order, for the `ThreadSafeQueue` the order is unspecified and we only // check the content. if (ad_utility::isInstantiation) { - std::ranges::sort(result); + ql::ranges::sort(result); } EXPECT_THAT(result, ::testing::ElementsAreArray( std::views::iota(0UL, numValues))); diff --git a/test/ViewsTest.cpp b/test/ViewsTest.cpp index addf37af87..24fc744afb 100644 --- a/test/ViewsTest.cpp +++ b/test/ViewsTest.cpp @@ -107,7 +107,7 @@ TEST(Views, uniqueBlockView) { i = nextI; } - auto unique = std::views::join( + auto unique = ql::views::join( ad_utility::OwningView{ad_utility::uniqueBlockView(inputs)}); std::vector result; for (const auto& element : unique) { @@ -124,14 +124,14 @@ TEST(Views, uniqueBlockView) { TEST(Views, owningView) { using namespace ad_utility; // Static asserts for the desired concepts. 
- static_assert(std::ranges::input_range>>); + static_assert(ql::ranges::input_range>>); static_assert( - !std::ranges::forward_range>>); - static_assert(std::ranges::random_access_range>>); + !ql::ranges::forward_range>>); + static_assert(ql::ranges::random_access_range>>); auto toVec = [](auto& range) { std::vector result; - std::ranges::copy(range, std::back_inserter(result)); + ql::ranges::copy(range, std::back_inserter(result)); return result; }; @@ -166,7 +166,7 @@ TEST(Views, integerRange) { } std::vector actual; - std::ranges::copy(ad_utility::integerRange(42u), std::back_inserter(actual)); + ql::ranges::copy(ad_utility::integerRange(42u), std::back_inserter(actual)); ASSERT_THAT(actual, ::testing::ElementsAreArray(expected)); } @@ -202,7 +202,7 @@ std::string_view toView(std::span span) { TEST(Views, verifyLineByLineWorksWithMinimalChunks) { auto range = std::string_view{"\nabc\ndefghij\n"} | - std::views::transform([](char c) { return std::ranges::single_view(c); }); + ql::views::transform([](char c) { return ql::ranges::single_view(c); }); auto lineByLineGenerator = ad_utility::reChunkAtSeparator(std::move(range), '\n'); @@ -224,8 +224,8 @@ TEST(Views, verifyLineByLineWorksWithMinimalChunks) { // __________________________________________________________________________ TEST(Views, verifyLineByLineWorksWithNoTrailingNewline) { - auto range = std::string_view{"abc"} | std::views::transform([](char c) { - return std::ranges::single_view(c); + auto range = std::string_view{"abc"} | ql::views::transform([](char c) { + return ql::ranges::single_view(c); }); auto lineByLineGenerator = diff --git a/test/backports/CMakeLists.txt b/test/backports/CMakeLists.txt new file mode 100644 index 0000000000..3e3f2e9865 --- /dev/null +++ b/test/backports/CMakeLists.txt @@ -0,0 +1,6 @@ + +add_executable(AlgorithmBackportTests algorithmTest.cpp) +add_executable(DebugJoinView DebugJoinView.cpp) +qlever_target_link_libraries(DebugJoinView) 
+target_link_libraries(AlgorithmBackportTests GTest::gtest GTest::gmock_main) +gtest_discover_tests(AlgorithmBackportTests AlgorithmBackportTests) \ No newline at end of file diff --git a/test/backports/DebugJoinView.cpp b/test/backports/DebugJoinView.cpp new file mode 100644 index 0000000000..ff372cc7fa --- /dev/null +++ b/test/backports/DebugJoinView.cpp @@ -0,0 +1,38 @@ +// +// Created by kalmbacj on 12/10/24. +// + +#include + +#include "engine/idTable/IdTable.h" +#include "util/Generator.h" +#include "util/Views.h" + +cppcoro::generator> inner() { return {}; } + +auto joinOwning() { return ql::views::join(ad_utility::OwningView{inner()}); } + +/* +auto joinOwning() { + return +ql::views::join(ad_utility::OwningView{std::vector>{}}); +} +*/ + +auto vec() { + std::vector vec; + vec.push_back(joinOwning()); + return vec; +} + +auto joinOuter() { + // return ad_utility::OwningView{vec()}; + // return ql::views::join(ad_utility::OwningView{vec()}); + return ql::views::join(ad_utility::OwningViewNoConst{vec()}); +} + +int main() { + auto view = joinOuter(); + [[maybe_unused]] auto it = view.begin(); +} diff --git a/test/backports/algorithmTest.cpp b/test/backports/algorithmTest.cpp new file mode 100644 index 0000000000..0e213c192d --- /dev/null +++ b/test/backports/algorithmTest.cpp @@ -0,0 +1,9 @@ +// +// Created by kalmbacj on 12/6/24. 
+// + +#include + +#include "backports/algorithm.h" + +TEST(Range, Sort) {} diff --git a/test/engine/BindTest.cpp b/test/engine/BindTest.cpp index cecec3c4c7..34ef0eb370 100644 --- a/test/engine/BindTest.cpp +++ b/test/engine/BindTest.cpp @@ -95,7 +95,7 @@ TEST( auto* qec = ad_utility::testing::getQec(); IdTable table{1, ad_utility::makeUnlimitedAllocator()}; table.resize(Bind::CHUNK_SIZE + 1); - std::ranges::fill(table, row); + ql::ranges::fill(table, row); auto valuesTree = ad_utility::makeExecutionTree( qec, table.clone(), Vars{Variable{"?a"}}, false, std::vector{}, LocalVocab{}, std::nullopt, true); @@ -109,7 +109,7 @@ TEST( row = IdTable::row_type{2}; row[0] = val; row[1] = val; - std::ranges::fill(table, row); + ql::ranges::fill(table, row); { qec->getQueryTreeCache().clearAll(); auto result = bind.getResult(false, ComputationMode::FULLY_MATERIALIZED); diff --git a/test/engine/CartesianProductJoinTest.cpp b/test/engine/CartesianProductJoinTest.cpp index 90488dca5c..8727aa223a 100644 --- a/test/engine/CartesianProductJoinTest.cpp +++ b/test/engine/CartesianProductJoinTest.cpp @@ -406,8 +406,8 @@ class CartesianProductJoinLazyTest // `start` to `end` wrapped as Ids. 
static void fillColumn(IdTable& table, size_t column, int64_t start, int64_t end) { - std::ranges::copy( - std::views::iota(start, end) | std::views::transform(Id::makeFromInt), + ql::ranges::copy( + ql::views::iota(start, end) | ql::views::transform(Id::makeFromInt), table.getColumn(column).begin()); } }; @@ -476,8 +476,8 @@ TEST_P(CartesianProductJoinLazyTest, leftTableBiggerThanChunk) { bigTable.addEmptyColumn(); bigTable.addEmptyColumn(); auto fillWithVocabValue = [&bigTable](size_t column, uint64_t vocabIndex) { - std::ranges::fill(bigTable.getColumn(column), - Id::makeFromVocabIndex(VocabIndex::make(vocabIndex))); + ql::ranges::fill(bigTable.getColumn(column), + Id::makeFromVocabIndex(VocabIndex::make(vocabIndex))); }; fillWithVocabValue(3, 100); diff --git a/test/engine/DistinctTest.cpp b/test/engine/DistinctTest.cpp index 0b66ca9748..c20d0ba5c6 100644 --- a/test/engine/DistinctTest.cpp +++ b/test/engine/DistinctTest.cpp @@ -74,7 +74,7 @@ TEST(Distinct, testChunkEdgeCases) { { input.resize(1); row[0] = Id::makeFromInt(0); - std::ranges::fill(input, row); + ql::ranges::fill(input, row); IdTable result = distinct.outOfPlaceDistinct<1>(input); ASSERT_EQ(makeIdTableFromVector({{0}}, &Id::makeFromInt), result); @@ -83,7 +83,7 @@ TEST(Distinct, testChunkEdgeCases) { { input.resize(Distinct::CHUNK_SIZE + 1); row[0] = Id::makeFromInt(0); - std::ranges::fill(input, row); + ql::ranges::fill(input, row); IdTable result = distinct.outOfPlaceDistinct<1>(input); ASSERT_EQ(makeIdTableFromVector({{0}}, &Id::makeFromInt), result); @@ -92,7 +92,7 @@ TEST(Distinct, testChunkEdgeCases) { { input.resize(Distinct::CHUNK_SIZE + 1); row[0] = Id::makeFromInt(0); - std::ranges::fill(input, row); + ql::ranges::fill(input, row); input.at(Distinct::CHUNK_SIZE, 0) = Id::makeFromInt(1); IdTable result = distinct.outOfPlaceDistinct<1>(input); @@ -102,7 +102,7 @@ TEST(Distinct, testChunkEdgeCases) { { input.resize(2 * Distinct::CHUNK_SIZE); row[0] = Id::makeFromInt(0); - 
std::ranges::fill(input, row); + ql::ranges::fill(input, row); IdTable result = distinct.outOfPlaceDistinct<1>(input); ASSERT_EQ(makeIdTableFromVector({{0}}, &Id::makeFromInt), result); @@ -111,7 +111,7 @@ TEST(Distinct, testChunkEdgeCases) { { input.resize(2 * Distinct::CHUNK_SIZE + 2); row[0] = Id::makeFromInt(0); - std::ranges::fill(input, row); + ql::ranges::fill(input, row); input.at(2 * Distinct::CHUNK_SIZE + 1, 0) = Id::makeFromInt(1); IdTable result = distinct.outOfPlaceDistinct<1>(input); diff --git a/test/engine/IndexScanTest.cpp b/test/engine/IndexScanTest.cpp index 6663beaf3f..2c526787a3 100644 --- a/test/engine/IndexScanTest.cpp +++ b/test/engine/IndexScanTest.cpp @@ -64,7 +64,7 @@ void testLazyScan(Permutation::IdTableGenerator partialLazyScanResult, if (limitOffset.isUnconstrained()) { for (auto [lower, upper] : expectedRows) { - for (auto index : std::views::iota(lower, upper)) { + for (auto index : ql::views::iota(lower, upper)) { expected.push_back(resFullScan.at(index)); } } diff --git a/test/engine/ValuesForTesting.h b/test/engine/ValuesForTesting.h index 6009ccd216..c02a9826bc 100644 --- a/test/engine/ValuesForTesting.h +++ b/test/engine/ValuesForTesting.h @@ -63,10 +63,9 @@ class ValuesForTesting : public Operation { resultSortedColumns_{std::move(sortedColumns)}, localVocab_{std::move(localVocab)}, multiplicity_{std::nullopt} { - AD_CONTRACT_CHECK( - std::ranges::all_of(tables_, [this](const IdTable& table) { - return variables_.size() == table.numColumns(); - })); + AD_CONTRACT_CHECK(ql::ranges::all_of(tables_, [this](const IdTable& table) { + return variables_.size() == table.numColumns(); + })); size_t totalRows = 0; for (const IdTable& idTable : tables_) { totalRows += idTable.numRows(); @@ -125,7 +124,7 @@ class ValuesForTesting : public Operation { // ___________________________________________________________________________ string getCacheKeyImpl() const override { std::stringstream str; - auto numRowsView = tables_ | 
std::views::transform(&IdTable::numRows); + auto numRowsView = tables_ | ql::views::transform(&IdTable::numRows); auto totalNumRows = std::reduce(numRowsView.begin(), numRowsView.end(), 0); auto numCols = tables_.empty() ? 0 : tables_.at(0).numColumns(); str << "Values for testing with " << numCols << " columns and " @@ -177,7 +176,7 @@ class ValuesForTesting : public Operation { vector getChildren() override { return {}; } bool knownEmptyResult() override { - return std::ranges::all_of( + return ql::ranges::all_of( tables_, [](const IdTable& table) { return table.empty(); }); } @@ -189,9 +188,9 @@ class ValuesForTesting : public Operation { continue; } bool containsUndef = - std::ranges::any_of(tables_, [&i](const IdTable& table) { - return std::ranges::any_of(table.getColumn(i), - [](Id id) { return id.isUndefined(); }); + ql::ranges::any_of(tables_, [&i](const IdTable& table) { + return ql::ranges::any_of(table.getColumn(i), + [](Id id) { return id.isUndefined(); }); }); using enum ColumnIndexAndTypeInfo::UndefStatus; m[variables_.at(i).value()] = ColumnIndexAndTypeInfo{ diff --git a/test/engine/idTable/CompressedExternalIdTableTest.cpp b/test/engine/idTable/CompressedExternalIdTableTest.cpp index 49c5f2a23f..a5d4fde240 100644 --- a/test/engine/idTable/CompressedExternalIdTableTest.cpp +++ b/test/engine/idTable/CompressedExternalIdTableTest.cpp @@ -33,10 +33,10 @@ auto idTableFromBlockGenerator = [](auto& generator) -> CopyableIdTable<0> { size_t numColumns = result.numColumns(); size_t size = result.size(); result.resize(result.size() + block.size()); - for (auto i : std::views::iota(0U, numColumns)) { + for (auto i : ql::views::iota(0U, numColumns)) { decltype(auto) blockCol = block.getColumn(i); decltype(auto) resultCol = result.getColumn(i); - std::ranges::copy(blockCol, resultCol.begin() + size); + ql::ranges::copy(blockCol, resultCol.begin() + size); } } return result; @@ -75,8 +75,8 @@ TEST(CompressedExternalIdTable, compressedExternalIdTableWriter) { 
using namespace ::testing; std::vector> result; - auto tr = std::ranges::transform_view(generators, idTableFromBlockGenerator); - std::ranges::copy(tr, std::back_inserter(result)); + auto tr = ql::ranges::transform_view(generators, idTableFromBlockGenerator); + ql::ranges::copy(tr, std::back_inserter(result)); ASSERT_THAT(result, ElementsAreArray(tables)); } @@ -103,7 +103,7 @@ void testExternalSorterImpl(size_t numDynamicColumns, size_t numRows, writer.push(row); } - std::ranges::sort(randomTable, SortByOSP{}); + ql::ranges::sort(randomTable, SortByOSP{}); if (mergeMultipleTimes) { writer.moveResultOnMerge() = false; @@ -114,7 +114,7 @@ void testExternalSorterImpl(size_t numDynamicColumns, size_t numRows, // number of inputs. auto blocksize = k == 1 ? 1 : 17; using namespace ::testing; - auto generator = k == 0 ? std::views::join(ad_utility::OwningView{ + auto generator = k == 0 ? ql::views::join(ad_utility::OwningView{ writer.getSortedBlocks(blocksize)}) : writer.sortedView(); if (mergeMultipleTimes || k == 0) { diff --git a/test/index/PatternCreatorTest.cpp b/test/index/PatternCreatorTest.cpp index 3cdfa9b790..698b2e1ec7 100644 --- a/test/index/PatternCreatorTest.cpp +++ b/test/index/PatternCreatorTest.cpp @@ -106,7 +106,7 @@ auto createExamplePatterns(PatternCreator& creator) { push({V(3), V(11), V(29)}, false, 0); push({V(3), V(11), V(45)}, false, 0); - std::ranges::sort(expected, SortByOSP{}); + ql::ranges::sort(expected, SortByOSP{}); auto tripleOutputs = std::move(creator).getTripleSorter(); auto& triples = *tripleOutputs.triplesWithSubjectPatternsSortedByOsp_; static constexpr size_t numCols = NumColumnsIndexBuilding + 1; @@ -160,7 +160,7 @@ void assertPatternContents(const std::string& filename, expectedTriples.push_back(std::array{V(0), pat, I(0)}); expectedTriples.push_back(std::array{V(1), pat, I(1)}); expectedTriples.push_back(std::array{V(3), pat, I(0)}); - std::ranges::sort(expectedTriples, SortByPSO{}); + ql::ranges::sort(expectedTriples, 
SortByPSO{}); EXPECT_THAT(addedTriples, ::testing::ElementsAreArray(expectedTriples)); } diff --git a/test/util/IdTableHelpers.cpp b/test/util/IdTableHelpers.cpp index 0b3b0a6a2e..55ac6209f9 100644 --- a/test/util/IdTableHelpers.cpp +++ b/test/util/IdTableHelpers.cpp @@ -23,16 +23,16 @@ void compareIdTableWithExpectedContent( std::stringstream traceMessage{}; auto writeIdTableToStream = [&traceMessage](const IdTable& idTable) { - std::ranges::for_each(idTable, - [&traceMessage](const auto& row) { - // TODO Use std::views::join_with for both - // loops. - for (size_t i = 0; i < row.numColumns(); i++) { - traceMessage << row[i] << " "; - } - traceMessage << "\n"; - }, - {}); + ql::ranges::for_each(idTable, + [&traceMessage](const auto& row) { + // TODO Use ql::views::join_with for both + // loops. + for (size_t i = 0; i < row.numColumns(); i++) { + traceMessage << row[i] << " "; + } + traceMessage << "\n"; + }, + {}); }; traceMessage << "compareIdTableWithExpectedContent comparing IdTable\n"; @@ -48,13 +48,13 @@ void compareIdTableWithExpectedContent( if (resultMustBeSortedByJoinColumn) { // Is the table sorted by join column? - ASSERT_TRUE(std::ranges::is_sorted(localTable.getColumn(joinColumn))); + ASSERT_TRUE(ql::ranges::is_sorted(localTable.getColumn(joinColumn))); } // Sort both the table and the expectedContent, so that both have a definite // form for comparison. - std::ranges::sort(localTable, std::ranges::lexicographical_compare); - std::ranges::sort(localExpectedContent, std::ranges::lexicographical_compare); + ql::ranges::sort(localTable, ql::ranges::lexicographical_compare); + ql::ranges::sort(localExpectedContent, ql::ranges::lexicographical_compare); ASSERT_EQ(localTable, localExpectedContent); } @@ -76,7 +76,7 @@ IdTable generateIdTable( table.resize(numberRows); // Fill the table. 
- std::ranges::for_each( + ql::ranges::for_each( /* The iterator of an `IdTable` dereference to an `row_reference_restricted`, which only allows write access, if it is a r-value. Otherwise, we can't @@ -90,7 +90,7 @@ IdTable generateIdTable( std::vector generatedRow = rowGenerator(); AD_CONTRACT_CHECK(generatedRow.size() == numberColumns); - std::ranges::copy(generatedRow, AD_FWD(row).begin()); + ql::ranges::copy(generatedRow, AD_FWD(row).begin()); }); return table; @@ -105,22 +105,22 @@ IdTable createRandomlyFilledIdTable( AD_CONTRACT_CHECK(numberRows > 0 && numberColumns > 0); // Views for clearer access. - auto joinColumnNumberView = std::views::keys(joinColumnWithGenerator); - auto joinColumnGeneratorView = std::views::values(joinColumnWithGenerator); + auto joinColumnNumberView = ql::views::keys(joinColumnWithGenerator); + auto joinColumnGeneratorView = ql::views::values(joinColumnWithGenerator); // Are all the join column numbers within the max column number? - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( joinColumnNumberView, [&numberColumns](const size_t num) { return num < numberColumns; })); // Are there no duplicates in the join column numbers? std::vector sortedJoinColumnNumbers = ad_utility::transform(joinColumnNumberView, std::identity{}); - std::ranges::sort(sortedJoinColumnNumbers); + ql::ranges::sort(sortedJoinColumnNumbers); AD_CONTRACT_CHECK(std::ranges::unique(sortedJoinColumnNumbers).empty()); // Are all the functions for generating join column entries not nullptr? - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( joinColumnGeneratorView, [](auto func) { return func != nullptr; })); // The random number generators for normal entries. @@ -134,10 +134,10 @@ IdTable createRandomlyFilledIdTable( // Assigning the column number to a generator function. 
std::vector*> columnToGenerator( numberColumns, &normalEntryGenerator); - std::ranges::for_each(joinColumnWithGenerator, - [&columnToGenerator](auto& pair) { - columnToGenerator.at(pair.first) = &pair.second; - }); + ql::ranges::for_each(joinColumnWithGenerator, + [&columnToGenerator](auto& pair) { + columnToGenerator.at(pair.first) = &pair.second; + }); // Creating the table. return generateIdTable( @@ -192,7 +192,7 @@ IdTable createRandomlyFilledIdTable( Is the lower bound smaller, or equal, to the upper bound? And is the upper bound smaller, or equal, to the maximum size of an IdTable entry? */ - AD_CONTRACT_CHECK(std::ranges::all_of( + AD_CONTRACT_CHECK(ql::ranges::all_of( joinColumnsAndBounds, [](const JoinColumnAndBounds& j) { return j.lowerBound_ <= j.upperBound_ && j.upperBound_ <= maxIdSize; })); diff --git a/test/util/RandomTestHelpers.h b/test/util/RandomTestHelpers.h index 202fccfa41..e45ff48c69 100644 --- a/test/util/RandomTestHelpers.h +++ b/test/util/RandomTestHelpers.h @@ -35,7 +35,7 @@ inline std::array createArrayOfRandomSeeds( ad_utility::RandomSeed::make(std::random_device{}())) { RandomSeedGenerator generator{std::move(seed)}; std::array seeds{}; - std::ranges::generate(seeds, - [&generator]() { return std::invoke(generator); }); + ql::ranges::generate(seeds, + [&generator]() { return std::invoke(generator); }); return seeds; } From 1adcecbec43612de6723ec65b0a84639f0eeba6f Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 21:54:50 +0100 Subject: [PATCH 19/38] Reverting the nofLiterals being saved in the TextMetaData and instead saving nofNonLiterals in the configuration json file. 
--- src/index/Index.cpp | 4 +--- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 4 +++- src/index/IndexImpl.cpp | 1 + src/index/IndexImpl.h | 9 ++++++--- src/index/TextMetaData.h | 6 ------ test/engine/TextIndexScanTestHelpers.h | 7 +++---- 7 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index fe11bf55f7..a652b85bfc 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,9 +233,7 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofLiteralsInTextIndex() const { - return pimpl_->getNofLiteralsInTextIndex(); -} +size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 0288b15408..1fac924aca 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; - size_t getNofLiteralsInTextIndex() const; + size_t getNofNonLiterals() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 0c8fb29e22..f678cd7278 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,7 +299,9 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - textMeta_.setNofLiteralsInTextIndex(nofLiterals); + nofNonLiterals_ = nofContexts - nofLiterals; + configurationJson_["num-non-literals"] = nofNonLiterals_; + writeConfiguration(); writer.finish(); LOG(TRACE) << "END 
IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index c878e4365c..51153691c6 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1126,6 +1126,7 @@ void IndexImpl::readConfiguration() { loadDataMember("num-subjects", numSubjects_, NumNormalAndInternal{}); loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); + loadDataMember("num-non-literals", nofNonLiterals_, 0); // Initialize BlankNodeManager uint64_t numBlankNodesTotal; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index d12619d6df..3fa8db4194 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -158,6 +158,11 @@ class IndexImpl { NumNormalAndInternal numTriples_; string indexId_; + // Keeps track of the number of nonLiteral contexts in the index this is used + // in the test retrieval of the texts. This only works reliably if the + // wordsFile.tsv starts with contextId 1 and is continuous. + size_t nofNonLiterals_; + // Global static pointers to the currently active index and comparator. // Those are used to compare LocalVocab entries with each other as well as // with Vocab entries. 
@@ -424,9 +429,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofLiteralsInTextIndex() const { - return textMeta_.getNofLiteralsInTextIndex(); - } + size_t getNofNonLiterals() const { return nofNonLiterals_; } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index b15b5e9a96..30fda07921 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,10 +98,6 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } - size_t getNofLiteralsInTextIndex() const { return _nofLiteralsInTextIndex; } - - void setNofLiteralsInTextIndex(size_t n) { _nofLiteralsInTextIndex = n; } - const string& getName() const { return _name; } void setName(const string& name) { _name = name; } @@ -113,7 +109,6 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; - size_t _nofLiteralsInTextIndex = 0; string _name; vector _blocks; @@ -123,7 +118,6 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; - serializer | arg._nofLiteralsInTextIndex; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index a0107ffe83..7d344b998b 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,14 +17,13 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - uint64_t nofLiterals = qec->getIndex().getNofLiteralsInTextIndex(); - uint64_t nofContexts = qec->getIndex().getNofTextRecords(); + size_t nofNonLiterals = qec->getIndex().getNofNonLiterals(); uint64_t textRecordIdFromTable = 
result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); - if ((nofContexts - nofLiterals) <= textRecordIdFromTable) { + if (nofNonLiterals <= textRecordIdFromTable) { // Return when from Literals return qec->getIndex().indexToString( - VocabIndex::make(textRecordIdFromTable - (nofContexts - nofLiterals))); + VocabIndex::make(textRecordIdFromTable - nofNonLiterals)); } else { // Return when from DocsDB return qec->getIndex().getTextExcerpt( From f5eefab931679c8f243909cd6a43316d2a357d28 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 22:01:00 +0100 Subject: [PATCH 20/38] Revert to first sync and then reapply "Reverting the nofLiterals being saved in the TextMetaData and instead saving nofNonLiterals in the configuration json file." This reverts commit 1adcecbec43612de6723ec65b0a84639f0eeba6f. --- src/index/Index.cpp | 4 +++- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 4 +--- src/index/IndexImpl.cpp | 1 - src/index/IndexImpl.h | 9 +++------ src/index/TextMetaData.h | 6 ++++++ test/engine/TextIndexScanTestHelpers.h | 7 ++++--- 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index a652b85bfc..fe11bf55f7 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,7 +233,9 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } +size_t Index::getNofLiteralsInTextIndex() const { + return pimpl_->getNofLiteralsInTextIndex(); +} // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 1fac924aca..0288b15408 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t 
getNofEntityPostings() const; - size_t getNofNonLiterals() const; + size_t getNofLiteralsInTextIndex() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index f678cd7278..0c8fb29e22 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,9 +299,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - nofNonLiterals_ = nofContexts - nofLiterals; - configurationJson_["num-non-literals"] = nofNonLiterals_; - writeConfiguration(); + textMeta_.setNofLiteralsInTextIndex(nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 51153691c6..c878e4365c 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1126,7 +1126,6 @@ void IndexImpl::readConfiguration() { loadDataMember("num-subjects", numSubjects_, NumNormalAndInternal{}); loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); - loadDataMember("num-non-literals", nofNonLiterals_, 0); // Initialize BlankNodeManager uint64_t numBlankNodesTotal; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 3fa8db4194..d12619d6df 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -158,11 +158,6 @@ class IndexImpl { NumNormalAndInternal numTriples_; string indexId_; - // Keeps track of the number of nonLiteral contexts in the index this is used - // in the test retrieval of the texts. This only works reliably if the - // wordsFile.tsv starts with contextId 1 and is continuous. - size_t nofNonLiterals_; - // Global static pointers to the currently active index and comparator. 
// Those are used to compare LocalVocab entries with each other as well as // with Vocab entries. @@ -429,7 +424,9 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofNonLiterals() const { return nofNonLiterals_; } + size_t getNofLiteralsInTextIndex() const { + return textMeta_.getNofLiteralsInTextIndex(); + } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 30fda07921..b15b5e9a96 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,6 +98,10 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } + size_t getNofLiteralsInTextIndex() const { return _nofLiteralsInTextIndex; } + + void setNofLiteralsInTextIndex(size_t n) { _nofLiteralsInTextIndex = n; } + const string& getName() const { return _name; } void setName(const string& name) { _name = name; } @@ -109,6 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; + size_t _nofLiteralsInTextIndex = 0; string _name; vector _blocks; @@ -118,6 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; + serializer | arg._nofLiteralsInTextIndex; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 7d344b998b..a0107ffe83 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,13 +17,14 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - size_t nofNonLiterals = qec->getIndex().getNofNonLiterals(); + uint64_t nofLiterals = qec->getIndex().getNofLiteralsInTextIndex(); + uint64_t nofContexts = 
qec->getIndex().getNofTextRecords(); uint64_t textRecordIdFromTable = result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); - if (nofNonLiterals <= textRecordIdFromTable) { + if ((nofContexts - nofLiterals) <= textRecordIdFromTable) { // Return when from Literals return qec->getIndex().indexToString( - VocabIndex::make(textRecordIdFromTable - nofNonLiterals)); + VocabIndex::make(textRecordIdFromTable - (nofContexts - nofLiterals))); } else { // Return when from DocsDB return qec->getIndex().getTextExcerpt( From 583a67a96be25e715bc97affdf8fd2b4f5c46e38 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 22:14:56 +0100 Subject: [PATCH 21/38] ql:contains-word now can show the respective word-score. --- src/engine/TextIndexScanForWord.cpp | 14 +++++++------- src/index/FTSAlgorithms.cpp | 7 +++++-- src/index/IndexImpl.Text.cpp | 9 +++++++-- src/parser/sparqlParser/SparqlQleverVisitor.cpp | 1 + 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index 612780e28b..b2854e5c20 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -18,13 +18,12 @@ ProtoResult TextIndexScanForWord::computeResult( IdTable idTable = getExecutionContext()->getIndex().getWordPostingsForTerm( word_, getExecutionContext()->getAllocator()); + // This filters out the word column. When the searchword is a prefix this + // column shows the word the prefix got extended to if (!isPrefix_) { - IdTable smallIdTable{getExecutionContext()->getAllocator()}; - smallIdTable.setNumColumns(1); - smallIdTable.resize(idTable.numRows()); - ql::ranges::copy(idTable.getColumn(0), smallIdTable.getColumn(0).begin()); - - return {std::move(smallIdTable), resultSortedOn(), LocalVocab{}}; + using CI = ColumnIndex; + idTable.setColumnSubset(std::array{CI{0}, CI{2}}); + return {std::move(idTable), resultSortedOn(), LocalVocab{}}; } // Add details to the runtimeInfo. 
This is has no effect on the result. @@ -46,12 +45,13 @@ VariableToColumnMap TextIndexScanForWord::computeVariableToColumnMap() const { addDefinedVar(textRecordVar_.getMatchingWordVariable( std::string_view(word_).substr(0, word_.size() - 1))); } + addDefinedVar(textRecordVar_.getScoreVariable(word_)); return vcmap; } // _____________________________________________________________________________ size_t TextIndexScanForWord::getResultWidth() const { - return 1 + (isPrefix_ ? 1 : 0); + return 2 + (isPrefix_ ? 1 : 0); } // _____________________________________________________________________________ diff --git a/src/index/FTSAlgorithms.cpp b/src/index/FTSAlgorithms.cpp index 0589c5ffee..087f97a1fe 100644 --- a/src/index/FTSAlgorithms.cpp +++ b/src/index/FTSAlgorithms.cpp @@ -10,19 +10,21 @@ // _____________________________________________________________________________ IdTable FTSAlgorithms::filterByRange(const IdRange& idRange, const IdTable& idTablePreFilter) { - AD_CONTRACT_CHECK(idTablePreFilter.numColumns() == 2); + AD_CONTRACT_CHECK(idTablePreFilter.numColumns() == 3); LOG(DEBUG) << "Filtering " << idTablePreFilter.getColumn(0).size() << " elements by ID range...\n"; IdTable idTableResult{idTablePreFilter.getAllocator()}; - idTableResult.setNumColumns(2); + idTableResult.setNumColumns(3); idTableResult.resize(idTablePreFilter.getColumn(0).size()); decltype(auto) resultCidColumn = idTableResult.getColumn(0); decltype(auto) resultWidColumn = idTableResult.getColumn(1); + decltype(auto) resultSidColumn = idTableResult.getColumn(2); size_t nofResultElements = 0; decltype(auto) preFilterCidColumn = idTablePreFilter.getColumn(0); decltype(auto) preFilterWidColumn = idTablePreFilter.getColumn(1); + decltype(auto) preFilterSidColumn = idTablePreFilter.getColumn(2); // TODO Use views::zip. for (size_t i = 0; i < preFilterWidColumn.size(); ++i) { // TODO proper Ids for the text stuff. 
@@ -36,6 +38,7 @@ IdTable FTSAlgorithms::filterByRange(const IdRange& idRange, preFilterWidColumn[i].getWordVocabIndex() <= idRange.last()) { resultCidColumn[nofResultElements] = preFilterCidColumn[i]; resultWidColumn[nofResultElements] = preFilterWidColumn[i]; + resultSidColumn[nofResultElements] = preFilterSidColumn[i]; nofResultElements++; } } diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index bd46c81e53..e3b9457d2d 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -718,7 +718,7 @@ std::string_view IndexImpl::wordIdToString(WordIndex wordIndex) const { IdTable IndexImpl::readWordCl( const TextBlockMetaData& tbmd, const ad_utility::AllocatorWithLimit& allocator) const { - IdTable idTable{2, allocator}; + IdTable idTable{3, allocator}; vector cids = readGapComprList( tbmd._cl._nofElements, tbmd._cl._startContextlist, static_cast(tbmd._cl._startWordlist - tbmd._cl._startContextlist), @@ -734,6 +734,11 @@ IdTable IndexImpl::readWordCl( idTable.getColumn(1).begin(), [](WordIndex id) { return Id::makeFromWordVocabIndex(WordVocabIndex::make(id)); }); + std::ranges::transform( + readFreqComprList(tbmd._cl._nofElements, tbmd._cl._startScorelist, + static_cast(tbmd._cl._lastByte + 1 - + tbmd._cl._startScorelist)), + idTable.getColumn(2).begin(), &Id::makeFromInt); return idTable; } @@ -772,7 +777,7 @@ IdTable IndexImpl::getWordPostingsForTerm( const ad_utility::AllocatorWithLimit& allocator) const { LOG(DEBUG) << "Getting word postings for term: " << term << '\n'; IdTable idTable{allocator}; - idTable.setNumColumns(term.ends_with('*') ? 2 : 1); + idTable.setNumColumns(term.ends_with('*') ? 
3 : 2); auto optionalTbmd = getTextBlockMetadataForWordOrPrefix(term); if (!optionalTbmd.has_value()) { return idTable; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 5629f1452c..8ecf5a96be 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1490,6 +1490,7 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } for (std::string_view s : std::vector( absl::StrSplit(name.substr(1, name.size() - 2), ' '))) { + addVisibleVariable(var->getScoreVariable(std::string(s))); if (!s.ends_with('*')) { continue; } From e4cb2edc9c337c3e8bf60416a23080ee192ede19 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Fri, 12 Jul 2024 16:37:22 +0200 Subject: [PATCH 22/38] Fixed tests and formatted files. --- test/QueryPlannerTestHelpers.h | 2 +- test/engine/TextIndexScanForWordTest.cpp | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/test/QueryPlannerTestHelpers.h b/test/QueryPlannerTestHelpers.h index ea22a00431..7c24085044 100644 --- a/test/QueryPlannerTestHelpers.h +++ b/test/QueryPlannerTestHelpers.h @@ -130,7 +130,7 @@ constexpr auto TextIndexScanForWord = [](Variable textRecordVar, string word) -> QetMatcher { return RootOperation<::TextIndexScanForWord>(AllOf( AD_PROPERTY(::TextIndexScanForWord, getResultWidth, - Eq(1 + word.ends_with('*'))), + Eq(2 + word.ends_with('*'))), AD_PROPERTY(::TextIndexScanForWord, textRecordVar, Eq(textRecordVar)), AD_PROPERTY(::TextIndexScanForWord, word, word))); }; diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 57f4b46e02..af9e8c109a 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -5,6 +5,7 @@ #include #include +#include "../printers/VariablePrinters.h" #include "../util/GTestHelpers.h" #include "../util/IdTableHelpers.h" #include "../util/IndexTestHelpers.h" @@ 
-29,17 +30,18 @@ TEST(TextIndexScanForWord, WordScanPrefix) { TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; - ASSERT_EQ(s1.getResultWidth(), 2); + ASSERT_EQ(s1.getResultWidth(), 3); auto result = s1.computeResultOnlyForTesting(); - ASSERT_EQ(result.idTable().numColumns(), 2); + ASSERT_EQ(result.idTable().numColumns(), 3); ASSERT_EQ(result.idTable().size(), 3); s2.getExternallyVisibleVariableColumns(); using enum ColumnIndexAndTypeInfo::UndefStatus; VariableToColumnMap expectedVariables{ {Variable{"?text2"}, {0, AlwaysDefined}}, - {Variable{"?ql_matchingword_text2_test"}, {1, AlwaysDefined}}}; + {Variable{"?ql_matchingword_text2_test"}, {1, AlwaysDefined}}, + {Variable{"?ql_score_text2_fixedEntity_test_42_"}, {2, AlwaysDefined}}}; EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); @@ -60,10 +62,10 @@ TEST(TextIndexScanForWord, WordScanBasic) { TextIndexScanForWord s1{qec, Variable{"?text1"}, "test"}; - ASSERT_EQ(s1.getResultWidth(), 1); + ASSERT_EQ(s1.getResultWidth(), 2); auto result = s1.computeResultOnlyForTesting(); - ASSERT_EQ(result.idTable().numColumns(), 1); + ASSERT_EQ(result.idTable().numColumns(), 2); ASSERT_EQ(result.idTable().size(), 2); ASSERT_EQ("\"he failed the test\"", @@ -73,10 +75,10 @@ TEST(TextIndexScanForWord, WordScanBasic) { TextIndexScanForWord s2{qec, Variable{"?text1"}, "testing"}; - ASSERT_EQ(s2.getResultWidth(), 1); + ASSERT_EQ(s2.getResultWidth(), 2); result = s2.computeResultOnlyForTesting(); - ASSERT_EQ(result.idTable().numColumns(), 1); + ASSERT_EQ(result.idTable().numColumns(), 2); ASSERT_EQ(result.idTable().size(), 1); ASSERT_EQ("\"testing can help\"", From 3ce304d60f9f6ad8b6f858942000e8de8821c716 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Sat, 27 Jul 2024 16:00:28 +0200 Subject: [PATCH 23/38] New formatting for Word Score Variables. Changed where necessary and adapted unit tests. 
Missing e2e tests. --- src/engine/QueryPlanner.cpp | 4 +- src/engine/TextIndexScanForEntity.cpp | 4 +- src/engine/TextIndexScanForWord.cpp | 2 +- src/parser/data/Variable.cpp | 27 ++++++++- src/parser/data/Variable.h | 11 +++- .../sparqlParser/SparqlQleverVisitor.cpp | 7 ++- test/QueryPlannerTest.cpp | 60 ++++++++++--------- test/engine/TextIndexScanForWordTest.cpp | 2 +- 8 files changed, 77 insertions(+), 40 deletions(-) diff --git a/src/engine/QueryPlanner.cpp b/src/engine/QueryPlanner.cpp index 9e0cd56083..dceeebfcc4 100644 --- a/src/engine/QueryPlanner.cpp +++ b/src/engine/QueryPlanner.cpp @@ -1001,14 +1001,14 @@ QueryPlanner::SubtreePlan QueryPlanner::getTextLeafPlan( : *(node._variables.begin()); plan = makeSubtreePlan(_qec, cvar, evar, word); textLimits[cvar].entityVars_.push_back(evar); - textLimits[cvar].scoreVars_.push_back(cvar.getScoreVariable(evar)); + textLimits[cvar].scoreVars_.push_back(cvar.getEntityScoreVariable(evar)); } else { // Fixed entity case AD_CORRECTNESS_CHECK(node._variables.size() == 1); plan = makeSubtreePlan( _qec, cvar, node.triple_.o_.toString(), word); textLimits[cvar].scoreVars_.push_back( - cvar.getScoreVariable(node.triple_.o_.toString())); + cvar.getEntityScoreVariable(node.triple_.o_.toString())); } } else { plan = makeSubtreePlan(_qec, cvar, word); diff --git a/src/engine/TextIndexScanForEntity.cpp b/src/engine/TextIndexScanForEntity.cpp index 78c29e8734..6dbce07ef5 100644 --- a/src/engine/TextIndexScanForEntity.cpp +++ b/src/engine/TextIndexScanForEntity.cpp @@ -48,10 +48,10 @@ VariableToColumnMap TextIndexScanForEntity::computeVariableToColumnMap() const { }; addDefinedVar(textRecordVar_); if (hasFixedEntity()) { - addDefinedVar(textRecordVar_.getScoreVariable(fixedEntity())); + addDefinedVar(textRecordVar_.getEntityScoreVariable(fixedEntity())); } else { addDefinedVar(entityVariable()); - addDefinedVar(textRecordVar_.getScoreVariable(entityVariable())); + 
addDefinedVar(textRecordVar_.getEntityScoreVariable(entityVariable())); } return vcmap; } diff --git a/src/engine/TextIndexScanForWord.cpp b/src/engine/TextIndexScanForWord.cpp index b2854e5c20..7c3f931f8f 100644 --- a/src/engine/TextIndexScanForWord.cpp +++ b/src/engine/TextIndexScanForWord.cpp @@ -45,7 +45,7 @@ VariableToColumnMap TextIndexScanForWord::computeVariableToColumnMap() const { addDefinedVar(textRecordVar_.getMatchingWordVariable( std::string_view(word_).substr(0, word_.size() - 1))); } - addDefinedVar(textRecordVar_.getScoreVariable(word_)); + addDefinedVar(textRecordVar_.getWordScoreVariable(word_, isPrefix_)); return vcmap; } diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index 89e0e894a2..ffcb95b3e2 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -56,7 +56,7 @@ Variable::Variable(std::string name, bool checkName) : _name{std::move(name)} { } // _____________________________________________________________________________ -Variable Variable::getScoreVariable( +Variable Variable::getEntityScoreVariable( const std::variant& varOrEntity) const { std::string_view type; std::string entity; @@ -79,6 +79,31 @@ Variable Variable::getScoreVariable( absl::StrCat(SCORE_VARIABLE_PREFIX, name().substr(1), type, entity)}; } +// _____________________________________________________________________________ +Variable Variable::getWordScoreVariable(const std::string& word, + const bool& isPrefix) const { + std::string_view type; + std::string_view wordToConvert; + std::string convertedWord; + if (isPrefix) { + wordToConvert = std::string_view(word.data(), word.size() - 1); + type = "prefix_"; + } else { + wordToConvert = std::string_view(word); + type = "word_"; + } + convertedWord += "_"; + for (char c : wordToConvert) { + if (isalpha(static_cast(c))) { + convertedWord += c; + } else { + absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); + } + } + return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, 
type, name().substr(1), + convertedWord)}; +} + // _____________________________________________________________________________ Variable Variable::getMatchingWordVariable(std::string_view term) const { return Variable{ diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index a378cfd903..422542f09c 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -48,9 +48,18 @@ class Variable { // `?ql_someTextVar_fixedEntity_someFixedEntity`. // Note that if the the fixed entity contains non ascii characters they are // converted to numbers and escaped. - Variable getScoreVariable( + Variable getEntityScoreVariable( const std::variant& varOrEntity) const; + // Converts `?someTextVar` and `someWord` into + // `?ql_score_word_someTextVar_someWord. + // Converts `?someTextVar` and `somePrefix*` into + // `?ql_score_prefix_someTextVar_somePrefix`. + // Note that if the word contains non ascii characters they are converted to + // numbers and escaped. + Variable getWordScoreVariable(const std::string& word, + const bool& isPrefix) const; + // Convert `?someVariable` into `?ql_matchingword_someVariable_someTerm` Variable getMatchingWordVariable(std::string_view term) const; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 8ecf5a96be..713a2a2b1e 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1490,7 +1490,8 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } for (std::string_view s : std::vector( absl::StrSplit(name.substr(1, name.size() - 2), ' '))) { - addVisibleVariable(var->getScoreVariable(std::string(s))); + addVisibleVariable( + var->getWordScoreVariable(std::string(s), s.ends_with('*'))); if (!s.ends_with('*')) { continue; } @@ -1499,9 +1500,9 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } } else if (propertyPath->asString() == CONTAINS_ENTITY_PREDICATE) { if (const auto* 
entVar = std::get_if(&object)) { - addVisibleVariable(var->getScoreVariable(*entVar)); + addVisibleVariable(var->getEntityScoreVariable(*entVar)); } else { - addVisibleVariable(var->getScoreVariable(object.toSparql())); + addVisibleVariable(var->getEntityScoreVariable(object.toSparql())); } } } diff --git a/test/QueryPlannerTest.cpp b/test/QueryPlannerTest.cpp index c9a104bdcb..32c1f4ff08 100644 --- a/test/QueryPlannerTest.cpp +++ b/test/QueryPlannerTest.cpp @@ -2599,12 +2599,12 @@ TEST(QueryPlanner, TextLimit) { h::expect( "SELECT * WHERE { ?text ql:contains-word \"test*\" . ?text " "ql:contains-entity } TEXTLIMIT 10", - h::TextLimit( - 10, - h::Join(wordScan(Var{"?text"}, "test*"), - entityScan(Var{"?text"}, "", "test*")), - Var{"?text"}, vector{}, - vector{Var{"?text"}.getScoreVariable("")}), + h::TextLimit(10, + h::Join(wordScan(Var{"?text"}, "test*"), + entityScan(Var{"?text"}, "", "test*")), + Var{"?text"}, vector{}, + vector{ + Var{"?text"}.getEntityScoreVariable("")}), qec); // Contains entity @@ -2616,7 +2616,8 @@ TEST(QueryPlanner, TextLimit) { h::Join(wordScan(Var{"?text"}, "test*"), entityScan(Var{"?text"}, Var{"?scientist"}, "test*")), Var{"?text"}, vector{Var{"?scientist"}}, - vector{Var{"?text"}.getScoreVariable(Var{"?scientist"})}), + vector{ + Var{"?text"}.getEntityScoreVariable(Var{"?scientist"})}), qec); // Contains entity and fixed entity @@ -2624,15 +2625,15 @@ TEST(QueryPlanner, TextLimit) { "SELECT * WHERE { ?text ql:contains-entity ?scientist . ?text " "ql:contains-word \"test*\" . 
?text ql:contains-entity } " "TEXTLIMIT 5", - h::TextLimit( - 5, - h::UnorderedJoins( - wordScan(Var{"?text"}, "test*"), - entityScan(Var{"?text"}, Var{"?scientist"}, "test*"), - entityScan(Var{"?text"}, "", "test*")), - Var{"?text"}, vector{Var{"?scientist"}}, - vector{Var{"?text"}.getScoreVariable(Var{"?scientist"}), - Var{"?text"}.getScoreVariable("")}), + h::TextLimit(5, + h::UnorderedJoins( + wordScan(Var{"?text"}, "test*"), + entityScan(Var{"?text"}, Var{"?scientist"}, "test*"), + entityScan(Var{"?text"}, "", "test*")), + Var{"?text"}, vector{Var{"?scientist"}}, + vector{ + Var{"?text"}.getEntityScoreVariable(Var{"?scientist"}), + Var{"?text"}.getEntityScoreVariable("")}), qec); // Contains two entities @@ -2647,8 +2648,9 @@ TEST(QueryPlanner, TextLimit) { entityScan(Var{"?text"}, Var{"?scientist"}, "test*"), entityScan(Var{"?text"}, Var{"?scientist2"}, "test*")), Var{"?text"}, vector{Var{"?scientist"}, Var{"?scientist2"}}, - vector{Var{"?text"}.getScoreVariable(Var{"?scientist"}), - Var{"?text"}.getScoreVariable(Var{"?scientist2"})}), + vector{ + Var{"?text"}.getEntityScoreVariable(Var{"?scientist"}), + Var{"?text"}.getEntityScoreVariable(Var{"?scientist2"})}), qec); // Contains two text variables. 
Also checks if the textlimit at an efficient @@ -2665,17 +2667,17 @@ TEST(QueryPlanner, TextLimit) { entityScan(Var{"?text1"}, Var{"?scientist1"}, "test*")), Var{"?text1"}, vector{Var{"?scientist1"}}, vector{ - Var{"?text1"}.getScoreVariable(Var{"?scientist1"})}), - h::TextLimit(5, - h::UnorderedJoins( - wordScan(Var{"?text2"}, "test*"), - entityScan(Var{"?text2"}, Var{"?author1"}, "test*"), - entityScan(Var{"?text2"}, Var{"?author2"}, "test*")), - Var{"?text2"}, - vector{Var{"?author1"}, Var{"?author2"}}, - vector{ - Var{"?text2"}.getScoreVariable(Var{"?author1"}), - Var{"?text2"}.getScoreVariable(Var{"?author2"})})), + Var{"?text1"}.getEntityScoreVariable(Var{"?scientist1"})}), + h::TextLimit( + 5, + h::UnorderedJoins( + wordScan(Var{"?text2"}, "test*"), + entityScan(Var{"?text2"}, Var{"?author1"}, "test*"), + entityScan(Var{"?text2"}, Var{"?author2"}, "test*")), + Var{"?text2"}, vector{Var{"?author1"}, Var{"?author2"}}, + vector{ + Var{"?text2"}.getEntityScoreVariable(Var{"?author1"}), + Var{"?text2"}.getEntityScoreVariable(Var{"?author2"})})), qec); } diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index af9e8c109a..588a549d98 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -41,7 +41,7 @@ TEST(TextIndexScanForWord, WordScanPrefix) { VariableToColumnMap expectedVariables{ {Variable{"?text2"}, {0, AlwaysDefined}}, {Variable{"?ql_matchingword_text2_test"}, {1, AlwaysDefined}}, - {Variable{"?ql_score_text2_fixedEntity_test_42_"}, {2, AlwaysDefined}}}; + {Variable{"?ql_score_prefix_text2_test"}, {2, AlwaysDefined}}}; EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); From eb8e83a84028fab4b4e22ee4165b2748a89981bf Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Mon, 29 Jul 2024 18:55:10 +0200 Subject: [PATCH 24/38] Added getWordSCoreVariable for std::string_view --- src/parser/data/Variable.cpp | 25 
++++++++++++++++++- src/parser/data/Variable.h | 7 ++++-- .../sparqlParser/SparqlQleverVisitor.cpp | 3 +-- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index ffcb95b3e2..5195e48f66 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -81,7 +81,7 @@ Variable Variable::getEntityScoreVariable( // _____________________________________________________________________________ Variable Variable::getWordScoreVariable(const std::string& word, - const bool& isPrefix) const { + bool isPrefix) const { std::string_view type; std::string_view wordToConvert; std::string convertedWord; @@ -104,6 +104,29 @@ Variable Variable::getWordScoreVariable(const std::string& word, convertedWord)}; } +// _____________________________________________________________________________ +Variable Variable::getWordScoreVariable(std::string_view word, + bool isPrefix) const { + std::string_view type; + std::string convertedWord; + if (isPrefix) { + word.remove_suffix(1); + type = "prefix_"; + } else { + type = "word_"; + } + convertedWord = "_"; + for (char c : word) { + if (isalpha(static_cast(c))) { + convertedWord += c; + } else { + absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); + } + } + return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, type, name().substr(1), + convertedWord)}; +} + // _____________________________________________________________________________ Variable Variable::getMatchingWordVariable(std::string_view term) const { return Variable{ diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index 422542f09c..fb46abd384 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -57,8 +57,11 @@ class Variable { // `?ql_score_prefix_someTextVar_somePrefix`. // Note that if the word contains non ascii characters they are converted to // numbers and escaped. 
- Variable getWordScoreVariable(const std::string& word, - const bool& isPrefix) const; + Variable getWordScoreVariable(const std::string& word, bool isPrefix) const; + + // Does the same thing as the function with std::string& param only for + // std::string_view + Variable getWordScoreVariable(std::string_view word, bool isPrefix) const; // Convert `?someVariable` into `?ql_matchingword_someVariable_someTerm` Variable getMatchingWordVariable(std::string_view term) const; diff --git a/src/parser/sparqlParser/SparqlQleverVisitor.cpp b/src/parser/sparqlParser/SparqlQleverVisitor.cpp index 713a2a2b1e..80b8a86795 100644 --- a/src/parser/sparqlParser/SparqlQleverVisitor.cpp +++ b/src/parser/sparqlParser/SparqlQleverVisitor.cpp @@ -1490,8 +1490,7 @@ void Visitor::setMatchingWordAndScoreVisibleIfPresent( } for (std::string_view s : std::vector( absl::StrSplit(name.substr(1, name.size() - 2), ' '))) { - addVisibleVariable( - var->getWordScoreVariable(std::string(s), s.ends_with('*'))); + addVisibleVariable(var->getWordScoreVariable(s, s.ends_with('*'))); if (!s.ends_with('*')) { continue; } From cd4789a1d55e693abc201e690484d1d034cb0ca1 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 22:16:09 +0100 Subject: [PATCH 25/38] Made it possible to construct query execution contexts with text index. This is done through passing the words and docsfile as string, and then building the text index as normal. Basic Test is existent (TODO make more edge case tests) and e2e testing is fixed. 
--- e2e/scientists_queries.yaml | 33 ++++++++---- src/index/Index.cpp | 3 ++ src/index/Index.h | 1 + src/index/IndexImpl.Text.cpp | 12 +++-- src/index/IndexImpl.h | 8 ++- src/index/TextMetaData.h | 6 +++ src/parser/ContextFileParser.h | 1 + test/engine/TextIndexScanForWordTest.cpp | 46 +++++++++++++++- test/engine/TextIndexScanTestHelpers.h | 19 ++++++- test/util/IndexTestHelpers.cpp | 69 +++++++++++++++++------- test/util/IndexTestHelpers.h | 9 +++- 11 files changed, 169 insertions(+), 38 deletions(-) diff --git a/e2e/scientists_queries.yaml b/e2e/scientists_queries.yaml index 1fc78430be..421329c58b 100644 --- a/e2e/scientists_queries.yaml +++ b/e2e/scientists_queries.yaml @@ -55,31 +55,43 @@ queries: ?t ql:contains-word "RElaT* phySIKalische rela*" } checks: - - num_cols: 5 - - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_matchingword_t_relat", "?ql_matchingword_t_rela" ] + - num_cols: 8 + - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_score_prefix_t_RElaT", "?ql_matchingword_t_relat", "?ql_score_word_t_phySIKalische", "?ql_score_prefix_t_rela", "?ql_matchingword_t_rela" ] - contains_row: - "" - null - null + - null - "relationship" + - null + - null - "relationship" - contains_row: - "" - null - null + - null - "relationship" + - null + - null - "relativity" - contains_row: - "" - null - null + - null - "relativity" + - null + - null - "relationship" - contains_row: - "" - null - null + - null - "relativity" + - null + - null - "relativity" - query: algo-star-female-scientists @@ -151,7 +163,7 @@ queries: } TEXTLIMIT 2 checks: - - num_cols: 7 + - num_cols: 9 - num_rows: 18 - query: algor-star-female-born-before-1940 @@ -192,7 +204,7 @@ queries: } ORDER BY DESC(?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_) checks: - - num_cols: 5 + - num_cols: 6 - num_rows: 7 - contains_row: - "" @@ -202,6 +214,7 @@ queries: Charles Babbage, also known as' the father of computers', and in particular, Babbage's work on the Analytical Engine." 
- null + - null - "relationship" - order_numeric: {"dir": "DESC", "var" : "?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_"} @@ -219,7 +232,7 @@ queries: ORDER BY DESC(?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_) TEXTLIMIT 2 checks: - - num_cols: 5 + - num_cols: 6 - num_rows: 3 - contains_row: - "" @@ -229,6 +242,7 @@ queries: Charles Babbage, also known as' the father of computers', and in particular, Babbage's work on the Analytical Engine." - null + - null - "relationship" - order_numeric: {"dir": "DESC", "var" : "?ql_score_text_fixedEntity__60_Ada_95_Lovelace_62_"} @@ -246,7 +260,7 @@ queries: } TEXTLIMIT 1 checks: - - num_cols: 6 + - num_cols: 7 - num_rows: 2 - contains_row: - "" @@ -255,6 +269,7 @@ queries: with Somerville to visit Babbage as often as she could." - null - null + - null - "relationship" @@ -1391,10 +1406,10 @@ queries: ?t ql:contains-word "algo* herm* primary" } checks: - - num_cols: 5 + - num_cols: 8 - num_rows: 1 - - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_matchingword_t_algo", "?ql_matchingword_t_herm" ] - - contains_row: [ "",null,"Hermann's algorithm for primary decomposition is still in use now.","algorithm","hermann" ] + - selected: [ "?x", "?ql_score_t_var_x", "?t", "?ql_score_prefix_t_algo", "?ql_matchingword_t_algo", "?ql_score_prefix_t_herm", "?ql_matchingword_t_herm", "?ql_score_word_t_primary" ] + - contains_row: [ "",null,"Hermann's algorithm for primary decomposition is still in use now.",null,"algorithm",null,"hermann",null ] - query : select_asterisk_regex-lastname-stein diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 47fcad9c82..a652b85bfc 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -232,6 +232,9 @@ size_t Index::getNofEntityPostings() const { return pimpl_->getNofEntityPostings(); } +// ____________________________________________________________________________ +size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } + // 
____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { return pimpl_->numDistinctSubjects(); diff --git a/src/index/Index.h b/src/index/Index.h index ec408f15df..1fac924aca 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,6 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; + size_t getNofNonLiterals() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index e3b9457d2d..49531383a4 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -65,7 +65,7 @@ cppcoro::generator IndexImpl::wordsInTextRecords( if (!isLiteral(text)) { continue; } - ContextFileParser::Line entityLine{text, true, contextId, 1}; + ContextFileParser::Line entityLine{text, true, contextId, 1, true}; co_yield entityLine; std::string_view textView = text; textView = textView.substr(0, textView.rfind('"')); @@ -239,6 +239,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, size_t nofWordPostings = 0; size_t nofEntityPostings = 0; size_t entityNotFoundErrorMsgCount = 0; + size_t nofLiterals = 0; for (auto line : wordsInTextRecords(contextFile, addWordsFromLiterals)) { if (line._contextId != currentContext) { @@ -258,6 +259,9 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, // Note that `entitiesInContext` is a HashMap, so the `Id`s don't have // to be contiguous. 
entitiesInContext[Id::makeFromVocabIndex(eid)] += line._score; + if (line._isLiteralEntity) { + ++nofLiterals; + } } else { if (entityNotFoundErrorMsgCount < 20) { LOG(WARN) << "Entity from text not in KB: " << line._word << '\n'; @@ -294,6 +298,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); + textMeta_.setNofNonLiterals(nofContexts - nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; @@ -415,7 +420,7 @@ ContextListMetaData IndexImpl::writePostings(ad_utility::File& out, size_t n = 0; - WordToCodeMap wordCodeMap; + WordCodeMap wordCodeMap; WordCodebook wordCodebook; ScoreCodeMap scoreCodeMap; ScoreCodebook scoreCodebook; @@ -646,10 +651,11 @@ size_t IndexImpl::writeList(Numeric* data, size_t nofElements, // _____________________________________________________________________________ void IndexImpl::createCodebooks(const vector& postings, - IndexImpl::WordToCodeMap& wordCodemap, + IndexImpl::WordCodeMap& wordCodemap, IndexImpl::WordCodebook& wordCodebook, IndexImpl::ScoreCodeMap& scoreCodemap, IndexImpl::ScoreCodebook& scoreCodebook) const { + // There should be a more efficient way to do this (Felix Meisen) ad_utility::HashMap wfMap; ad_utility::HashMap sfMap; for (const auto& p : postings) { diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 0d5b396ccc..6a350a4a6f 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -424,6 +424,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } + size_t getNofNonLiterals() const { return textMeta_.getNofNonLiterals(); } bool hasAllPermutations() const { return SPO().isLoaded(); } @@ -624,14 +625,17 @@ class IndexImpl { ad_utility::File& file) const; // TODO understand what the "codes" are, are they better just ints? 
- typedef ad_utility::HashMap WordToCodeMap; + // After using createCodebooks on these types, the lowest codes refer to the + // most frequent WordIndex/Score. The maps are mapping those codes to their + // respective frequency. + typedef ad_utility::HashMap WordCodeMap; typedef ad_utility::HashMap ScoreCodeMap; typedef vector WordCodebook; typedef vector ScoreCodebook; //! Creates codebooks for lists that are supposed to be entropy encoded. void createCodebooks(const vector& postings, - WordToCodeMap& wordCodemap, WordCodebook& wordCodebook, + WordCodeMap& wordCodemap, WordCodebook& wordCodebook, ScoreCodeMap& scoreCodemap, ScoreCodebook& scoreCodebook) const; diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 30fda07921..2198e052e4 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,6 +98,10 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } + size_t getNofNonLiterals() const { return _nofNonLiterals; } + + void setNofNonLiterals(size_t n) { _nofNonLiterals = n; } + const string& getName() const { return _name; } void setName(const string& name) { _name = name; } @@ -109,6 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; + size_t _nofNonLiterals = 0; string _name; vector _blocks; @@ -118,6 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; + serializer | arg._nofNonLiterals; serializer | arg._name; serializer | arg._blocks; } diff --git a/src/parser/ContextFileParser.h b/src/parser/ContextFileParser.h index e00a268d24..ba8d7bac9c 100644 --- a/src/parser/ContextFileParser.h +++ b/src/parser/ContextFileParser.h @@ -21,6 +21,7 @@ class ContextFileParser { bool _isEntity; TextRecordIndex _contextId; Score _score; + bool _isLiteralEntity = false; }; explicit ContextFileParser(const string& contextFile, diff --git 
a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 588a549d98..e421a8ca96 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -22,10 +22,52 @@ namespace { std::string kg = "

\"he failed the test\" .

\"testing can help\" .

" "\"some other sentence\" .

\"the test on friday was really hard\" " - ". . ."; + ". . . ."; TEST(TextIndexScanForWord, WordScanPrefix) { - auto qec = getQec(kg, true, true, true, 16_B, true); + std::string wordsFileContent; + wordsFileContent = + "astronomer\t0\t1\t1\n" + "\t1\t1\t0\n" + "scientist\t0\t1\t1\n" + "field\t0\t1\t1\n" + "astronomy\t0\t1\t1\n" + "astronomer\t0\t2\t0\n" + "\t1\t2\t0\n" + ":s:firstsentence\t0\t2\t0\n" + "scientist\t0\t2\t0\n" + "field\t0\t2\t0\n" + "astronomy\t0\t2\t0\n" + "astronomy\t0\t3\t1\n" + "concentrates\t0\t3\t1\n" + "studies\t0\t3\t1\n" + "specific\t0\t3\t1\n" + "question\t0\t3\t1\n" + "outside\t0\t3\t1\n" + "scope\t0\t3\t1\n" + "earth\t0\t3\t1\n" + "astronomy\t0\t4\t1\n" + "concentrates\t0\t4\t1\n" + "studies\t0\t4\t1\n" + "field\t0\t4\t1\n" + "outside\t0\t4\t1\n" + "scope\t0\t4\t1\n" + "earth\t0\t4\t1\n"; + std::string docsFileContent; + docsFileContent = + "4\tAn astronomer is a scientist in the field of astronomy who " + "concentrates their studies on a specific question or field outside of " + "the scope of Earth.\n"; + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); + + TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; + auto tresult = t1.computeResultOnlyForTesting(); + ASSERT_EQ( + "An astronomer is a scientist in the field of astronomy who concentrates " + "their studies on a specific question or field outside of the scope of " + "Earth.", + h::getTextExcerptFromResultTable(qec, tresult, 0)); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 597344ad9e..4b216d522b 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -8,11 +8,26 @@ namespace textIndexScanTestHelpers { // NOTE: this function exploits a "lucky accident" that allows us to // obtain the textRecord using indexToString. 
// TODO: Implement a more elegant/stable version +// Idea for a more stable version is to add the literals to the docsfile +// which is later parsed and written to the docsDB. This would lead to a +// possible retrieval of the literals text with the getTextExcerpt function. +// The only problem is the increased size of the docsDB and the double saving +// of the literals. inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - return qec->getIndex().indexToString( - result.idTable().getColumn(0)[rowIndex].getVocabIndex()); + uint64_t offset = qec->getIndex().getNofNonLiterals(); + uint64_t shiftedTextRecordId = + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get() - + offset; + return qec->getIndex().indexToString(VocabIndex::make(shiftedTextRecordId)); +} + +inline string getTextExcerptFromResultTable(const QueryExecutionContext* qec, + const ProtoResult& result, + const size_t& rowIndex) { + return qec->getIndex().getTextExcerpt( + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex()); } inline string getEntityFromResultTable(const QueryExecutionContext* qec, diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 92d665a491..72d1016b50 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -46,7 +46,12 @@ std::vector getAllIndexFilenames( indexBasename + ".prefixes", indexBasename + ".vocabulary.internal", indexBasename + ".vocabulary.external", - indexBasename + ".vocabulary.external.offsets"}; + indexBasename + ".vocabulary.external.offsets", + indexBasename + ".wordsfile", + indexBasename + ".docsfile", + indexBasename + ".text.index", + indexBasename + ".text.vocabulary", + indexBasename + ".text.docsDB"}; } namespace { @@ -134,7 +139,9 @@ Index makeTestIndex(const std::string& indexBasename, bool loadAllPermutations, bool usePatterns, [[maybe_unused]] bool usePrefixCompression, ad_utility::MemorySize 
blocksizePermutations, - bool createTextIndex) { + bool createTextIndex, bool addWordsFromLiterals, + std::optional wordsFileContent, + std::optional docsFileContent) { // Ignore the (irrelevant) log output of the index building and loading during // these tests. static std::ostringstream ignoreLogStream; @@ -181,7 +188,29 @@ Index makeTestIndex(const std::string& indexBasename, std::nullopt}; index.createFromFiles({spec}); if (createTextIndex) { - index.addTextFromContextFile("", true); + if (wordsFileContent.has_value() && docsFileContent.has_value()) { + // Create and write to words- and docsfile to later build a full text + // index from them + ad_utility::File wordsFile(indexBasename + ".wordsfile", "w"); + ad_utility::File docsFile(indexBasename + ".docsfile", "w"); + wordsFile.write(wordsFileContent.value().c_str(), + wordsFileContent.value().size()); + docsFile.write(docsFileContent.value().c_str(), + docsFileContent.value().size()); + wordsFile.close(); + docsFile.close(); + index.setKbName(indexBasename); + index.setTextName(indexBasename); + index.setOnDiskBase(indexBasename); + if (addWordsFromLiterals) { + index.addTextFromContextFile(indexBasename + ".wordsfile", true); + } else { + index.addTextFromContextFile(indexBasename + ".wordsfile", false); + } + index.buildDocsDB(indexBasename + ".docsfile"); + } else if (addWordsFromLiterals) { + index.addTextFromContextFile("", true); + } } } if (!usePatterns || !loadAllPermutations) { @@ -216,7 +245,9 @@ QueryExecutionContext* getQec(std::optional turtleInput, bool loadAllPermutations, bool usePatterns, bool usePrefixCompression, ad_utility::MemorySize blocksizePermutations, - bool createTextIndex) { + bool createTextIndex, bool addWordsFromLiterals, + std::optional wordsFileContent, + std::optional docsFileContent) { // Similar to `absl::Cleanup`. 
Calls the `callback_` in the destructor, but // the callback is stored as a `std::function`, which allows to store // different types of callbacks in the same wrapper type. @@ -263,20 +294,22 @@ QueryExecutionContext* getQec(std::optional turtleInput, std::string testIndexBasename = "_staticGlobalTestIndex" + std::to_string(contextMap.size()); contextMap.emplace( - key, Context{TypeErasedCleanup{[testIndexBasename]() { - for (const std::string& indexFilename : - getAllIndexFilenames(testIndexBasename)) { - // Don't log when a file can't be deleted, - // because the logging might already be - // destroyed. - ad_utility::deleteFile(indexFilename, false); - } - }}, - std::make_unique(makeTestIndex( - testIndexBasename, turtleInput, loadAllPermutations, - usePatterns, usePrefixCompression, - blocksizePermutations, createTextIndex)), - std::make_unique()}); + key, + Context{TypeErasedCleanup{[testIndexBasename]() { + for (const std::string& indexFilename : + getAllIndexFilenames(testIndexBasename)) { + // Don't log when a file can't be deleted, + // because the logging might already be + // destroyed. 
+ ad_utility::deleteFile(indexFilename, false); + } + }}, + std::make_unique(makeTestIndex( + testIndexBasename, turtleInput, loadAllPermutations, + usePatterns, usePrefixCompression, blocksizePermutations, + createTextIndex, addWordsFromLiterals, wordsFileContent, + docsFileContent)), + std::make_unique()}); } auto* qec = contextMap.at(key).qec_.get(); qec->getIndex().getImpl().setGlobalIndexAndComparatorOnlyForTesting(); diff --git a/test/util/IndexTestHelpers.h b/test/util/IndexTestHelpers.h index 3e09604613..6bbe0c9195 100644 --- a/test/util/IndexTestHelpers.h +++ b/test/util/IndexTestHelpers.h @@ -44,7 +44,10 @@ Index makeTestIndex(const std::string& indexBasename, bool loadAllPermutations = true, bool usePatterns = true, bool usePrefixCompression = true, ad_utility::MemorySize blocksizePermutations = 16_B, - bool createTextIndex = false); + bool createTextIndex = false, + bool addWordsFromLiterals = true, + std::optional wordsFileContent = std::nullopt, + std::optional docsFileContent = std::nullopt); // Return a static `QueryExecutionContext` that refers to an index that was // build using `makeTestIndex` (see above). The index (most notably its @@ -55,7 +58,9 @@ QueryExecutionContext* getQec( bool loadAllPermutations = true, bool usePatterns = true, bool usePrefixCompression = true, ad_utility::MemorySize blocksizePermutations = 16_B, - bool createTextIndex = false); + bool createTextIndex = false, bool addWordsFromLiterals = true, + std::optional wordsFileContent = std::nullopt, + std::optional docsFileContent = std::nullopt); // Return a lambda that takes a string and converts it into an ID by looking // it up in the vocabulary of `index`. An `AD_CONTRACT_CHECK` will fail if the From fdba417bae1e61d5e039589ca732e9e75d23800b Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Wed, 4 Dec 2024 15:07:51 +0100 Subject: [PATCH 26/38] Changed the counting of nofNonLiterals to nofLiterals. Some methods are still unstable because of the way nofContexts are counted. 
Implemented new more refined tests. --- src/index/Index.cpp | 2 +- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 3 +- src/index/IndexImpl.h | 2 +- src/index/TextMetaData.h | 8 +- test/engine/TextIndexScanForWordTest.cpp | 154 ++++++++++++++++------- test/engine/TextIndexScanTestHelpers.h | 28 +++-- 7 files changed, 135 insertions(+), 64 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index a652b85bfc..c8d1b1b40f 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,7 +233,7 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } +size_t Index::getNofLiterals() const { return pimpl_->getNofLiterals(); } // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 1fac924aca..d43d363d7d 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; - size_t getNofNonLiterals() const; + size_t getNofLiterals() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 49531383a4..4a405b691e 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -235,6 +235,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, ad_utility::HashMap wordsInContext; ad_utility::HashMap entitiesInContext; auto currentContext = TextRecordIndex::make(0); + // The nofContexts can be misleading since it also counts empty contexts size_t nofContexts = 0; size_t nofWordPostings = 0; size_t nofEntityPostings = 0; @@ -298,7 +299,7 @@ void 
IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - textMeta_.setNofNonLiterals(nofContexts - nofLiterals); + textMeta_.setNofLiterals(nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 6a350a4a6f..71b59654db 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -424,7 +424,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofNonLiterals() const { return textMeta_.getNofNonLiterals(); } + size_t getNofLiterals() const { return textMeta_.getNofLiterals(); } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 2198e052e4..2d45ce28d2 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,9 +98,9 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } - size_t getNofNonLiterals() const { return _nofNonLiterals; } + size_t getNofLiterals() const { return _nofLiterals; } - void setNofNonLiterals(size_t n) { _nofNonLiterals = n; } + void setNofLiterals(size_t n) { _nofLiterals = n; } const string& getName() const { return _name; } @@ -113,7 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; - size_t _nofNonLiterals = 0; + size_t _nofLiterals = 0; string _name; vector _blocks; @@ -123,7 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; - serializer | arg._nofNonLiterals; + serializer | arg._nofLiterals; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanForWordTest.cpp 
b/test/engine/TextIndexScanForWordTest.cpp index e421a8ca96..f3062b6ee1 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -24,50 +24,95 @@ std::string kg = "\"some other sentence\" .

\"the test on friday was really hard\" " ". . . ."; +std::string wordsFileContent = + "astronomer\t0\t1\t1\n" + "\t1\t1\t0\n" + "scientist\t0\t1\t1\n" + "field\t0\t1\t1\n" + "astronomy\t0\t1\t1\n" + "astronomer\t0\t2\t0\n" + "\t1\t2\t0\n" + ":s:firstsentence\t0\t2\t0\n" + "scientist\t0\t2\t0\n" + "field\t0\t2\t0\n" + "astronomy\t0\t2\t0\n" + "astronomy\t0\t3\t1\n" + "concentrates\t0\t3\t1\n" + "studies\t0\t3\t1\n" + "specific\t0\t3\t1\n" + "question\t0\t3\t1\n" + "outside\t0\t3\t1\n" + "scope\t0\t3\t1\n" + "earth\t0\t3\t1\n" + "astronomy\t0\t4\t1\n" + "concentrates\t0\t4\t1\n" + "studies\t0\t4\t1\n" + "field\t0\t4\t1\n" + "outside\t0\t4\t1\n" + "scope\t0\t4\t1\n" + "earth\t0\t4\t1\n" + "tester\t0\t5\t1\n" + "rockets\t0\t5\t1\n" + "astronomer\t0\t5\t1\n" + "\t1\t5\t0\n" + "although\t0\t5\t1\n" + "astronomer\t0\t6\t0\n" + "\t1\t6\t0\n" + "although\t0\t6\t0\n" + "\t1\t6\t0\n" + "space\t0\t6\t1\n" + "\t1\t7\t0\n" + "space\t0\t7\t0\n" + "earth\t0\t7\t1\n"; + +std::string docsFileContent = + "4\tAn astronomer is a scientist in the field of astronomy who " + "concentrates their studies on a specific question or field outside of " + "the scope of Earth.\n" + "7\tThe Tester of the rockets can be an astronomer too although they " + "might not be in space but on earth.\n"; + +std::string firstDocText = + "An astronomer is a scientist in the field of " + "astronomy who concentrates their studies on a " + "specific question or field outside of the scope of " + "Earth."; + +std::string secondDocText = + "The Tester of the rockets can be an astronomer " + "too although they might not be in space but on " + "earth."; + TEST(TextIndexScanForWord, WordScanPrefix) { - std::string wordsFileContent; - wordsFileContent = - "astronomer\t0\t1\t1\n" - "\t1\t1\t0\n" - "scientist\t0\t1\t1\n" - "field\t0\t1\t1\n" - "astronomy\t0\t1\t1\n" - "astronomer\t0\t2\t0\n" - "\t1\t2\t0\n" - ":s:firstsentence\t0\t2\t0\n" - "scientist\t0\t2\t0\n" - "field\t0\t2\t0\n" - "astronomy\t0\t2\t0\n" - 
"astronomy\t0\t3\t1\n" - "concentrates\t0\t3\t1\n" - "studies\t0\t3\t1\n" - "specific\t0\t3\t1\n" - "question\t0\t3\t1\n" - "outside\t0\t3\t1\n" - "scope\t0\t3\t1\n" - "earth\t0\t3\t1\n" - "astronomy\t0\t4\t1\n" - "concentrates\t0\t4\t1\n" - "studies\t0\t4\t1\n" - "field\t0\t4\t1\n" - "outside\t0\t4\t1\n" - "scope\t0\t4\t1\n" - "earth\t0\t4\t1\n"; - std::string docsFileContent; - docsFileContent = - "4\tAn astronomer is a scientist in the field of astronomy who " - "concentrates their studies on a specific question or field outside of " - "the scope of Earth.\n"; auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, docsFileContent); TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; auto tresult = t1.computeResultOnlyForTesting(); - ASSERT_EQ( - "An astronomer is a scientist in the field of astronomy who concentrates " - "their studies on a specific question or field outside of the scope of " - "Earth.", - h::getTextExcerptFromResultTable(qec, tresult, 0)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 0)); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 0)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 1)); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 1)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 2)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 2)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 3)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 3)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 4)); + ASSERT_EQ(TextRecordIndex::make(3), + h::getTextRecordIdFromResultTable(qec, tresult, 4)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 5)); + ASSERT_EQ(TextRecordIndex::make(4), + 
h::getTextRecordIdFromResultTable(qec, tresult, 5)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 6)); + ASSERT_EQ(TextRecordIndex::make(5), + h::getTextRecordIdFromResultTable(qec, tresult, 6)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 7)); + ASSERT_EQ(TextRecordIndex::make(6), + h::getTextRecordIdFromResultTable(qec, tresult, 7)); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -76,7 +121,7 @@ TEST(TextIndexScanForWord, WordScanPrefix) { auto result = s1.computeResultOnlyForTesting(); ASSERT_EQ(result.idTable().numColumns(), 3); - ASSERT_EQ(result.idTable().size(), 3); + ASSERT_EQ(result.idTable().size(), 4); s2.getExternallyVisibleVariableColumns(); using enum ColumnIndexAndTypeInfo::UndefStatus; @@ -87,20 +132,25 @@ TEST(TextIndexScanForWord, WordScanPrefix) { EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); - ASSERT_EQ(h::combineToString("\"he failed the test\"", "test"), + ASSERT_EQ(h::combineToString(secondDocText, "tester"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 0), h::getWordFromResultTable(qec, result, 0))); - ASSERT_EQ(h::combineToString("\"testing can help\"", "testing"), + + ASSERT_EQ(h::combineToString("\"he failed the test\"", "test"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 1), h::getWordFromResultTable(qec, result, 1))); + ASSERT_EQ(h::combineToString("\"testing can help\"", "testing"), + h::combineToString(h::getTextRecordFromResultTable(qec, result, 2), + h::getWordFromResultTable(qec, result, 2))); ASSERT_EQ( h::combineToString("\"the test on friday was really hard\"", "test"), - h::combineToString(h::getTextRecordFromResultTable(qec, result, 2), - h::getWordFromResultTable(qec, result, 2))); + h::combineToString(h::getTextRecordFromResultTable(qec, result, 3), + h::getWordFromResultTable(qec, result, 
3))); } TEST(TextIndexScanForWord, WordScanBasic) { - auto qec = getQec(kg, true, true, true, 16_B, true); + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test"}; @@ -125,10 +175,21 @@ TEST(TextIndexScanForWord, WordScanBasic) { ASSERT_EQ("\"testing can help\"", h::getTextRecordFromResultTable(qec, result, 0)); + + TextIndexScanForWord s3{qec, Variable{"?text1"}, "tester"}; + + ASSERT_EQ(s3.getResultWidth(), 2); + + result = s3.computeResultOnlyForTesting(); + ASSERT_EQ(result.idTable().numColumns(), 2); + ASSERT_EQ(result.idTable().size(), 1); + + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, result, 0)); } TEST(TextIndexScanForWord, CacheKey) { - auto qec = getQec(kg, true, true, true, 16_B, true); + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -151,7 +212,8 @@ TEST(TextIndexScanForWord, CacheKey) { } TEST(TextIndexScanForWord, KnownEmpty) { - auto qec = getQec(kg, true, true, true, 16_B, true); + auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, + docsFileContent); TextIndexScanForWord s1{qec, Variable{"?text1"}, "nonExistentWord*"}; ASSERT_TRUE(s1.knownEmptyResult()); diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 4b216d522b..c91a02a87c 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -4,6 +4,7 @@ #pragma once +#include "global/IndexTypes.h" namespace textIndexScanTestHelpers { // NOTE: this function exploits a "lucky accident" that allows us to // obtain the textRecord using indexToString. 
@@ -16,18 +17,25 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - uint64_t offset = qec->getIndex().getNofNonLiterals(); - uint64_t shiftedTextRecordId = - result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get() - - offset; - return qec->getIndex().indexToString(VocabIndex::make(shiftedTextRecordId)); + uint64_t nofLiterals = qec->getIndex().getNofLiterals(); + uint64_t nofContexts = qec->getIndex().getNofTextRecords(); + uint64_t textRecordIdFromTable = + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); + if ((nofContexts - nofLiterals) <= textRecordIdFromTable) { + // Return when from Literals + return qec->getIndex().indexToString( + VocabIndex::make(textRecordIdFromTable - (nofContexts - nofLiterals))); + } else { + // Return when from DocsDB + return qec->getIndex().getTextExcerpt( + result.idTable().getColumn(0)[rowIndex].getTextRecordIndex()); + } } -inline string getTextExcerptFromResultTable(const QueryExecutionContext* qec, - const ProtoResult& result, - const size_t& rowIndex) { - return qec->getIndex().getTextExcerpt( - result.idTable().getColumn(0)[rowIndex].getTextRecordIndex()); +inline const TextRecordIndex getTextRecordIdFromResultTable( + [[maybe_unused]] const QueryExecutionContext* qec, + const ProtoResult& result, const size_t& rowIndex) { + return result.idTable().getColumn(0)[rowIndex].getTextRecordIndex(); } inline string getEntityFromResultTable(const QueryExecutionContext* qec, From 6686325f65ddb7d5bf09fa5cc4a7b39cfdc10731 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 11:59:43 +0100 Subject: [PATCH 27/38] renamed nofLiterals to nofLiteralsInTextIndex --- src/index/Index.cpp | 4 +++- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 2 +- src/index/IndexImpl.h | 4 +++- src/index/TextMetaData.h | 8 ++++---- test/engine/TextIndexScanTestHelpers.h | 2 +- 6 files 
changed, 13 insertions(+), 9 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index c8d1b1b40f..fe11bf55f7 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,7 +233,9 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofLiterals() const { return pimpl_->getNofLiterals(); } +size_t Index::getNofLiteralsInTextIndex() const { + return pimpl_->getNofLiteralsInTextIndex(); +} // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index d43d363d7d..0288b15408 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; - size_t getNofLiterals() const; + size_t getNofLiteralsInTextIndex() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 4a405b691e..c844fc8f18 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,7 +299,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - textMeta_.setNofLiterals(nofLiterals); + textMeta_.setNofLiteralsInTextIndex(nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 71b59654db..d12619d6df 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -424,7 +424,9 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofLiterals() const { 
return textMeta_.getNofLiterals(); } + size_t getNofLiteralsInTextIndex() const { + return textMeta_.getNofLiteralsInTextIndex(); + } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 2d45ce28d2..b15b5e9a96 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,9 +98,9 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } - size_t getNofLiterals() const { return _nofLiterals; } + size_t getNofLiteralsInTextIndex() const { return _nofLiteralsInTextIndex; } - void setNofLiterals(size_t n) { _nofLiterals = n; } + void setNofLiteralsInTextIndex(size_t n) { _nofLiteralsInTextIndex = n; } const string& getName() const { return _name; } @@ -113,7 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; - size_t _nofLiterals = 0; + size_t _nofLiteralsInTextIndex = 0; string _name; vector _blocks; @@ -123,7 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; - serializer | arg._nofLiterals; + serializer | arg._nofLiteralsInTextIndex; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index c91a02a87c..80c9475608 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,7 +17,7 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - uint64_t nofLiterals = qec->getIndex().getNofLiterals(); + uint64_t nofLiterals = qec->getIndex().getNofLiteralsInTextIndex(); uint64_t nofContexts = qec->getIndex().getNofTextRecords(); uint64_t textRecordIdFromTable = result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); From 
0faf3d03fad71efce08c3111223bd0652cf72b79 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 12:18:32 +0100 Subject: [PATCH 28/38] Removed redundant method getWordScoreVariable --- src/parser/data/Variable.cpp | 25 ------------------------- src/parser/data/Variable.h | 4 ---- 2 files changed, 29 deletions(-) diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index 5195e48f66..cd41fb3b42 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -79,31 +79,6 @@ Variable Variable::getEntityScoreVariable( absl::StrCat(SCORE_VARIABLE_PREFIX, name().substr(1), type, entity)}; } -// _____________________________________________________________________________ -Variable Variable::getWordScoreVariable(const std::string& word, - bool isPrefix) const { - std::string_view type; - std::string_view wordToConvert; - std::string convertedWord; - if (isPrefix) { - wordToConvert = std::string_view(word.data(), word.size() - 1); - type = "prefix_"; - } else { - wordToConvert = std::string_view(word); - type = "word_"; - } - convertedWord += "_"; - for (char c : wordToConvert) { - if (isalpha(static_cast(c))) { - convertedWord += c; - } else { - absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); - } - } - return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, type, name().substr(1), - convertedWord)}; -} - // _____________________________________________________________________________ Variable Variable::getWordScoreVariable(std::string_view word, bool isPrefix) const { diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index fb46abd384..e3ef49136b 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -57,10 +57,6 @@ class Variable { // `?ql_score_prefix_someTextVar_somePrefix`. // Note that if the word contains non ascii characters they are converted to // numbers and escaped. 
- Variable getWordScoreVariable(const std::string& word, bool isPrefix) const; - - // Does the same thing as the function with std::string& param only for - // std::string_view Variable getWordScoreVariable(std::string_view word, bool isPrefix) const; // Convert `?someVariable` into `?ql_matchingword_someVariable_someTerm` From eafd594df9381b596c9c49c4b236d0bb1e2c15b9 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 12:49:12 +0100 Subject: [PATCH 29/38] added method appendEscapedWord to escape special chars in Variables --- src/parser/data/Variable.cpp | 30 ++++++++++++++---------------- src/parser/data/Variable.h | 4 ++++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/parser/data/Variable.cpp b/src/parser/data/Variable.cpp index cd41fb3b42..00371c3537 100644 --- a/src/parser/data/Variable.cpp +++ b/src/parser/data/Variable.cpp @@ -65,15 +65,7 @@ Variable Variable::getEntityScoreVariable( entity = std::get(varOrEntity).name().substr(1); } else { type = "_fixedEntity_"; - // Converts input string to unambiguous result string not containing any - // special characters. "_" is used as an escaping character. 
- for (char c : std::get(varOrEntity)) { - if (isalpha(static_cast(c))) { - entity += c; - } else { - absl::StrAppend(&entity, "_", std::to_string(c), "_"); - } - } + appendEscapedWord(std::get(varOrEntity), entity); } return Variable{ absl::StrCat(SCORE_VARIABLE_PREFIX, name().substr(1), type, entity)}; @@ -91,13 +83,7 @@ Variable Variable::getWordScoreVariable(std::string_view word, type = "word_"; } convertedWord = "_"; - for (char c : word) { - if (isalpha(static_cast(c))) { - convertedWord += c; - } else { - absl::StrAppend(&convertedWord, "_", std::to_string(c), "_"); - } - } + appendEscapedWord(word, convertedWord); return Variable{absl::StrCat(SCORE_VARIABLE_PREFIX, type, name().substr(1), convertedWord)}; } @@ -119,3 +105,15 @@ bool Variable::isValidVariableName(std::string_view var) { return false; } } + +// _____________________________________________________________________________ +void Variable::appendEscapedWord(std::string_view word, + std::string& target) const { + for (char c : word) { + if (isalpha(static_cast(c))) { + target += c; + } else { + absl::StrAppend(&target, "_", std::to_string(c), "_"); + } + } +} diff --git a/src/parser/data/Variable.h b/src/parser/data/Variable.h index e3ef49136b..5d89d21aac 100644 --- a/src/parser/data/Variable.h +++ b/src/parser/data/Variable.h @@ -80,4 +80,8 @@ class Variable { } static bool isValidVariableName(std::string_view var); + + // The method escapes all special chars in word to "_ASCIICODE_" and appends + // it at the end of target + void appendEscapedWord(std::string_view word, std::string& target) const; }; From fd01a977d4744a4aba3351c27f985483d4ef705b Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 13:57:52 +0100 Subject: [PATCH 30/38] Added two function in the TextIndexScanTestHelpers.h to add content to the wordsFileContent and docsFileContent strings. 
Now you can clearly see which lines are added, and writing tests is cleaner --- test/engine/TextIndexScanForWordTest.cpp | 89 ++++++++++++------------ test/engine/TextIndexScanTestHelpers.h | 14 ++++ 2 files changed, 57 insertions(+), 46 deletions(-) diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index f3062b6ee1..597e95aa8e 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -19,58 +19,52 @@ using ad_utility::source_location; namespace h = textIndexScanTestHelpers; namespace { + std::string kg = "

\"he failed the test\" .

\"testing can help\" .

" "\"some other sentence\" .

\"the test on friday was really hard\" " ". . . ."; std::string wordsFileContent = - "astronomer\t0\t1\t1\n" - "\t1\t1\t0\n" - "scientist\t0\t1\t1\n" - "field\t0\t1\t1\n" - "astronomy\t0\t1\t1\n" - "astronomer\t0\t2\t0\n" - "\t1\t2\t0\n" - ":s:firstsentence\t0\t2\t0\n" - "scientist\t0\t2\t0\n" - "field\t0\t2\t0\n" - "astronomy\t0\t2\t0\n" - "astronomy\t0\t3\t1\n" - "concentrates\t0\t3\t1\n" - "studies\t0\t3\t1\n" - "specific\t0\t3\t1\n" - "question\t0\t3\t1\n" - "outside\t0\t3\t1\n" - "scope\t0\t3\t1\n" - "earth\t0\t3\t1\n" - "astronomy\t0\t4\t1\n" - "concentrates\t0\t4\t1\n" - "studies\t0\t4\t1\n" - "field\t0\t4\t1\n" - "outside\t0\t4\t1\n" - "scope\t0\t4\t1\n" - "earth\t0\t4\t1\n" - "tester\t0\t5\t1\n" - "rockets\t0\t5\t1\n" - "astronomer\t0\t5\t1\n" - "\t1\t5\t0\n" - "although\t0\t5\t1\n" - "astronomer\t0\t6\t0\n" - "\t1\t6\t0\n" - "although\t0\t6\t0\n" - "\t1\t6\t0\n" - "space\t0\t6\t1\n" - "\t1\t7\t0\n" - "space\t0\t7\t0\n" - "earth\t0\t7\t1\n"; - -std::string docsFileContent = - "4\tAn astronomer is a scientist in the field of astronomy who " - "concentrates their studies on a specific question or field outside of " - "the scope of Earth.\n" - "7\tThe Tester of the rockets can be an astronomer too although they " - "might not be in space but on earth.\n"; + h::createWordsFileLine("astronomer", false, 1, 1) + + h::createWordsFileLine("", true, 1, 0) + + h::createWordsFileLine("scientist", false, 1, 1) + + h::createWordsFileLine("field", false, 1, 1) + + h::createWordsFileLine("astronomy", false, 1, 1) + + h::createWordsFileLine("astronomer", false, 2, 0) + + h::createWordsFileLine("", true, 2, 0) + + h::createWordsFileLine(":s:firstsentence", false, 2, 0) + + h::createWordsFileLine("scientist", false, 2, 0) + + h::createWordsFileLine("field", false, 2, 0) + + h::createWordsFileLine("astronomy", false, 2, 0) + + h::createWordsFileLine("astronomy", false, 3, 1) + + h::createWordsFileLine("concentrates", false, 3, 1) + + h::createWordsFileLine("studies", false, 
3, 1) + + h::createWordsFileLine("specific", false, 3, 1) + + h::createWordsFileLine("question", false, 3, 1) + + h::createWordsFileLine("outside", false, 3, 1) + + h::createWordsFileLine("scope", false, 3, 1) + + h::createWordsFileLine("earth", false, 3, 1) + + h::createWordsFileLine("astronomy", false, 4, 1) + + h::createWordsFileLine("concentrates", false, 4, 1) + + h::createWordsFileLine("studies", false, 4, 1) + + h::createWordsFileLine("field", false, 4, 1) + + h::createWordsFileLine("outside", false, 4, 1) + + h::createWordsFileLine("scope", false, 4, 1) + + h::createWordsFileLine("earth", false, 4, 1) + + h::createWordsFileLine("tester", false, 5, 1) + + h::createWordsFileLine("rockets", false, 5, 1) + + h::createWordsFileLine("astronomer", false, 5, 1) + + h::createWordsFileLine("", true, 5, 0) + + h::createWordsFileLine("although", false, 5, 1) + + h::createWordsFileLine("astronomer", false, 6, 0) + + h::createWordsFileLine("", true, 6, 0) + + h::createWordsFileLine("although", false, 6, 0) + + h::createWordsFileLine("", true, 6, 0) + + h::createWordsFileLine("space", false, 6, 1) + + h::createWordsFileLine("", true, 7, 0) + + h::createWordsFileLine("space", false, 7, 0) + + h::createWordsFileLine("earth", false, 7, 1); std::string firstDocText = "An astronomer is a scientist in the field of " @@ -83,6 +77,9 @@ std::string secondDocText = "too although they might not be in space but on " "earth."; +std::string docsFileContent = h::createDocsFileLine(4, firstDocText) + + h::createDocsFileLine(7, secondDocText); + TEST(TextIndexScanForWord, WordScanPrefix) { auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, docsFileContent); diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 80c9475608..6c9d897514 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -57,4 +57,18 @@ inline string combineToString(const string& text, const string& word) { ss 
<< "Text: " << text << ", Word: " << word << std::endl; return ss.str(); } + +std::string inlineSeperator = "\t"; +std::string lineSeperator = "\n"; + +inline string createWordsFileLine(std::string word, bool isEntity, + size_t contextId, size_t score) { + return word + inlineSeperator + (isEntity ? "1" : "0") + inlineSeperator + + std::to_string(contextId) + inlineSeperator + std::to_string(score) + + lineSeperator; +}; + +inline string createDocsFileLine(size_t docId, std::string docContent) { + return std::to_string(docId) + inlineSeperator + docContent + lineSeperator; +}; } // namespace textIndexScanTestHelpers From 65842f4d6d1f277db64c8359bf3895c91ffd9889 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 14:37:29 +0100 Subject: [PATCH 31/38] Added tests for Scores. Also commented tests and refined them --- test/engine/TextIndexScanForWordTest.cpp | 79 ++++++++++++++++-------- test/engine/TextIndexScanTestHelpers.h | 10 +++ 2 files changed, 62 insertions(+), 27 deletions(-) diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 597e95aa8e..7e9b0c0fd9 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -84,36 +84,10 @@ TEST(TextIndexScanForWord, WordScanPrefix) { auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, docsFileContent); - TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; - auto tresult = t1.computeResultOnlyForTesting(); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 0)); - ASSERT_EQ(TextRecordIndex::make(1), - h::getTextRecordIdFromResultTable(qec, tresult, 0)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 1)); - ASSERT_EQ(TextRecordIndex::make(1), - h::getTextRecordIdFromResultTable(qec, tresult, 1)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 2)); - ASSERT_EQ(TextRecordIndex::make(2), - 
h::getTextRecordIdFromResultTable(qec, tresult, 2)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 3)); - ASSERT_EQ(TextRecordIndex::make(2), - h::getTextRecordIdFromResultTable(qec, tresult, 3)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 4)); - ASSERT_EQ(TextRecordIndex::make(3), - h::getTextRecordIdFromResultTable(qec, tresult, 4)); - ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 5)); - ASSERT_EQ(TextRecordIndex::make(4), - h::getTextRecordIdFromResultTable(qec, tresult, 5)); - ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 6)); - ASSERT_EQ(TextRecordIndex::make(5), - h::getTextRecordIdFromResultTable(qec, tresult, 6)); - ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 7)); - ASSERT_EQ(TextRecordIndex::make(6), - h::getTextRecordIdFromResultTable(qec, tresult, 7)); - TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; + // Test if size calculations are right ASSERT_EQ(s1.getResultWidth(), 3); auto result = s1.computeResultOnlyForTesting(); @@ -121,6 +95,7 @@ TEST(TextIndexScanForWord, WordScanPrefix) { ASSERT_EQ(result.idTable().size(), 4); s2.getExternallyVisibleVariableColumns(); + // Test if all columns are there and correct using enum ColumnIndexAndTypeInfo::UndefStatus; VariableToColumnMap expectedVariables{ {Variable{"?text2"}, {0, AlwaysDefined}}, @@ -129,6 +104,8 @@ TEST(TextIndexScanForWord, WordScanPrefix) { EXPECT_THAT(s2.getExternallyVisibleVariableColumns(), ::testing::UnorderedElementsAreArray(expectedVariables)); + // Tests if the correct texts are retrieved from a mix of non literal and + // literal texts ASSERT_EQ(h::combineToString(secondDocText, "tester"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 0), h::getWordFromResultTable(qec, result, 0))); @@ -143,6 +120,54 @@ TEST(TextIndexScanForWord, WordScanPrefix) { 
h::combineToString("\"the test on friday was really hard\"", "test"), h::combineToString(h::getTextRecordFromResultTable(qec, result, 3), h::getWordFromResultTable(qec, result, 3))); + + // Tests if the correct texts are retrieved from the non literal texts + TextIndexScanForWord t1{qec, Variable{"?t1"}, "astronom*"}; + auto tresult = t1.computeResultOnlyForTesting(); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 0)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 0)); + ASSERT_EQ(TextRecordIndex::make(1), + h::getTextRecordIdFromResultTable(qec, tresult, 1)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 1)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 2)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 2)); + ASSERT_EQ(TextRecordIndex::make(2), + h::getTextRecordIdFromResultTable(qec, tresult, 3)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 3)); + ASSERT_EQ(TextRecordIndex::make(3), + h::getTextRecordIdFromResultTable(qec, tresult, 4)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 4)); + ASSERT_EQ(TextRecordIndex::make(4), + h::getTextRecordIdFromResultTable(qec, tresult, 5)); + ASSERT_EQ(firstDocText, h::getTextRecordFromResultTable(qec, tresult, 5)); + ASSERT_EQ(TextRecordIndex::make(5), + h::getTextRecordIdFromResultTable(qec, tresult, 6)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 6)); + ASSERT_EQ(TextRecordIndex::make(6), + h::getTextRecordIdFromResultTable(qec, tresult, 7)); + ASSERT_EQ(secondDocText, h::getTextRecordFromResultTable(qec, tresult, 7)); + + // Tests if correct words are deducted from prefix + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 0)); + ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 1)); + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 2)); 
+ ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 3)); + ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 4)); + ASSERT_EQ("astronomy", h::getWordFromResultTable(qec, tresult, 5)); + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 6)); + ASSERT_EQ("astronomer", h::getWordFromResultTable(qec, tresult, 7)); + + // Tests if the correct scores are retrieved from the non literal texts + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 0, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 1, true)); + ASSERT_EQ(0, h::getScoreFromResultTable(qec, tresult, 2, true)); + ASSERT_EQ(0, h::getScoreFromResultTable(qec, tresult, 3, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 4, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 5, true)); + ASSERT_EQ(1, h::getScoreFromResultTable(qec, tresult, 6, true)); + ASSERT_EQ(0, h::getScoreFromResultTable(qec, tresult, 7, true)); } TEST(TextIndexScanForWord, WordScanBasic) { diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 6c9d897514..d1bfe0d2fc 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -38,6 +38,7 @@ inline const TextRecordIndex getTextRecordIdFromResultTable( return result.idTable().getColumn(0)[rowIndex].getTextRecordIndex(); } +// Only use on prefix search results inline string getEntityFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { @@ -45,6 +46,7 @@ inline string getEntityFromResultTable(const QueryExecutionContext* qec, result.idTable().getColumn(1)[rowIndex].getVocabIndex()); } +// Only use on prefix search results inline string getWordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { @@ -52,6 +54,14 @@ inline string getWordFromResultTable(const QueryExecutionContext* qec, 
result.idTable().getColumn(1)[rowIndex].getWordVocabIndex())}; } +inline size_t getScoreFromResultTable( + [[maybe_unused]] const QueryExecutionContext* qec, + const ProtoResult& result, const size_t& rowIndex, bool wasPrefixSearch) { + size_t colToRetrieve = wasPrefixSearch ? 2 : 1; + return static_cast( + result.idTable().getColumn(colToRetrieve)[rowIndex].getInt()); +} + inline string combineToString(const string& text, const string& word) { std::stringstream ss; ss << "Text: " << text << ", Word: " << word << std::endl; From baa10cf1b8181faf7583e936ee44acbe29e74c3e Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 5 Dec 2024 15:03:41 +0100 Subject: [PATCH 32/38] Changed the getQec function and the respective makeTestIndex to take in the wordsFileContent and docsFileContent as pair contentsOfWordsFileAndDocsFile --- test/engine/TextIndexScanForWordTest.cpp | 19 +++++---- test/util/IndexTestHelpers.cpp | 49 ++++++++++++------------ test/util/IndexTestHelpers.h | 8 ++-- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/test/engine/TextIndexScanForWordTest.cpp b/test/engine/TextIndexScanForWordTest.cpp index 7e9b0c0fd9..eac3cb0d2f 100644 --- a/test/engine/TextIndexScanForWordTest.cpp +++ b/test/engine/TextIndexScanForWordTest.cpp @@ -80,9 +80,12 @@ std::string secondDocText = std::string docsFileContent = h::createDocsFileLine(4, firstDocText) + h::createDocsFileLine(7, secondDocText); +std::pair contentsOfWordsFileAndDocsFile = { + wordsFileContent, docsFileContent}; + TEST(TextIndexScanForWord, WordScanPrefix) { - auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -171,8 +174,8 @@ TEST(TextIndexScanForWord, WordScanPrefix) { } TEST(TextIndexScanForWord, WordScanBasic) { - auto qec = 
getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test"}; @@ -210,8 +213,8 @@ TEST(TextIndexScanForWord, WordScanBasic) { } TEST(TextIndexScanForWord, CacheKey) { - auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "test*"}; TextIndexScanForWord s2{qec, Variable{"?text2"}, "test*"}; @@ -234,8 +237,8 @@ TEST(TextIndexScanForWord, CacheKey) { } TEST(TextIndexScanForWord, KnownEmpty) { - auto qec = getQec(kg, true, true, true, 16_B, true, true, wordsFileContent, - docsFileContent); + auto qec = getQec(kg, true, true, true, 16_B, true, true, + contentsOfWordsFileAndDocsFile); TextIndexScanForWord s1{qec, Variable{"?text1"}, "nonExistentWord*"}; ASSERT_TRUE(s1.knownEmptyResult()); diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index 72d1016b50..0dcfd334a6 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -140,8 +140,8 @@ Index makeTestIndex(const std::string& indexBasename, [[maybe_unused]] bool usePrefixCompression, ad_utility::MemorySize blocksizePermutations, bool createTextIndex, bool addWordsFromLiterals, - std::optional wordsFileContent, - std::optional docsFileContent) { + std::optional> + contentsOfWordsFileAndDocsFile) { // Ignore the (irrelevant) log output of the index building and loading during // these tests. 
static std::ostringstream ignoreLogStream; @@ -188,15 +188,15 @@ Index makeTestIndex(const std::string& indexBasename, std::nullopt}; index.createFromFiles({spec}); if (createTextIndex) { - if (wordsFileContent.has_value() && docsFileContent.has_value()) { + if (contentsOfWordsFileAndDocsFile.has_value()) { // Create and write to words- and docsfile to later build a full text // index from them ad_utility::File wordsFile(indexBasename + ".wordsfile", "w"); ad_utility::File docsFile(indexBasename + ".docsfile", "w"); - wordsFile.write(wordsFileContent.value().c_str(), - wordsFileContent.value().size()); - docsFile.write(docsFileContent.value().c_str(), - docsFileContent.value().size()); + wordsFile.write(contentsOfWordsFileAndDocsFile.value().first.c_str(), + contentsOfWordsFileAndDocsFile.value().first.size()); + docsFile.write(contentsOfWordsFileAndDocsFile.value().second.c_str(), + contentsOfWordsFileAndDocsFile.value().second.size()); wordsFile.close(); docsFile.close(); index.setKbName(indexBasename); @@ -246,8 +246,8 @@ QueryExecutionContext* getQec(std::optional turtleInput, bool usePrefixCompression, ad_utility::MemorySize blocksizePermutations, bool createTextIndex, bool addWordsFromLiterals, - std::optional wordsFileContent, - std::optional docsFileContent) { + std::optional> + contentsOfWordsFileAndDocsFile) { // Similar to `absl::Cleanup`. Calls the `callback_` in the destructor, but // the callback is stored as a `std::function`, which allows to store // different types of callbacks in the same wrapper type. @@ -294,22 +294,21 @@ QueryExecutionContext* getQec(std::optional turtleInput, std::string testIndexBasename = "_staticGlobalTestIndex" + std::to_string(contextMap.size()); contextMap.emplace( - key, - Context{TypeErasedCleanup{[testIndexBasename]() { - for (const std::string& indexFilename : - getAllIndexFilenames(testIndexBasename)) { - // Don't log when a file can't be deleted, - // because the logging might already be - // destroyed. 
- ad_utility::deleteFile(indexFilename, false); - } - }}, - std::make_unique(makeTestIndex( - testIndexBasename, turtleInput, loadAllPermutations, - usePatterns, usePrefixCompression, blocksizePermutations, - createTextIndex, addWordsFromLiterals, wordsFileContent, - docsFileContent)), - std::make_unique()}); + key, Context{TypeErasedCleanup{[testIndexBasename]() { + for (const std::string& indexFilename : + getAllIndexFilenames(testIndexBasename)) { + // Don't log when a file can't be deleted, + // because the logging might already be + // destroyed. + ad_utility::deleteFile(indexFilename, false); + } + }}, + std::make_unique(makeTestIndex( + testIndexBasename, turtleInput, loadAllPermutations, + usePatterns, usePrefixCompression, + blocksizePermutations, createTextIndex, + addWordsFromLiterals, contentsOfWordsFileAndDocsFile)), + std::make_unique()}); } auto* qec = contextMap.at(key).qec_.get(); qec->getIndex().getImpl().setGlobalIndexAndComparatorOnlyForTesting(); diff --git a/test/util/IndexTestHelpers.h b/test/util/IndexTestHelpers.h index 6bbe0c9195..cbbd5ea486 100644 --- a/test/util/IndexTestHelpers.h +++ b/test/util/IndexTestHelpers.h @@ -46,8 +46,8 @@ Index makeTestIndex(const std::string& indexBasename, ad_utility::MemorySize blocksizePermutations = 16_B, bool createTextIndex = false, bool addWordsFromLiterals = true, - std::optional wordsFileContent = std::nullopt, - std::optional docsFileContent = std::nullopt); + std::optional> + contentsOfWordsFileAndDocsfile = std::nullopt); // Return a static `QueryExecutionContext` that refers to an index that was // build using `makeTestIndex` (see above). 
The index (most notably its @@ -59,8 +59,8 @@ QueryExecutionContext* getQec( bool usePrefixCompression = true, ad_utility::MemorySize blocksizePermutations = 16_B, bool createTextIndex = false, bool addWordsFromLiterals = true, - std::optional wordsFileContent = std::nullopt, - std::optional docsFileContent = std::nullopt); + std::optional> + contentsOfWordsFileAndDocsfile = std::nullopt); // Return a lambda that takes a string and converts it into an ID by looking // it up in the vocabulary of `index`. An `AD_CONTRACT_CHECK` will fail if the From 6bb80d3cf1b0cf13c333ea943d767c5509b16c79 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 12 Dec 2024 14:11:49 +0100 Subject: [PATCH 33/38] Fix the multiple definition error. Signed-off-by: Johannes Kalmbach --- test/engine/TextIndexScanTestHelpers.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index d1bfe0d2fc..a0107ffe83 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -68,17 +68,17 @@ inline string combineToString(const string& text, const string& word) { return ss.str(); } -std::string inlineSeperator = "\t"; -std::string lineSeperator = "\n"; +inline std::string inlineSeparator = "\t"; +inline std::string lineSeparator = "\n"; inline string createWordsFileLine(std::string word, bool isEntity, size_t contextId, size_t score) { - return word + inlineSeperator + (isEntity ? "1" : "0") + inlineSeperator + - std::to_string(contextId) + inlineSeperator + std::to_string(score) + - lineSeperator; + return word + inlineSeparator + (isEntity ? 
"1" : "0") + inlineSeparator + + std::to_string(contextId) + inlineSeparator + std::to_string(score) + + lineSeparator; }; inline string createDocsFileLine(size_t docId, std::string docContent) { - return std::to_string(docId) + inlineSeperator + docContent + lineSeperator; + return std::to_string(docId) + inlineSeparator + docContent + lineSeparator; }; } // namespace textIndexScanTestHelpers From d093d852d7eb1ab1352fe9d158c20ac646f80aea Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 21:54:50 +0100 Subject: [PATCH 34/38] Reverting the nofLiterals being saved in the TextMetaData and instead saving nofNonLiterals in the configuration json file. --- src/index/Index.cpp | 4 +--- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 4 +++- src/index/IndexImpl.cpp | 1 + src/index/IndexImpl.h | 9 ++++++--- src/index/TextMetaData.h | 6 ------ test/engine/TextIndexScanTestHelpers.h | 7 +++---- 7 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index fe11bf55f7..a652b85bfc 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,9 +233,7 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofLiteralsInTextIndex() const { - return pimpl_->getNofLiteralsInTextIndex(); -} +size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 0288b15408..1fac924aca 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; - size_t getNofLiteralsInTextIndex() const; + size_t getNofNonLiterals() const; NumNormalAndInternal numDistinctSubjects() const; 
NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index c844fc8f18..397a227854 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,7 +299,9 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - textMeta_.setNofLiteralsInTextIndex(nofLiterals); + nofNonLiterals_ = nofContexts - nofLiterals; + configurationJson_["num-non-literals"] = nofNonLiterals_; + writeConfiguration(); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index ed8a6dd526..41f9cf50ff 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1128,6 +1128,7 @@ void IndexImpl::readConfiguration() { loadDataMember("num-subjects", numSubjects_, NumNormalAndInternal{}); loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); + loadDataMember("num-non-literals", nofNonLiterals_, 0); // Initialize BlankNodeManager uint64_t numBlankNodesTotal; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index d12619d6df..3fa8db4194 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -158,6 +158,11 @@ class IndexImpl { NumNormalAndInternal numTriples_; string indexId_; + // Keeps track of the number of nonLiteral contexts in the index this is used + // in the test retrieval of the texts. This only works reliably if the + // wordsFile.tsv starts with contextId 1 and is continuous. + size_t nofNonLiterals_; + // Global static pointers to the currently active index and comparator. // Those are used to compare LocalVocab entries with each other as well as // with Vocab entries. 
@@ -424,9 +429,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofLiteralsInTextIndex() const { - return textMeta_.getNofLiteralsInTextIndex(); - } + size_t getNofNonLiterals() const { return nofNonLiterals_; } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index b15b5e9a96..30fda07921 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,10 +98,6 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } - size_t getNofLiteralsInTextIndex() const { return _nofLiteralsInTextIndex; } - - void setNofLiteralsInTextIndex(size_t n) { _nofLiteralsInTextIndex = n; } - const string& getName() const { return _name; } void setName(const string& name) { _name = name; } @@ -113,7 +109,6 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; - size_t _nofLiteralsInTextIndex = 0; string _name; vector _blocks; @@ -123,7 +118,6 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; - serializer | arg._nofLiteralsInTextIndex; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index a0107ffe83..7d344b998b 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,14 +17,13 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - uint64_t nofLiterals = qec->getIndex().getNofLiteralsInTextIndex(); - uint64_t nofContexts = qec->getIndex().getNofTextRecords(); + size_t nofNonLiterals = qec->getIndex().getNofNonLiterals(); uint64_t textRecordIdFromTable = 
result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); - if ((nofContexts - nofLiterals) <= textRecordIdFromTable) { + if (nofNonLiterals <= textRecordIdFromTable) { // Return when from Literals return qec->getIndex().indexToString( - VocabIndex::make(textRecordIdFromTable - (nofContexts - nofLiterals))); + VocabIndex::make(textRecordIdFromTable - nofNonLiterals)); } else { // Return when from DocsDB return qec->getIndex().getTextExcerpt( From 716e8287bcbbf96024cc9d33597489d10ca2fdb5 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 22:01:00 +0100 Subject: [PATCH 35/38] Revert to first sync and then reapply "Reverting the nofLiterals being saved in the TextMetaData and instead saving nofNonLiterals in the configuration json file." This reverts commit 1adcecbec43612de6723ec65b0a84639f0eeba6f. --- src/index/Index.cpp | 4 +++- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 4 +--- src/index/IndexImpl.cpp | 1 - src/index/IndexImpl.h | 9 +++------ src/index/TextMetaData.h | 6 ++++++ test/engine/TextIndexScanTestHelpers.h | 7 ++++--- 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index a652b85bfc..fe11bf55f7 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,7 +233,9 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } +size_t Index::getNofLiteralsInTextIndex() const { + return pimpl_->getNofLiteralsInTextIndex(); +} // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 1fac924aca..0288b15408 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t 
getNofEntityPostings() const; - size_t getNofNonLiterals() const; + size_t getNofLiteralsInTextIndex() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 397a227854..c844fc8f18 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,9 +299,7 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - nofNonLiterals_ = nofContexts - nofLiterals; - configurationJson_["num-non-literals"] = nofNonLiterals_; - writeConfiguration(); + textMeta_.setNofLiteralsInTextIndex(nofLiterals); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 41f9cf50ff..ed8a6dd526 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1128,7 +1128,6 @@ void IndexImpl::readConfiguration() { loadDataMember("num-subjects", numSubjects_, NumNormalAndInternal{}); loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); - loadDataMember("num-non-literals", nofNonLiterals_, 0); // Initialize BlankNodeManager uint64_t numBlankNodesTotal; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 3fa8db4194..d12619d6df 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -158,11 +158,6 @@ class IndexImpl { NumNormalAndInternal numTriples_; string indexId_; - // Keeps track of the number of nonLiteral contexts in the index this is used - // in the test retrieval of the texts. This only works reliably if the - // wordsFile.tsv starts with contextId 1 and is continuous. - size_t nofNonLiterals_; - // Global static pointers to the currently active index and comparator. 
// Those are used to compare LocalVocab entries with each other as well as // with Vocab entries. @@ -429,7 +424,9 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofNonLiterals() const { return nofNonLiterals_; } + size_t getNofLiteralsInTextIndex() const { + return textMeta_.getNofLiteralsInTextIndex(); + } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index 30fda07921..b15b5e9a96 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,6 +98,10 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } + size_t getNofLiteralsInTextIndex() const { return _nofLiteralsInTextIndex; } + + void setNofLiteralsInTextIndex(size_t n) { _nofLiteralsInTextIndex = n; } + const string& getName() const { return _name; } void setName(const string& name) { _name = name; } @@ -109,6 +113,7 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; + size_t _nofLiteralsInTextIndex = 0; string _name; vector _blocks; @@ -118,6 +123,7 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; + serializer | arg._nofLiteralsInTextIndex; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 7d344b998b..a0107ffe83 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,13 +17,14 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - size_t nofNonLiterals = qec->getIndex().getNofNonLiterals(); + uint64_t nofLiterals = qec->getIndex().getNofLiteralsInTextIndex(); + uint64_t nofContexts = 
qec->getIndex().getNofTextRecords(); uint64_t textRecordIdFromTable = result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); - if (nofNonLiterals <= textRecordIdFromTable) { + if ((nofContexts - nofLiterals) <= textRecordIdFromTable) { // Return when from Literals return qec->getIndex().indexToString( - VocabIndex::make(textRecordIdFromTable - nofNonLiterals)); + VocabIndex::make(textRecordIdFromTable - (nofContexts - nofLiterals))); } else { // Return when from DocsDB return qec->getIndex().getTextExcerpt( From 2e32bd36f20fe77e12021679435666bdca7e6eff Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 21:54:50 +0100 Subject: [PATCH 36/38] Reverting the nofLiterals being saved in the TextMetaData and instead saving nofNonLiterals in the configuration json file. --- src/index/Index.cpp | 4 +--- src/index/Index.h | 2 +- src/index/IndexImpl.Text.cpp | 4 +++- src/index/IndexImpl.cpp | 1 + src/index/IndexImpl.h | 9 ++++++--- src/index/TextMetaData.h | 6 ------ test/engine/TextIndexScanTestHelpers.h | 7 +++---- 7 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index fe11bf55f7..a652b85bfc 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,9 +233,7 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofLiteralsInTextIndex() const { - return pimpl_->getNofLiteralsInTextIndex(); -} +size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 0288b15408..1fac924aca 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; - 
size_t getNofLiteralsInTextIndex() const; + size_t getNofNonLiterals() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index c844fc8f18..397a227854 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,7 +299,9 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings); textMeta_.setNofEntityPostings(nofEntityPostings); - textMeta_.setNofLiteralsInTextIndex(nofLiterals); + nofNonLiterals_ = nofContexts - nofLiterals; + configurationJson_["num-non-literals"] = nofNonLiterals_; + writeConfiguration(); writer.finish(); LOG(TRACE) << "END IndexImpl::passContextFileIntoVector" << std::endl; diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index ed8a6dd526..41f9cf50ff 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1128,6 +1128,7 @@ void IndexImpl::readConfiguration() { loadDataMember("num-subjects", numSubjects_, NumNormalAndInternal{}); loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); + loadDataMember("num-non-literals", nofNonLiterals_, 0); // Initialize BlankNodeManager uint64_t numBlankNodesTotal; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index d12619d6df..3fa8db4194 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -158,6 +158,11 @@ class IndexImpl { NumNormalAndInternal numTriples_; string indexId_; + // Keeps track of the number of nonLiteral contexts in the index this is used + // in the test retrieval of the texts. This only works reliably if the + // wordsFile.tsv starts with contextId 1 and is continuous. + size_t nofNonLiterals_; + // Global static pointers to the currently active index and comparator. 
// Those are used to compare LocalVocab entries with each other as well as // with Vocab entries. @@ -424,9 +429,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofLiteralsInTextIndex() const { - return textMeta_.getNofLiteralsInTextIndex(); - } + size_t getNofNonLiterals() const { return nofNonLiterals_; } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/src/index/TextMetaData.h b/src/index/TextMetaData.h index b15b5e9a96..30fda07921 100644 --- a/src/index/TextMetaData.h +++ b/src/index/TextMetaData.h @@ -98,10 +98,6 @@ class TextMetaData { void setNofEntityPostings(size_t n) { _nofEntityPostings = n; } - size_t getNofLiteralsInTextIndex() const { return _nofLiteralsInTextIndex; } - - void setNofLiteralsInTextIndex(size_t n) { _nofLiteralsInTextIndex = n; } - const string& getName() const { return _name; } void setName(const string& name) { _name = name; } @@ -113,7 +109,6 @@ class TextMetaData { size_t _nofTextRecords = 0; size_t _nofWordPostings = 0; size_t _nofEntityPostings = 0; - size_t _nofLiteralsInTextIndex = 0; string _name; vector _blocks; @@ -123,7 +118,6 @@ class TextMetaData { serializer | arg._nofTextRecords; serializer | arg._nofWordPostings; serializer | arg._nofEntityPostings; - serializer | arg._nofLiteralsInTextIndex; serializer | arg._name; serializer | arg._blocks; } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index a0107ffe83..7d344b998b 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,14 +17,13 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - uint64_t nofLiterals = qec->getIndex().getNofLiteralsInTextIndex(); - uint64_t nofContexts = qec->getIndex().getNofTextRecords(); + size_t nofNonLiterals = 
qec->getIndex().getNofNonLiterals(); uint64_t textRecordIdFromTable = result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); - if ((nofContexts - nofLiterals) <= textRecordIdFromTable) { + if (nofNonLiterals <= textRecordIdFromTable) { // Return when from Literals return qec->getIndex().indexToString( - VocabIndex::make(textRecordIdFromTable - (nofContexts - nofLiterals))); + VocabIndex::make(textRecordIdFromTable - nofNonLiterals)); } else { // Return when from DocsDB return qec->getIndex().getTextExcerpt( From e93f944a3e4233c4110616a356436b17f05a0d31 Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Thu, 12 Dec 2024 23:03:12 +0100 Subject: [PATCH 37/38] Changed some naming to better describe functions --- src/index/Index.cpp | 4 +++- src/index/Index.h | 2 +- src/index/IndexImpl.h | 2 +- test/engine/TextIndexScanTestHelpers.h | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/index/Index.cpp b/src/index/Index.cpp index a652b85bfc..c70706b341 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -233,7 +233,9 @@ size_t Index::getNofEntityPostings() const { } // ____________________________________________________________________________ -size_t Index::getNofNonLiterals() const { return pimpl_->getNofNonLiterals(); } +size_t Index::getNofNonLiteralsInTextIndex() const { + return pimpl_->getNofNonLiteralsInTextIndex(); +} // ____________________________________________________________________________ Index::NumNormalAndInternal Index::numDistinctSubjects() const { diff --git a/src/index/Index.h b/src/index/Index.h index 1fac924aca..e815b2a5bf 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -214,7 +214,7 @@ class Index { size_t getNofTextRecords() const; size_t getNofWordPostings() const; size_t getNofEntityPostings() const; - size_t getNofNonLiterals() const; + size_t getNofNonLiteralsInTextIndex() const; NumNormalAndInternal numDistinctSubjects() const; NumNormalAndInternal numDistinctObjects() const; diff --git 
a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 3fa8db4194..37041cc301 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -429,7 +429,7 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofNonLiterals() const { return nofNonLiterals_; } + size_t getNofNonLiteralsInTextIndex() const { return nofNonLiterals_; } bool hasAllPermutations() const { return SPO().isLoaded(); } diff --git a/test/engine/TextIndexScanTestHelpers.h b/test/engine/TextIndexScanTestHelpers.h index 7d344b998b..83a72ddea4 100644 --- a/test/engine/TextIndexScanTestHelpers.h +++ b/test/engine/TextIndexScanTestHelpers.h @@ -17,7 +17,7 @@ namespace textIndexScanTestHelpers { inline string getTextRecordFromResultTable(const QueryExecutionContext* qec, const ProtoResult& result, const size_t& rowIndex) { - size_t nofNonLiterals = qec->getIndex().getNofNonLiterals(); + size_t nofNonLiterals = qec->getIndex().getNofNonLiteralsInTextIndex(); uint64_t textRecordIdFromTable = result.idTable().getColumn(0)[rowIndex].getTextRecordIndex().get(); if (nofNonLiterals <= textRecordIdFromTable) { From deb1e375c1ed984202490a4073e0d10c1dbdfb9d Mon Sep 17 00:00:00 2001 From: Felix Meisen Date: Fri, 13 Dec 2024 09:58:09 +0100 Subject: [PATCH 38/38] Changed the ambiguous naming of nofNonLiterals to nofNonLiteralsInTextIndex everywhere as well as num-non-literals to num-non-literals-text-index --- src/index/IndexImpl.Text.cpp | 5 +++-- src/index/IndexImpl.cpp | 2 +- src/index/IndexImpl.h | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/index/IndexImpl.Text.cpp b/src/index/IndexImpl.Text.cpp index 397a227854..76c0015974 100644 --- a/src/index/IndexImpl.Text.cpp +++ b/src/index/IndexImpl.Text.cpp @@ -299,8 +299,9 @@ void IndexImpl::processWordsForInvertedLists(const string& contextFile, textMeta_.setNofTextRecords(nofContexts); textMeta_.setNofWordPostings(nofWordPostings);
textMeta_.setNofEntityPostings(nofEntityPostings); - nofNonLiterals_ = nofContexts - nofLiterals; - configurationJson_["num-non-literals"] = nofNonLiterals_; + nofNonLiteralsInTextIndex_ = nofContexts - nofLiterals; + configurationJson_["num-non-literals-text-index"] = + nofNonLiteralsInTextIndex_; writeConfiguration(); writer.finish(); diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 41f9cf50ff..4f5ce915fe 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -1128,7 +1128,7 @@ void IndexImpl::readConfiguration() { loadDataMember("num-subjects", numSubjects_, NumNormalAndInternal{}); loadDataMember("num-objects", numObjects_, NumNormalAndInternal{}); loadDataMember("num-triples", numTriples_, NumNormalAndInternal{}); - loadDataMember("num-non-literals", nofNonLiterals_, 0); + loadDataMember("num-non-literals-text-index", nofNonLiteralsInTextIndex_, 0); // Initialize BlankNodeManager uint64_t numBlankNodesTotal; diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index 37041cc301..b98d0d5788 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -161,7 +161,7 @@ class IndexImpl { // Keeps track of the number of nonLiteral contexts in the index this is used // in the test retrieval of the texts. This only works reliably if the // wordsFile.tsv starts with contextId 1 and is continuous. - size_t nofNonLiterals_; + size_t nofNonLiteralsInTextIndex_; // Global static pointers to the currently active index and comparator. // Those are used to compare LocalVocab entries with each other as well as @@ -429,7 +429,9 @@ class IndexImpl { size_t getNofEntityPostings() const { return textMeta_.getNofEntityPostings(); } - size_t getNofNonLiteralsInTextIndex() const { return nofNonLiterals_; } + size_t getNofNonLiteralsInTextIndex() const { + return nofNonLiteralsInTextIndex_; + } bool hasAllPermutations() const { return SPO().isLoaded(); }