From 92d906f198cfbf56347362aaa3a50bb0b45f9946 Mon Sep 17 00:00:00 2001 From: RobinTF <83676088+RobinTF@users.noreply.github.com> Date: Tue, 19 Nov 2024 18:54:24 +0100 Subject: [PATCH 1/4] Fix out-of-memory issue with transitive paths (#1627) The introduction of lazy transitive paths via #1595 caused transitive path operations with lazy inputs and a fully materialized result to store the same local vocab once per input row. This led to OOM errors even if that local vocab was empty. This is not fixed by (1) skipping empty local vocabs when merging, and (2) only storing one local vocab per input block instead of repeating it for each row. There can still be duplicates between the local vocabs of different blocks. Deduplicating those is work for a separate PR. --- src/engine/LocalVocab.cpp | 8 +++----- src/engine/LocalVocab.h | 3 ++- src/engine/TransitivePathBase.cpp | 6 ++---- src/engine/TransitivePathImpl.h | 16 +++++++++++++--- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 1dbc8e7c68..80c97de87a 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -10,11 +10,9 @@ // _____________________________________________________________________________ LocalVocab LocalVocab::clone() const { - LocalVocab localVocabClone; - localVocabClone.otherWordSets_ = otherWordSets_; - localVocabClone.otherWordSets_.push_back(primaryWordSet_); - // Return the clone. - return localVocabClone; + LocalVocab clone; + clone.mergeWith(std::span{this, 1}); + return clone; } // _____________________________________________________________________________ diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index 3055c400a6..ff30b824f1 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -96,7 +96,8 @@ class LocalVocab { template void mergeWith(const R& vocabs) { auto inserter = std::back_inserter(otherWordSets_); - for (const auto& vocab : vocabs) { + using std::views::filter; + for (const auto& vocab : vocabs | filter(std::not_fn(&LocalVocab::empty))) { std::ranges::copy(vocab.otherWordSets_, inserter); *inserter = vocab.primaryWordSet_; } diff --git a/src/engine/TransitivePathBase.cpp b/src/engine/TransitivePathBase.cpp index a833bfdfbd..1db8a7eb0d 100644 --- a/src/engine/TransitivePathBase.cpp +++ b/src/engine/TransitivePathBase.cpp @@ -93,7 +93,7 @@ Result::Generator TransitivePathBase::fillTableWithHullImpl( ad_utility::Timer timer{ad_utility::Timer::Stopped}; size_t outputRow = 0; IdTableStatic table{getResultWidth(), allocator()}; - std::vector storedLocalVocabs; + LocalVocab mergedVocab{}; for (auto& [node, linkedNodes, localVocab, idTable, inputRow] : hull) { timer.cont(); // As an optimization nodes without any linked nodes should not get yielded @@ -120,7 +120,7 @@ Result::Generator TransitivePathBase::fillTableWithHullImpl( } if (yieldOnce) { - storedLocalVocabs.emplace_back(std::move(localVocab)); + mergedVocab.mergeWith(std::span{&localVocab, 1}); } else { timer.stop(); runtimeInfo().addDetail("IdTable fill time", timer.msecs()); @@ -132,8 +132,6 @@ Result::Generator TransitivePathBase::fillTableWithHullImpl( } if (yieldOnce) { timer.start(); - LocalVocab mergedVocab{}; - mergedVocab.mergeWith(storedLocalVocabs); runtimeInfo().addDetail("IdTable fill time", timer.msecs()); co_yield {std::move(table).toDynamic(), std::move(mergedVocab)}; } diff --git a/src/engine/TransitivePathImpl.h b/src/engine/TransitivePathImpl.h index 407b63a298..3e15141114 100644 --- a/src/engine/TransitivePathImpl.h +++ b/src/engine/TransitivePathImpl.h @@ -82,7 +82,8 @@ class TransitivePathImpl : public TransitivePathBase { transitiveHull(edges, sub->getCopyOfLocalVocab(), std::move(nodes), targetSide.isVariable() ? std::nullopt - : std::optional{std::get(targetSide.value_)}); + : std::optional{std::get(targetSide.value_)}, + yieldOnce); auto result = fillTableWithHull( std::move(hull), startSide.outputCol_, targetSide.outputCol_, @@ -131,7 +132,8 @@ class TransitivePathImpl : public TransitivePathBase { edges, sub->getCopyOfLocalVocab(), std::span{&tableInfo, 1}, targetSide.isVariable() ? std::nullopt - : std::optional{std::get(targetSide.value_)}); + : std::optional{std::get(targetSide.value_)}, + yieldOnce); auto result = fillTableWithHull(std::move(hull), startSide.outputCol_, targetSide.outputCol_, yieldOnce); @@ -240,11 +242,15 @@ class TransitivePathImpl : public TransitivePathBase { * `TableColumnWithVocab` that can be consumed to create a transitive hull. * @param target Optional target Id. If supplied, only paths which end * in this Id are added to the hull. + * @param yieldOnce This has to be set to the same value as the consuming + * code. When set to true, this will prevent yielding the same LocalVocab over + * and over again to make merging faster (because merging with an empty + * LocalVocab is a no-op). * @return Map Maps each Id to its connected Ids in the transitive hull */ NodeGenerator transitiveHull(const T& edges, LocalVocab edgesVocab, std::ranges::range auto startNodes, - std::optional target) const { + std::optional target, bool yieldOnce) const { ad_utility::Timer timer{ad_utility::Timer::Stopped}; for (auto&& tableColumn : startNodes) { timer.cont(); @@ -260,6 +266,10 @@ class TransitivePathImpl : public TransitivePathBase { mergedVocab.clone(), tableColumn.table_, currentRow}; timer.cont(); + // Reset vocab to prevent merging the same vocab over and over again. + if (yieldOnce) { + mergedVocab = LocalVocab{}; + } } currentRow++; } From 774ea834f5bb496eb96a0120ac8a2062cd6318d9 Mon Sep 17 00:00:00 2001 From: Hannah Bast Date: Wed, 20 Nov 2024 10:50:51 +0100 Subject: [PATCH 2/4] Improve `LocalVocab` comments and code (#1626) Major revision of comments, many of which were completely outdated. Also improve variable names and make some minor changes to the code. In particular, the the methods `size()` and `empty()` now run in constant time. --- src/engine/LocalVocab.cpp | 38 +++++------ src/engine/LocalVocab.h | 120 ++++++++++++++++++++------------- src/index/LocalVocabEntry.h | 2 +- src/util/TransparentFunctors.h | 91 ++++++++++++++----------- 4 files changed, 142 insertions(+), 109 deletions(-) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 80c97de87a..240780261a 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -1,35 +1,35 @@ -// Copyright 2022, University of Freiburg +// Copyright 2022 - 2024, University of Freiburg // Chair of Algorithms and Data Structures -// Author: Hannah Bast +// Authors: Hannah Bast +// Johannes Kalmbach #include "engine/LocalVocab.h" #include "absl/strings/str_cat.h" #include "global/Id.h" #include "global/ValueId.h" +#include "util/TransparentFunctors.h" // _____________________________________________________________________________ LocalVocab LocalVocab::clone() const { - LocalVocab clone; - clone.mergeWith(std::span{this, 1}); - return clone; + LocalVocab result; + result.mergeWith(std::span{this, 1}); + AD_CORRECTNESS_CHECK(result.size_ == size_); + return result; } // _____________________________________________________________________________ LocalVocab LocalVocab::merge(std::span vocabs) { - LocalVocab res; - res.mergeWith(vocabs | - std::views::transform( - [](const LocalVocab* localVocab) -> const LocalVocab& { - return *localVocab; - })); - return res; + LocalVocab result; + result.mergeWith(vocabs | std::views::transform(ad_utility::dereference)); + return result; } // _____________________________________________________________________________ template LocalVocabIndex LocalVocab::getIndexAndAddIfNotContainedImpl(WordT&& word) { auto [wordIterator, isNewWord] = primaryWordSet().insert(AD_FWD(word)); + size_ += static_cast(isNewWord); // TODO Use std::to_address (more idiomatic, but currently breaks // the MacOS build. return &(*wordIterator); @@ -37,18 +37,19 @@ LocalVocabIndex LocalVocab::getIndexAndAddIfNotContainedImpl(WordT&& word) { // _____________________________________________________________________________ LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained( - const LiteralOrIri& word) { + const LocalVocabEntry& word) { return getIndexAndAddIfNotContainedImpl(word); } // _____________________________________________________________________________ -LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained(LiteralOrIri&& word) { +LocalVocabIndex LocalVocab::getIndexAndAddIfNotContained( + LocalVocabEntry&& word) { return getIndexAndAddIfNotContainedImpl(std::move(word)); } // _____________________________________________________________________________ std::optional LocalVocab::getIndexOrNullopt( - const LiteralOrIri& word) const { + const LocalVocabEntry& word) const { auto localVocabIndex = primaryWordSet().find(word); if (localVocabIndex != primaryWordSet().end()) { // TODO Use std::to_address (more idiomatic, but currently breaks @@ -60,15 +61,14 @@ std::optional LocalVocab::getIndexOrNullopt( } // _____________________________________________________________________________ -const LocalVocab::LiteralOrIri& LocalVocab::getWord( +const LocalVocabEntry& LocalVocab::getWord( LocalVocabIndex localVocabIndex) const { return *localVocabIndex; } // _____________________________________________________________________________ -std::vector LocalVocab::getAllWordsForTesting() - const { - std::vector result; +std::vector LocalVocab::getAllWordsForTesting() const { + std::vector result; std::ranges::copy(primaryWordSet(), std::back_inserter(result)); for (const auto& previous : otherWordSets_) { std::ranges::copy(*previous, std::back_inserter(result)); diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index ff30b824f1..72745746b9 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -1,6 +1,7 @@ -// Copyright 2022, University of Freiburg +// Copyright 2022 - 2024, University of Freiburg // Chair of Algorithms and Data Structures -// Author: Hannah Bast +// Authors: Hannah Bast +// Johannes Kalmbach #pragma once @@ -14,31 +15,39 @@ #include "global/Id.h" #include "parser/LiteralOrIri.h" #include "util/BlankNodeManager.h" +#include "util/Exception.h" -// A class for maintaining a local vocabulary with contiguous (local) IDs. This -// is meant for words that are not part of the normal vocabulary (constructed -// from the input data at indexing time). +// A class for maintaining a local vocabulary, which conceptually is a set of +// `LiteralOrIri`s that are not part of the original vocabulary (which stems +// from the input data). The implementation is subtle and quite clever: // - +// The entries of the local vocabulary are `LocalVocabEntry`s, each of which +// holds a `LiteralOrIri` and remembers its position in the original vocabulary +// after it has been computed once. +// +// A `LocalVocab` has a primary set of `LocalVocabEntry`s, which can grow +// dynamically, and a collection of other sets of `LocalVocabEntry`s, which +// cannot be modified by this class. A `LocalVocabEntry` lives exactly as long +// as it is contained in at least one of the (primary or other) sets of a +// `LocalVocab`. class LocalVocab { private: - using Entry = LocalVocabEntry; - using LiteralOrIri = LocalVocabEntry; - // A map of the words in the local vocabulary to their local IDs. This is a - // node hash map because we need the addresses of the words (which are of type - // `LiteralOrIri`) to remain stable over their lifetime in the hash map - // because we hand out pointers to them. - using Set = absl::node_hash_set; + // The primary set of `LocalVocabEntry`s, which can grow dynamically. + // + // NOTE: This is a `absl::node_hash_set` because we hand out pointers to + // the `LocalVocabEntry`s and it is hence essential that their addresses + // remain stable over their lifetime in the hash set. + using Set = absl::node_hash_set; std::shared_ptr primaryWordSet_ = std::make_shared(); - // Local vocabularies from child operations that were merged into this - // vocabulary s.t. the pointers are kept alive. They have to be `const` - // because they are possibly shared concurrently (for example via the cache). + // The other sets of `LocalVocabEntry`s, which are static. std::vector> otherWordSets_; - auto& primaryWordSet() { return *primaryWordSet_; } - const auto& primaryWordSet() const { return *primaryWordSet_; } + // The number of words (so that we can compute `size()` in constant time). + size_t size_ = 0; + // Each `LocalVocab` has its own `LocalBlankNodeManager` to generate blank + // nodes when needed (e.g., when parsing the result of a SERVICE query). std::optional localBlankNodeManager_; @@ -50,49 +59,54 @@ class LocalVocab { LocalVocab(const LocalVocab&) = delete; LocalVocab& operator=(const LocalVocab&) = delete; - // Make a logical copy. The clone will have an empty primary set so it can - // safely be modified. The contents are copied as shared pointers to const, so - // the function runs in linear time in the number of word sets. + // Make a logical copy, where all sets of `LocalVocabEntry`s become "other" + // sets, that is, they cannot be modified by the copy. The primary set becomes + // empty. This only copies shared pointers and takes time linear in the number + // of sets. LocalVocab clone() const; // Moving a local vocabulary is not problematic (though the typical use case - // in our code is to copy shared pointers to local vocabularies). + // in our code is to copy shared pointers from one `LocalVocab` to another). LocalVocab(LocalVocab&&) = default; LocalVocab& operator=(LocalVocab&&) = default; - // Get the index of a word in the local vocabulary. If the word was already - // contained, return the already existing index. If the word was not yet - // contained, add it, and return the new index. - LocalVocabIndex getIndexAndAddIfNotContained(const LiteralOrIri& word); - LocalVocabIndex getIndexAndAddIfNotContained(LiteralOrIri&& word); + // For a given `LocalVocabEntry`, return the corresponding `LocalVocabIndex` + // (which is just the address of the `LocalVocabEntry`). If the + // `LocalVocabEntry` is not contained in any of the sets, add it to the + // primary. + LocalVocabIndex getIndexAndAddIfNotContained(const LocalVocabEntry& word); + LocalVocabIndex getIndexAndAddIfNotContained(LocalVocabEntry&& word); - // Get the index of a word in the local vocabulary, or std::nullopt if it is - // not contained. This is useful for testing. + // Like `getIndexAndAddIfNotContained`, but if the `LocalVocabEntry` is not + // contained in any of the sets, do not add it and return `std::nullopt`. std::optional getIndexOrNullopt( - const LiteralOrIri& word) const; + const LocalVocabEntry& word) const; - // The number of words in the vocabulary. - // Note: This is not constant time, but linear in the number of word sets. + // The number of words in this local vocabulary. size_t size() const { - auto result = primaryWordSet().size(); - for (const auto& previous : otherWordSets_) { - result += previous->size(); + if constexpr (ad_utility::areExpensiveChecksEnabled) { + auto size = primaryWordSet().size(); + for (const auto& previous : otherWordSets_) { + size += previous->size(); + } + AD_CORRECTNESS_CHECK(size == size_); } - return result; + return size_; } // Return true if and only if the local vocabulary is empty. bool empty() const { return size() == 0; } - // Return a const reference to the word. - const LiteralOrIri& getWord(LocalVocabIndex localVocabIndex) const; - - // Create a local vocab that contains and keeps alive all the words from each - // of the `vocabs`. The primary word set of the newly created vocab is empty. - static LocalVocab merge(std::span vocabs); + // Get the `LocalVocabEntry` corresponding to the given `LocalVocabIndex`. + // + // NOTE: This used to be a more complex function but is now a simple + // dereference. It could be thrown out in the future. + const LocalVocabEntry& getWord(LocalVocabIndex localVocabIndex) const; - // Merge all passed local vocabs to keep alive all the words from each of the - // `vocabs`. + // Add all sets (primary and other) of the given local vocabs as other sets + // to this local vocab. The purpose is to keep all the contained + // `LocalVocabEntry`s alive as long as this `LocalVocab` is alive. The + // primary set of this `LocalVocab` remains unchanged. template void mergeWith(const R& vocabs) { auto inserter = std::back_inserter(otherWordSets_); @@ -100,11 +114,17 @@ class LocalVocab { for (const auto& vocab : vocabs | filter(std::not_fn(&LocalVocab::empty))) { std::ranges::copy(vocab.otherWordSets_, inserter); *inserter = vocab.primaryWordSet_; + size_ += vocab.size_; } } - // Return all the words from all the word sets as a vector. - std::vector getAllWordsForTesting() const; + // Create a new local vocab with empty set and other sets that are the union + // of all sets (primary and other) of the given local vocabs. + static LocalVocab merge(std::span vocabs); + + // Return all the words from all the word sets as a vector. This is useful + // for testing. + std::vector getAllWordsForTesting() const; // Get a new BlankNodeIndex using the LocalBlankNodeManager. [[nodiscard]] BlankNodeIndex getBlankNodeIndex( @@ -115,8 +135,12 @@ class LocalVocab { bool isBlankNodeIndexContained(BlankNodeIndex blankNodeIndex) const; private: - // Common implementation for the two variants of - // `getIndexAndAddIfNotContainedImpl` above. + // Accessors for the primary set. + Set& primaryWordSet() { return *primaryWordSet_; } + const Set& primaryWordSet() const { return *primaryWordSet_; } + + // Common implementation for the two methods `getIndexAndAddIfNotContained` + // and `getIndexOrNullopt` above. template LocalVocabIndex getIndexAndAddIfNotContainedImpl(WordT&& word); }; diff --git a/src/index/LocalVocabEntry.h b/src/index/LocalVocabEntry.h index 545d8a8350..e591ad64ff 100644 --- a/src/index/LocalVocabEntry.h +++ b/src/index/LocalVocabEntry.h @@ -27,7 +27,7 @@ class alignas(16) LocalVocabEntry // the first *larger* word in the vocabulary. Note: we store the cache as // three separate atomics to avoid mutexes. The downside is, that in parallel // code multiple threads might look up the position concurrently, which wastes - // a bit of resources. We however don't consider this case to be likely. + // a bit of resources. However, we don't consider this case to be likely. mutable ad_utility::CopyableAtomic lowerBoundInVocab_; mutable ad_utility::CopyableAtomic upperBoundInVocab_; mutable ad_utility::CopyableAtomic positionInVocabKnown_ = false; diff --git a/src/util/TransparentFunctors.h b/src/util/TransparentFunctors.h index 6e2bd9fca3..1b889a0796 100644 --- a/src/util/TransparentFunctors.h +++ b/src/util/TransparentFunctors.h @@ -1,25 +1,23 @@ -// Copyright 2022, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: -// 2022 - Johannes Kalmbach +// Copyright 2022 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Johannes Kalmbach -#ifndef QLEVER_TRANSPARENTFUNCTORS_H -#define QLEVER_TRANSPARENTFUNCTORS_H +#pragma once #include #include #include -/// Contains several function object types with templated operator() that wrap -/// overloaded functions from the standard library. This enables passing them as -/// function parameters. - -/// Note that in theory all of them could be implemented shorter as captureless -/// lambda expressions. We have chosen not to do this because the STL also does -/// not choose this approach (see e.g. `std::less`, `std::plus`, etc.) and -/// because global inline lambdas in header files might in theory cause ODR (one -/// definition rule) problems, especially when using different compilers. +// Contains several function object types with templated operator() that wrap +// overloaded functions from the standard library. This enables passing them as +// function parameters. +// +// NOTE: in theory all of them could be implemented shorter as captureless +// lambda expressions. We have chosen not to do this because the STL also does +// not choose this approach (see e.g. `std::less`, `std::plus`, etc.) and +// because global inline lambdas in header files might in theory cause ODR (one +// definition rule) problems, especially when using different compilers. namespace ad_utility { @@ -79,37 +77,60 @@ struct ToBoolImpl { } }; +// Implementation of `staticCast` (see below). +template +struct StaticCastImpl { + constexpr decltype(auto) operator()(auto&& x) const { + return static_cast(AD_FWD(x)); + } +}; + +// Implementation of `dereference` (see below). +struct DereferenceImpl { + constexpr decltype(auto) operator()(auto&& x) const { return *AD_FWD(x); } +}; + } // namespace detail -/// Return the first element via perfect forwarding of any type for which -/// `std::get<0>(x)` is valid. This holds e.g. for `std::pair`, `std::tuple`, -/// and `std::array`. +// Return the first element via perfect forwarding of any type for which +// `std::get<0>(x)` is valid. This holds e.g. for `std::pair`, `std::tuple`, +// and `std::array`. static constexpr detail::FirstImpl first; -/// Return the second element via perfect forwarding of any type for which -/// `std::get<1>(x)` is valid. This holds e.g. for `std::pair`, `std::tuple`, -/// and `std::array`. +// Return the second element via perfect forwarding of any type for which +// `std::get<1>(x)` is valid. This holds e.g. for `std::pair`, `std::tuple`, +// and `std::array`. static constexpr detail::SecondImpl second; -/// Transparent functor for `std::holds_alternative` +// Transparent functor for `std::holds_alternative` template static constexpr detail::HoldsAlternativeImpl holdsAlternative; -/// Transparent functor for `std::get`. Currently only works for `std::variant` -/// and not for `std::array` or `std::tuple`. +// Transparent functor for `std::get`. Currently only works for `std::variant` +// and not for `std::array` or `std::tuple`. template static constexpr detail::GetImpl get; -/// Transparent functor for `std::get_if`. As an extension to `std::get_if`, -/// `ad_utility::getIf` may also be called with a `variant` object or reference, -/// not only with a pointer. +// Transparent functor for `std::get_if`. As an extension to `std::get_if`, +// `ad_utility::getIf` may also be called with a `variant` object or reference, +// not only with a pointer. template static constexpr detail::GetIfImpl getIf; +// Transparent functor that converts any type to `bool` via +// `static_cast`. static constexpr detail::ToBoolImpl toBool; -/// A functor that takes an arbitrary number of arguments by reference and does -/// nothing. +// Transparent functor that casts any type to `T` via `static_cast`. +template +static constexpr detail::StaticCastImpl staticCast{}; + +// Transparent functor that dereferences a pointer or smart pointer. +static constexpr detail::DereferenceImpl dereference; + +// Transparent functor that takes an arbitrary number of arguments by reference +// and does nothing. We also use the type `Noop`, hence it is defined here and +// not in the `detail` namespace above. struct Noop { void operator()(const auto&...) const { // This function deliberately does nothing (static analysis expects a @@ -118,16 +139,4 @@ struct Noop { }; [[maybe_unused]] static constexpr Noop noop{}; -template -struct StaticCast { - constexpr decltype(auto) operator()(auto&& x) const { - return static_cast(AD_FWD(x)); - } -}; - -template -static constexpr StaticCast staticCast{}; - } // namespace ad_utility - -#endif // QLEVER_TRANSPARENTFUNCTORS_H From d53d4f9fd91fd03463382e2b6f476d4ee345d6d9 Mon Sep 17 00:00:00 2001 From: unex <63149623+UNEXENU@users.noreply.github.com> Date: Thu, 21 Nov 2024 10:45:10 +0100 Subject: [PATCH 3/4] Also merge the `LocalBlankNodeManager`s when a `LocalVocab` is merged. (#1617) This resolves a bug that was introduced in #1504, where the `LocalBlankNodeManager` of a `LocalVocab` was not accounted for in the merge functions of the `LocalVocab`. --- src/engine/LocalVocab.cpp | 4 +++- src/engine/LocalVocab.h | 24 ++++++++++++++++++++- src/util/BlankNodeManager.cpp | 30 +++++++++++++-------------- src/util/BlankNodeManager.h | 39 +++++++++++++++++++++++++++++++---- test/BlankNodeManagerTest.cpp | 30 +++++++++++++++++---------- test/LocalVocabTest.cpp | 32 ++++++++++++++++++++++++++++ 6 files changed, 127 insertions(+), 32 deletions(-) diff --git a/src/engine/LocalVocab.cpp b/src/engine/LocalVocab.cpp index 240780261a..2170d20d5d 100644 --- a/src/engine/LocalVocab.cpp +++ b/src/engine/LocalVocab.cpp @@ -82,7 +82,9 @@ BlankNodeIndex LocalVocab::getBlankNodeIndex( AD_CONTRACT_CHECK(blankNodeManager); // Initialize the `localBlankNodeManager_` if it doesn't exist yet. if (!localBlankNodeManager_) [[unlikely]] { - localBlankNodeManager_.emplace(blankNodeManager); + localBlankNodeManager_ = + std::make_shared( + blankNodeManager); } return BlankNodeIndex::make(localBlankNodeManager_->getId()); } diff --git a/src/engine/LocalVocab.h b/src/engine/LocalVocab.h index 72745746b9..f61982400d 100644 --- a/src/engine/LocalVocab.h +++ b/src/engine/LocalVocab.h @@ -5,8 +5,10 @@ #pragma once +#include #include #include +#include #include #include #include @@ -48,7 +50,7 @@ class LocalVocab { // Each `LocalVocab` has its own `LocalBlankNodeManager` to generate blank // nodes when needed (e.g., when parsing the result of a SERVICE query). - std::optional + std::shared_ptr localBlankNodeManager_; public: @@ -116,6 +118,26 @@ class LocalVocab { *inserter = vocab.primaryWordSet_; size_ += vocab.size_; } + + // Also merge the `vocabs` `LocalBlankNodeManager`s, if they exist. + using LocalBlankNodeManager = + ad_utility::BlankNodeManager::LocalBlankNodeManager; + auto localManagersView = + vocabs | + std::views::transform([](const LocalVocab& vocab) -> const auto& { + return vocab.localBlankNodeManager_; + }); + + auto it = std::ranges::find_if(localManagersView, + [](const auto& l) { return l != nullptr; }); + if (it == localManagersView.end()) { + return; + } + if (!localBlankNodeManager_) { + localBlankNodeManager_ = + std::make_shared((*it)->blankNodeManager()); + } + localBlankNodeManager_->mergeWith(localManagersView); } // Create a new local vocab with empty set and other sets that are the union diff --git a/src/util/BlankNodeManager.cpp b/src/util/BlankNodeManager.cpp index 6b367118d5..44295b3aeb 100644 --- a/src/util/BlankNodeManager.cpp +++ b/src/util/BlankNodeManager.cpp @@ -4,6 +4,8 @@ #include "util/BlankNodeManager.h" +#include "util/Exception.h" + namespace ad_utility { // _____________________________________________________________________________ @@ -41,30 +43,28 @@ BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager( BlankNodeManager* blankNodeManager) : blankNodeManager_(blankNodeManager) {} -// _____________________________________________________________________________ -BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() { - auto ptr = blankNodeManager_->usedBlocksSet_.wlock(); - for (const auto& block : blocks_) { - AD_CONTRACT_CHECK(ptr->contains(block.blockIdx_)); - ptr->erase(block.blockIdx_); - } -} - // _____________________________________________________________________________ uint64_t BlankNodeManager::LocalBlankNodeManager::getId() { - if (blocks_.empty() || blocks_.back().nextIdx_ == idxAfterCurrentBlock_) { - blocks_.emplace_back(blankNodeManager_->allocateBlock()); - idxAfterCurrentBlock_ = blocks_.back().nextIdx_ + blockSize_; + if (blocks_->empty() || blocks_->back().nextIdx_ == idxAfterCurrentBlock_) { + blocks_->emplace_back(blankNodeManager_->allocateBlock()); + idxAfterCurrentBlock_ = blocks_->back().nextIdx_ + blockSize_; } - return blocks_.back().nextIdx_++; + return blocks_->back().nextIdx_++; } // _____________________________________________________________________________ bool BlankNodeManager::LocalBlankNodeManager::containsBlankNodeIndex( uint64_t index) const { - return std::ranges::any_of(blocks_, [index](const Block& block) { + auto containsIndex = [index](const Block& block) { return index >= block.startIdx_ && index < block.nextIdx_; - }); + }; + + return std::ranges::any_of(*blocks_, containsIndex) || + std::ranges::any_of( + otherBlocks_, + [&](const std::shared_ptr>& blocks) { + return std::ranges::any_of(*blocks, containsIndex); + }); } } // namespace ad_utility diff --git a/src/util/BlankNodeManager.h b/src/util/BlankNodeManager.h index 7fd5416294..afdc748281 100644 --- a/src/util/BlankNodeManager.h +++ b/src/util/BlankNodeManager.h @@ -58,6 +58,7 @@ class BlankNodeManager { public: ~Block() = default; + // The index of this block. const uint64_t blockIdx_; @@ -71,7 +72,7 @@ class BlankNodeManager { class LocalBlankNodeManager { public: explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager); - ~LocalBlankNodeManager(); + ~LocalBlankNodeManager() = default; // No copy, as the managed blocks should not be duplicated. LocalBlankNodeManager(const LocalBlankNodeManager&) = delete; @@ -87,16 +88,46 @@ class BlankNodeManager { // Return true iff the `index` was returned by a previous call to `getId()`. bool containsBlankNodeIndex(uint64_t index) const; - private: - // Reserved blocks. - std::vector blocks_; + // Merge passed `LocalBlankNodeManager`s to keep alive their reserved + // BlankNodeIndex blocks. + template + void mergeWith(const R& localBlankNodeManagers) { + auto inserter = std::back_inserter(otherBlocks_); + for (const auto& l : localBlankNodeManagers) { + if (l == nullptr) { + continue; + } + std::ranges::copy(l->otherBlocks_, inserter); + *inserter = l->blocks_; + } + } + + // Getter for the `blankNodeManager_` pointer required in + // `LocalVocab::mergeWith`. + BlankNodeManager* blankNodeManager() const { return blankNodeManager_; } + private: // Reference to the BlankNodeManager, used to free the reserved blocks. BlankNodeManager* blankNodeManager_; + // Reserved blocks. + using Blocks = std::vector; + std::shared_ptr blocks_{ + new Blocks(), [blankNodeManager = blankNodeManager()](auto blocksPtr) { + auto ptr = blankNodeManager->usedBlocksSet_.wlock(); + for (const auto& block : *blocksPtr) { + AD_CONTRACT_CHECK(ptr->contains(block.blockIdx_)); + ptr->erase(block.blockIdx_); + } + delete blocksPtr; + }}; + // The first index after the current Block. uint64_t idxAfterCurrentBlock_{0}; + // Blocks merged from other `LocalBlankNodeManager`s. + std::vector> otherBlocks_; + FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID); }; diff --git a/test/BlankNodeManagerTest.cpp b/test/BlankNodeManagerTest.cpp index 70803bd3f0..9d1969c80c 100644 --- a/test/BlankNodeManagerTest.cpp +++ b/test/BlankNodeManagerTest.cpp @@ -37,24 +37,32 @@ TEST(BlankNodeManager, blockAllocationAndFree) { // _____________________________________________________________________________ TEST(BlankNodeManager, LocalBlankNodeManagerGetID) { BlankNodeManager bnm(0); - BlankNodeManager::LocalBlankNodeManager l(&bnm); + auto l = std::make_shared(&bnm); // initially the LocalBlankNodeManager doesn't have any blocks - EXPECT_EQ(l.blocks_.size(), 0); + EXPECT_EQ(l->blocks_->size(), 0); // A new Block is allocated, if // no blocks are allocated yet - uint64_t id = l.getId(); - EXPECT_EQ(l.blocks_.size(), 1); - EXPECT_TRUE(l.containsBlankNodeIndex(id)); - EXPECT_FALSE(l.containsBlankNodeIndex(id + 1)); - EXPECT_FALSE(l.containsBlankNodeIndex(id - 1)); + uint64_t id = l->getId(); + EXPECT_EQ(l->blocks_->size(), 1); + EXPECT_TRUE(l->containsBlankNodeIndex(id)); + EXPECT_FALSE(l->containsBlankNodeIndex(id + 1)); + EXPECT_FALSE(l->containsBlankNodeIndex(id - 1)); // or the ids of the last block are all used - l.blocks_.back().nextIdx_ = id + BlankNodeManager::blockSize_; - id = l.getId(); - EXPECT_TRUE(l.containsBlankNodeIndex(id)); - EXPECT_EQ(l.blocks_.size(), 2); + l->blocks_->back().nextIdx_ = id + BlankNodeManager::blockSize_; + id = l->getId(); + EXPECT_TRUE(l->containsBlankNodeIndex(id)); + EXPECT_EQ(l->blocks_->size(), 2); + + // The `LocalBlankNodeManager` still works when recursively merged. + std::vector itSelf{l}; + l->mergeWith(itSelf); + + EXPECT_TRUE(l->containsBlankNodeIndex(id)); + EXPECT_TRUE(l->containsBlankNodeIndex(l->getId())); + EXPECT_EQ(l->blocks_, l->otherBlocks_[0]); } // _____________________________________________________________________________ diff --git a/test/LocalVocabTest.cpp b/test/LocalVocabTest.cpp index a9058d3a68..38728a3c92 100644 --- a/test/LocalVocabTest.cpp +++ b/test/LocalVocabTest.cpp @@ -134,6 +134,14 @@ TEST(LocalVocab, clone) { for (size_t i = 0; i < inputWords.size(); ++i) { EXPECT_EQ(*indices[i], inputWords[i]); } + + // Test that a BlankNodeIndex obtained by a `LocalVocab` is also contained + // in the clone. + ad_utility::BlankNodeManager bnm; + LocalVocab v; + auto id = v.getBlankNodeIndex(&bnm); + LocalVocab vClone = v.clone(); + EXPECT_TRUE(vClone.isBlankNodeIndexContained(id)); } // _____________________________________________________________________________ TEST(LocalVocab, merge) { @@ -162,6 +170,30 @@ TEST(LocalVocab, merge) { EXPECT_EQ(*indices[1], lit("twoA")); EXPECT_EQ(*indices[2], lit("oneB")); EXPECT_EQ(*indices[3], lit("twoB")); + + // Test that the `LocalBlankNodeManager` of vocabs is merged correctly. + ad_utility::BlankNodeManager bnm; + LocalVocab localVocabMerged2; + BlankNodeIndex id; + { + LocalVocab vocC, vocD; + id = vocC.getBlankNodeIndex(&bnm); + auto vocabs2 = std::vector{&std::as_const(vocC), &std::as_const(vocD)}; + localVocabMerged2 = LocalVocab::merge(vocabs2); + } + EXPECT_TRUE(localVocabMerged2.isBlankNodeIndexContained(id)); + + LocalVocab vocE, vocF; + auto id2 = vocE.getBlankNodeIndex(&bnm); + auto vocabs3 = + std::vector{&std::as_const(localVocabMerged2), &std::as_const(vocF)}; + vocE.mergeWith(vocabs3 | std::views::transform( + [](const LocalVocab* l) -> const LocalVocab& { + return *l; + })); + EXPECT_TRUE(vocE.isBlankNodeIndexContained(id)); + EXPECT_TRUE(localVocabMerged2.isBlankNodeIndexContained(id)); + EXPECT_TRUE(vocE.isBlankNodeIndexContained(id2)); } // _____________________________________________________________________________ From 5f28e832043b22102ad6e6e2a90bf93ff6846169 Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Thu, 21 Nov 2024 23:34:27 +0100 Subject: [PATCH 4/4] Rename `LOG(...)` macro to `AD_LOG_...` to avoid conflicts with Abseil (#1629) So far, QLever uses the macro `LOG(...)` for logging, where the argument is the log level. This is used by other libraries, too, in particular by Abseil. With an unfortunate include order, this leads to compilation errors that are hard to understand for new developers. We now use our usual prefix also for logging and call the macros `AD_LOG_INFO`, `AD_LOG_DEBUG`, etc. --------- Co-authored-by: Hannah Bast --- src/index/IndexImpl.cpp | 264 +++++++++++++++++++++------------------- src/util/Log.h | 59 ++++++--- 2 files changed, 178 insertions(+), 145 deletions(-) diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 93b2fc09ea..27ae9491ff 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -245,10 +245,10 @@ IndexImpl::buildOspWithPatterns( secondSorter->clear(); // Add the `ql:has-pattern` predicate to the sorter such that it will become // part of the PSO and POS permutation. - LOG(INFO) << "Adding " << hasPatternPredicateSortedByPSO->size() - << " triples to the POS and PSO permutation for " - "the internal `ql:has-pattern` ..." - << std::endl; + AD_LOG_INFO << "Adding " << hasPatternPredicateSortedByPSO->size() + << " triples to the POS and PSO permutation for " + "the internal `ql:has-pattern` ..." + << std::endl; static_assert(NumColumnsIndexBuilding == 4, "When adding additional payload columns, the following code " "has to be changed"); @@ -297,9 +297,10 @@ void IndexImpl::updateInputFileSpecificationsAndLog( // for a single input stream and forbidden for multiple input streams. if (parallelParsingSpecifiedViaJson.has_value()) { if (spec.size() == 1) { - LOG(WARN) << "Parallel parsing set in the `.settings.json` file; this is " - "deprecated, " - << pleaseUseParallelParsingOption << std::endl; + AD_LOG_WARN + << "Parallel parsing set in the `.settings.json` file; this is " + "deprecated, " + << pleaseUseParallelParsingOption << std::endl; spec.at(0).parseInParallel_ = parallelParsingSpecifiedViaJson.value(); } else { throw std::runtime_error{absl::StrCat( @@ -312,21 +313,22 @@ void IndexImpl::updateInputFileSpecificationsAndLog( // on the command line, we set if implicitly for backward compatibility. if (!parallelParsingSpecifiedViaJson.has_value() && spec.size() == 1 && !spec.at(0).parseInParallelSetExplicitly_) { - LOG(WARN) << "Implicitly using the parallel parser for a single input file " - "for reasons of backward compatibility; this is deprecated, " - << pleaseUseParallelParsingOption << std::endl; + AD_LOG_WARN + << "Implicitly using the parallel parser for a single input file " + "for reasons of backward compatibility; this is deprecated, " + << pleaseUseParallelParsingOption << std::endl; spec.at(0).parseInParallel_ = true; } // For a single input stream, show the name and whether we parse in parallel. // For multiple input streams, only show the number of streams. if (spec.size() == 1) { - LOG(INFO) << "Parsing triples from single input stream " - << spec.at(0).filename_ << " (parallel = " - << (spec.at(0).parseInParallel_ ? "true" : "false") << ") ..." - << std::endl; + AD_LOG_INFO << "Parsing triples from single input stream " + << spec.at(0).filename_ << " (parallel = " + << (spec.at(0).parseInParallel_ ? "true" : "false") << ") ..." + << std::endl; } else { - LOG(INFO) << "Processing triples from " << spec.size() - << " input streams ..." << std::endl; + AD_LOG_INFO << "Processing triples from " << spec.size() + << " input streams ..." << std::endl; } } @@ -412,7 +414,7 @@ void IndexImpl::createFromFiles( addInternalStatisticsToConfiguration(numTriplesInternal, numPredicatesInternal); - LOG(INFO) << "Index build completed" << std::endl; + AD_LOG_INFO << "Index build completed" << std::endl; } // _____________________________________________________________________________ @@ -439,9 +441,9 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary( ad_utility::Synchronized> idTriples( std::make_unique(onDiskBase_ + ".unsorted-triples.dat", 1_GB, allocator_)); - LOG(INFO) << "Parsing input triples and creating partial vocabularies, one " - "per batch ..." - << std::endl; + AD_LOG_INFO << "Parsing input triples and creating partial vocabularies, one " + "per batch ..." + << std::endl; bool parserExhausted = false; // already count the numbers of triples that will be used for the language @@ -492,12 +494,12 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary( } numTriplesProcessed++; if (progressBar.update()) { - LOG(INFO) << progressBar.getProgressString() << std::flush; + AD_LOG_INFO << progressBar.getProgressString() << std::flush; } } - LOG(TIMING) << "WaitTimes for Pipeline in msecs\n"; + AD_LOG_TIMING << "WaitTimes for Pipeline in msecs\n"; for (const auto& t : p.getWaitingTime()) { - LOG(TIMING) + AD_LOG_TIMING << std::chrono::duration_cast(t).count() << " msecs" << std::endl; } @@ -514,7 +516,7 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary( if (writePartialVocabularyFuture[0].valid()) { writePartialVocabularyFuture[0].get(); } - LOG(TIMING) + AD_LOG_TIMING << "Time spent waiting for the writing of a previous vocabulary: " << sortFutureTimer.msecs().count() << "ms." << std::endl; auto moveMap = [](std::optional&& el) { @@ -537,20 +539,20 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary( // ids actualPartialSizes.push_back(actualCurrentPartialSize); } - LOG(INFO) << progressBar.getFinalProgressString() << std::flush; + AD_LOG_INFO << progressBar.getFinalProgressString() << std::flush; for (auto& future : writePartialVocabularyFuture) { if (future.valid()) { future.get(); } } - LOG(INFO) << "Number of triples created (including QLever-internal ones): " - << (*idTriples.wlock())->size() << " [may contain duplicates]" - << std::endl; + AD_LOG_INFO << "Number of triples created (including QLever-internal ones): " + << (*idTriples.wlock())->size() << " [may contain duplicates]" + << std::endl; size_t sizeInternalVocabulary = 0; std::vector prefixes; - LOG(INFO) << "Merging partial vocabularies ..." << std::endl; + AD_LOG_INFO << "Merging partial vocabularies ..." << std::endl; const ad_utility::vocabulary_merger::VocabularyMetaData mergeRes = [&]() { auto sortPred = [cmp = &(vocab_.getCaseComparator())](std::string_view a, std::string_view b) { @@ -562,21 +564,22 @@ IndexBuilderDataAsStxxlVector IndexImpl::passFileForVocabulary( onDiskBase_, numFiles, sortPred, wordCallback, memoryLimitIndexBuilding()); }(); - LOG(DEBUG) << "Finished merging partial vocabularies" << std::endl; + AD_LOG_DEBUG << "Finished merging partial vocabularies" << std::endl; IndexBuilderDataAsStxxlVector res; res.vocabularyMetaData_ = mergeRes; idOfHasPatternDuringIndexBuilding_ = mergeRes.specialIdMapping().at(HAS_PATTERN_PREDICATE); idOfInternalGraphDuringIndexBuilding_ = mergeRes.specialIdMapping().at(QLEVER_INTERNAL_GRAPH_IRI); - LOG(INFO) << "Number of words in external vocabulary: " - << res.vocabularyMetaData_.numWordsTotal() - sizeInternalVocabulary - << std::endl; + AD_LOG_INFO << "Number of words in external vocabulary: " + << res.vocabularyMetaData_.numWordsTotal() - + sizeInternalVocabulary + << std::endl; res.idTriples = std::move(*idTriples.wlock()); res.actualPartialSizes = std::move(actualPartialSizes); - LOG(DEBUG) << "Removing temporary files ..." << std::endl; + AD_LOG_DEBUG << "Removing temporary files ..." << std::endl; for (size_t n = 0; n < numFiles; ++n) { deleteTemporaryFile(absl::StrCat(onDiskBase_, PARTIAL_VOCAB_FILE_NAME, n)); } @@ -589,10 +592,10 @@ auto IndexImpl::convertPartialToGlobalIds( TripleVec& data, const vector& actualLinesPerPartial, size_t linesPerPartial, auto isQLeverInternalTriple) -> FirstPermutationSorterAndInternalTriplesAsPso { - LOG(INFO) << "Converting triples from local IDs to global IDs ..." - << std::endl; - LOG(DEBUG) << "Triples per partial vocabulary: " << linesPerPartial - << std::endl; + AD_LOG_INFO << "Converting triples from local IDs to global IDs ..." + << std::endl; + AD_LOG_DEBUG << "Triples per partial vocabulary: " << linesPerPartial + << std::endl; // Iterate over all partial vocabularies. auto resultPtr = @@ -660,7 +663,7 @@ auto IndexImpl::convertPartialToGlobalIds( numTriplesConverted += triples->size(); numTriplesConverted += internalTriples->size(); if (progressBar.update()) { - LOG(INFO) << progressBar.getProgressString() << std::flush; + AD_LOG_INFO << progressBar.getProgressString() << std::flush; } }; }; @@ -751,7 +754,7 @@ auto IndexImpl::convertPartialToGlobalIds( } lookupQueue.finish(); writeQueue.finish(); - LOG(INFO) << progressBar.getFinalProgressString() << std::flush; + AD_LOG_INFO << progressBar.getFinalProgressString() << std::flush; return {std::move(resultPtr), std::move(internalTriplesPtr)}; } @@ -811,8 +814,8 @@ std::tuple> configurationJson_; if (configurationJson_.find("git-hash") != configurationJson_.end()) { - LOG(INFO) << "The git hash used to build this index was " - << configurationJson_["git-hash"] << std::endl; + AD_LOG_INFO << "The git hash used to build this index was " + << configurationJson_["git-hash"] << std::endl; } else { - LOG(INFO) << "The index was built before git commit hashes were stored in " - "the index meta data" - << std::endl; + AD_LOG_INFO + << "The index was built before git commit hashes were stored in " + "the index meta data" + << std::endl; } if (configurationJson_.find("index-format-version") != @@ -1011,31 +1017,34 @@ void IndexImpl::readConfiguration() { const auto& currentVersion = qlever::indexFormatVersion; if (indexFormatVersion != currentVersion) { if (indexFormatVersion.date_.toBits() > currentVersion.date_.toBits()) { - LOG(ERROR) << "The version of QLever you are using is too old for this " - "index. Please use a version of QLever that is " - "compatible with this index" - " (PR = " - << indexFormatVersion.prNumber_ << ", Date = " - << indexFormatVersion.date_.toStringAndType().first << ")." - << std::endl; + AD_LOG_ERROR + << "The version of QLever you are using is too old for this " + "index. Please use a version of QLever that is " + "compatible with this index" + " (PR = " + << indexFormatVersion.prNumber_ + << ", Date = " << indexFormatVersion.date_.toStringAndType().first + << ")." << std::endl; } else { - LOG(ERROR) << "The index is too old for this version of QLever. " - "We recommend that you rebuild the index and start the " - "server with the current master. Alternatively start the " - "engine with a version of QLever that is compatible with " - "this index (PR = " - << indexFormatVersion.prNumber_ << ", Date = " - << indexFormatVersion.date_.toStringAndType().first << ")." - << std::endl; + AD_LOG_ERROR + << "The index is too old for this version of QLever. " + "We recommend that you rebuild the index and start the " + "server with the current master. Alternatively start the " + "engine with a version of QLever that is compatible with " + "this index (PR = " + << indexFormatVersion.prNumber_ + << ", Date = " << indexFormatVersion.date_.toStringAndType().first + << ")." << std::endl; } throw std::runtime_error{ "Incompatible index format, see log message for details"}; } } else { - LOG(ERROR) << "This index was built before versioning was introduced for " - "QLever's index format. Please rebuild your index using the " - "current version of QLever." - << std::endl; + AD_LOG_ERROR + << "This index was built before versioning was introduced for " + "QLever's index format. Please rebuild your index using the " + "current version of QLever." + << std::endl; throw std::runtime_error{ "Incompatible index format, see log message for details"}; } @@ -1047,7 +1056,7 @@ void IndexImpl::readConfiguration() { } if (configurationJson_.count("ignore-case")) { - LOG(ERROR) << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; + AD_LOG_ERROR << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; throw std::runtime_error("Deprecated key \"ignore-case\" in index build"); } @@ -1058,9 +1067,10 @@ void IndexImpl::readConfiguration() { vocab_.setLocale(lang, country, ignorePunctuation); textVocab_.setLocale(lang, country, ignorePunctuation); } else { - LOG(ERROR) << "Key \"locale\" is missing in the metadata. This is probably " - "and old index build that is no longer supported by QLever. " - "Please rebuild your index\n"; + AD_LOG_ERROR + << "Key \"locale\" is missing in the metadata. This is probably " + "and old index build that is no longer supported by QLever. " + "Please rebuild your index\n"; throw std::runtime_error( "Missing required key \"locale\" in index build's metadata"); } @@ -1186,7 +1196,7 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { } if (j.count("ignore-case")) { - LOG(ERROR) << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; + AD_LOG_ERROR << ERROR_IGNORE_CASE_UNSUPPORTED << '\n'; throw std::runtime_error("Deprecated key \"ignore-case\" in settings JSON"); } @@ -1206,22 +1216,24 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { country = std::string{j["locale"]["country"]}; ignorePunctuation = bool{j["locale"]["ignore-punctuation"]}; } else { - LOG(INFO) << "Locale was not specified in settings file, default is " - "en_US" - << std::endl; + AD_LOG_INFO << "Locale was not specified in settings file, default is " + "en_US" + << std::endl; } - LOG(INFO) << "You specified \"locale = " << lang << "_" << country << "\" " - << "and \"ignore-punctuation = " << ignorePunctuation << "\"" - << std::endl; + AD_LOG_INFO << "You specified \"locale = " << lang << "_" << country + << "\" " + << "and \"ignore-punctuation = " << ignorePunctuation << "\"" + << std::endl; if (lang != LOCALE_DEFAULT_LANG || country != LOCALE_DEFAULT_COUNTRY) { - LOG(WARN) << "You are using Locale settings that differ from the default " - "language or country.\n\t" - << "This should work but is untested by the QLever team. If " - "you are running into unexpected problems,\n\t" - << "Please make sure to also report your used locale when " - "filing a bug report. Also note that changing the\n\t" - << "locale requires to completely rebuild the index\n"; + AD_LOG_WARN + << "You are using Locale settings that differ from the default " + "language or country.\n\t" + << "This should work but is untested by the QLever team. If " + "you are running into unexpected problems,\n\t" + << "Please make sure to also report your used locale when " + "filing a bug report. Also note that changing the\n\t" + << "locale requires to completely rebuild the index\n"; } vocab_.setLocale(lang, country, ignorePunctuation); textVocab_.setLocale(lang, country, ignorePunctuation); @@ -1238,19 +1250,19 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { onlyAsciiTurtlePrefixes_ = static_cast(j["ascii-prefixes-only"]); } if (onlyAsciiTurtlePrefixes_) { - LOG(INFO) << WARNING_ASCII_ONLY_PREFIXES << std::endl; + AD_LOG_INFO << WARNING_ASCII_ONLY_PREFIXES << std::endl; } if (j.count("parallel-parsing")) { useParallelParser_ = static_cast(j["parallel-parsing"]); } if (useParallelParser_) { - LOG(INFO) << WARNING_PARALLEL_PARSING << std::endl; + AD_LOG_INFO << WARNING_PARALLEL_PARSING << std::endl; } if (j.count("num-triples-per-batch")) { numTriplesPerBatch_ = size_t{j["num-triples-per-batch"]}; - LOG(INFO) + AD_LOG_INFO << "You specified \"num-triples-per-batch = " << numTriplesPerBatch_ << "\", choose a lower value if the index builder runs out of memory" << std::endl; @@ -1258,9 +1270,10 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { if (j.count("parser-batch-size")) { parserBatchSize_ = size_t{j["parser-batch-size"]}; - LOG(INFO) << "Overriding setting parser-batch-size to " << parserBatchSize_ - << " This might influence performance during index build." - << std::endl; + AD_LOG_INFO << "Overriding setting parser-batch-size to " + << parserBatchSize_ + << " This might influence performance during index build." + << std::endl; } std::string overflowingIntegersThrow = "overflowing-integers-throw"; @@ -1274,34 +1287,34 @@ void IndexImpl::readIndexBuilderSettingsFromFile() { if (j.count(key)) { auto value = static_cast(j[key]); if (value == overflowingIntegersThrow) { - LOG(INFO) << "Integers that cannot be represented by QLever will throw " - "an exception" - << std::endl; + AD_LOG_INFO << "Integers that cannot be represented by QLever will throw " + "an exception" + << std::endl; turtleParserIntegerOverflowBehavior_ = TurtleParserIntegerOverflowBehavior::Error; } else if (value == overflowingIntegersBecomeDoubles) { - LOG(INFO) << "Integers that cannot be represented by QLever will be " - "converted to doubles" - << std::endl; + AD_LOG_INFO << "Integers that cannot be represented by QLever will be " + "converted to doubles" + << std::endl; turtleParserIntegerOverflowBehavior_ = TurtleParserIntegerOverflowBehavior::OverflowingToDouble; } else if (value == allIntegersBecomeDoubles) { - LOG(INFO) << "All integers will be converted to doubles" << std::endl; + AD_LOG_INFO << "All integers will be converted to doubles" << std::endl; turtleParserIntegerOverflowBehavior_ = TurtleParserIntegerOverflowBehavior::OverflowingToDouble; } else { AD_CONTRACT_CHECK(std::ranges::find(allModes, value) == allModes.end()); - LOG(ERROR) << "Invalid value for " << key << std::endl; - LOG(INFO) << "The currently supported values are " - << absl::StrJoin(allModes, ",") << std::endl; + AD_LOG_ERROR << "Invalid value for " << key << std::endl; + AD_LOG_INFO << "The currently supported values are " + << absl::StrJoin(allModes, ",") << std::endl; } } else { turtleParserIntegerOverflowBehavior_ = TurtleParserIntegerOverflowBehavior::Error; - LOG(INFO) << "By default, integers that cannot be represented by QLever " - "will throw an " - "exception" - << std::endl; + AD_LOG_INFO << "By default, integers that cannot be represented by QLever " + "will throw an " + "exception" + << std::endl; } } @@ -1311,8 +1324,9 @@ std::future IndexImpl::writeNextPartialVocabulary( std::unique_ptr items, auto localIds, ad_utility::Synchronized>* globalWritePtr) { using namespace ad_utility::vocabulary_merger; - LOG(DEBUG) << "Input triples read in this section: " << numLines << std::endl; - LOG(DEBUG) + AD_LOG_DEBUG << "Input triples read in this section: " << numLines + << std::endl; + AD_LOG_DEBUG << "Triples processed, also counting internal triples added by QLever: " << actualCurrentPartialSize << std::endl; std::future resultFuture; @@ -1341,7 +1355,7 @@ std::future IndexImpl::writeNextPartialVocabulary( ad_utility::TimeBlockAndLog l{"creating internal mapping"}; return createInternalMapping(&vec); }(); - LOG(TRACE) << "Finished creating of Mapping vocabulary" << std::endl; + AD_LOG_TRACE << "Finished creating of Mapping vocabulary" << std::endl; // since now adjacent duplicates also have the same Ids, it suffices to // compare those { @@ -1368,7 +1382,7 @@ std::future IndexImpl::writeNextPartialVocabulary( ad_utility::TimeBlockAndLog l{"write partial vocabulary"}; writePartialVocabularyToFile(vec, partialFilename); } - LOG(TRACE) << "Finished writing the partial vocabulary" << std::endl; + AD_LOG_TRACE << "Finished writing the partial vocabulary" << std::endl; vec.clear(); { ad_utility::TimeBlockAndLog l{"writing to global file"}; diff --git a/src/util/Log.h b/src/util/Log.h index 1ba7277fb0..8b5c52df57 100644 --- a/src/util/Log.h +++ b/src/util/Log.h @@ -1,6 +1,8 @@ -// Copyright 2011, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) +// Copyright 2011 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Björn Buchhold [2011 - 2014] +// Johannes Kalmbach +// Hannah Bast #pragma once @@ -20,7 +22,14 @@ #define LOGLEVEL INFO #endif -// Abseil does also define its own LOG macro, so we need to undefine it here. +// Abseil also defines its own LOG macro, so we need to undefine it here. +// +// NOTE: In case you run into trouble with this conflict, in particular, if you +// use the `LOG(INFO)` macro and you get compilation errors that mention +// `abseil`, use the (otherwise identical) `AD_LOG_INFO` macro below. +// +// TODO: Eventually replace the `LOG` macro by `AD_LOG` everywhere. + #ifdef LOG #undef LOG #endif @@ -31,6 +40,12 @@ else \ ad_utility::Log::getLog() // NOLINT +#define AD_LOG(x) \ + if (x > LOGLEVEL) \ + ; \ + else \ + ad_utility::Log::getLog() // NOLINT + enum class LogLevel { FATAL = 0, ERROR = 1, @@ -41,13 +56,21 @@ enum class LogLevel { TRACE = 6 }; +// Macros for the different log levels. Always use these instead of the old +// `LOG(...)` macro to avoid conflicts with `abseil`. +#define AD_LOG_FATAL AD_LOG(LogLevel::FATAL) +#define AD_LOG_ERROR AD_LOG(LogLevel::ERROR) +#define AD_LOG_WARN AD_LOG(LogLevel::WARN) +#define AD_LOG_INFO AD_LOG(LogLevel::INFO) +#define AD_LOG_DEBUG AD_LOG(LogLevel::DEBUG) +#define AD_LOG_TIMING AD_LOG(LogLevel::TIMING) +#define AD_LOG_TRACE AD_LOG(LogLevel::TRACE) + using enum LogLevel; namespace ad_utility { -/* A singleton (According to Scott Meyer's pattern) that holds - * a pointer to a single std::ostream. This enables us to globally - * redirect the LOG(LEVEL) macros to another location. - */ +// A singleton that holds a pointer to a single `std::ostream`. This enables us +// to globally redirect the `AD_LOG_...` macros to another output stream. struct LogstreamChoice { std::ostream& getStream() { return *_stream; } void setStream(std::ostream* stream) { _stream = stream; } @@ -65,19 +88,15 @@ struct LogstreamChoice { // default to cout since it was the default before std::ostream* _stream = &std::cout; +}; -}; // struct LogstreamChoice - -/** @brief Redirect every LOG(LEVEL) macro that is called afterwards - * to the stream that the argument points to. - * Typically called in the main function of an executable. - */ -inline void setGlobalLoggingStream(std::ostream* streamPtr) { - LogstreamChoice::get().setStream(streamPtr); +// After this call, every use of `AD_LOG_...` will use the specified stream. +// Used in various tests to redirect or suppress logging output. +inline void setGlobalLoggingStream(std::ostream* stream) { + LogstreamChoice::get().setStream(stream); } -using std::string; -//! Helper class to get thousandth separators in a locale +// Helper class to get thousandth separators in a locale class CommaNumPunct : public std::numpunct { protected: virtual char do_thousands_sep() const { return ','; } @@ -87,7 +106,7 @@ class CommaNumPunct : public std::numpunct { const static std::locale commaLocale(std::locale(), new CommaNumPunct()); -//! Log +// The class that actually does the logging. class Log { public: template @@ -99,7 +118,7 @@ class Log { static void imbue(const std::locale& locale) { std::cout.imbue(locale); } - static string getTimeStamp() { + static std::string getTimeStamp() { return absl::FormatTime("%Y-%m-%d %H:%M:%E3S", absl::Now(), absl::LocalTimeZone()); }