Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BlankNode support for SERVICE #1504

Merged
merged 18 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/engine/LocalVocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,15 @@ std::vector<LocalVocab::LiteralOrIri> LocalVocab::getAllWordsForTesting()
}
return result;
}

// _____________________________________________________________________________
// Return a fresh `BlankNodeIndex`. The id is obtained from the
// `LocalBlankNodeManager` owned by this `LocalVocab`, which is created on the
// first call and bound to the given `blankNodeManager`.
BlankNodeIndex LocalVocab::getBlankNodeIndex(
    ad_utility::BlankNodeManager* blankNodeManager) {
  using LocalManager = ad_utility::BlankNodeManager::LocalBlankNodeManager;
  // Lazy initialization: only the first call has to create the local manager.
  if (localBlankNodeManager_ == nullptr) [[unlikely]] {
    localBlankNodeManager_ = std::make_unique<LocalManager>(blankNodeManager);
  }
  const auto freshId = localBlankNodeManager_->getId();
  return BlankNodeIndex::make(freshId);
}
8 changes: 8 additions & 0 deletions src/engine/LocalVocab.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "absl/container/node_hash_set.h"
#include "global/Id.h"
#include "parser/LiteralOrIri.h"
#include "util/BlankNodeManager.h"

// A class for maintaining a local vocabulary with contiguous (local) IDs. This
// is meant for words that are not part of the normal vocabulary (constructed
Expand All @@ -38,6 +39,9 @@ class LocalVocab {
auto& primaryWordSet() { return *primaryWordSet_; }
const auto& primaryWordSet() const { return *primaryWordSet_; }

std::unique_ptr<ad_utility::BlankNodeManager::LocalBlankNodeManager>
localBlankNodeManager_;

public:
// Create a new, empty local vocabulary.
LocalVocab() = default;
Expand Down Expand Up @@ -90,6 +94,10 @@ class LocalVocab {
// Return all the words from all the word sets as a vector.
std::vector<LiteralOrIri> getAllWordsForTesting() const;

// Get a new BlankNodeIndex using the LocalBlankNodeManager.
[[nodiscard]] BlankNodeIndex getBlankNodeIndex(
ad_utility::BlankNodeManager* blankNodeManager);

private:
// Common implementation for the two variants of
// `getIndexAndAddIfNotContainedImpl` above.
Expand Down
3 changes: 3 additions & 0 deletions src/engine/QueryExecutionContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

#pragma once

#include <cstdint>
#include <limits>
#include <memory>
#include <shared_mutex>
#include <string>
Expand All @@ -14,6 +16,7 @@
#include "engine/Result.h"
#include "engine/RuntimeInformation.h"
#include "engine/SortPerformanceEstimator.h"
#include "global/Constants.h"
#include "global/Id.h"
#include "index/Index.h"
#include "util/Cache.h"
Expand Down
25 changes: 17 additions & 8 deletions src/engine/Service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "parser/RdfParser.h"
#include "parser/TokenizerCtre.h"
#include "util/Exception.h"
#include "util/HashMap.h"
#include "util/HashSet.h"
#include "util/StringUtils.h"
#include "util/http/HttpUtils.h"
Expand Down Expand Up @@ -204,14 +205,18 @@ void Service::writeJsonResult(const std::vector<std::string>& vars,
IdTableStatic<I> idTable = std::move(*idTablePtr).toStatic<I>();
checkCancellation();
std::vector<size_t> numLocalVocabPerColumn(idTable.numColumns());
// TODO<joka921> We should include a memory limit, as soon as we can do proper
// memory-limited HashMaps.
ad_utility::HashMap<std::string, Id> blankNodeMap;
joka921 marked this conversation as resolved.
Show resolved Hide resolved

auto writeBindings = [&](const nlohmann::json& bindings, size_t& rowIdx) {
for (const auto& binding : bindings) {
idTable.emplace_back();
for (size_t colIdx = 0; colIdx < vars.size(); ++colIdx) {
TripleComponent tc =
binding.contains(vars[colIdx])
? bindingToTripleComponent(binding[vars[colIdx]])
? bindingToTripleComponent(binding[vars[colIdx]], blankNodeMap,
localVocab)
: TripleComponent::UNDEF();

Id id = std::move(tc).toValueId(getIndex().getVocab(), *localVocab);
Expand Down Expand Up @@ -359,7 +364,9 @@ std::optional<std::string> Service::getSiblingValuesClause() const {

// ____________________________________________________________________________
TripleComponent Service::bindingToTripleComponent(
const nlohmann::json& binding) {
const nlohmann::json& binding,
ad_utility::HashMap<std::string, Id>& blankNodeMap,
LocalVocab* localVocab) const {
if (!binding.contains("type") || !binding.contains("value")) {
throw std::runtime_error(absl::StrCat(
"Missing type or value field in binding. The binding is: '",
Expand All @@ -368,6 +375,8 @@ TripleComponent Service::bindingToTripleComponent(

const auto type = binding["type"].get<std::string_view>();
const auto value = binding["value"].get<std::string_view>();
auto blankNodeManagerPtr =
getExecutionContext()->getIndex().getBlankNodeManager();

TripleComponent tc;
if (type == "literal") {
Expand All @@ -386,12 +395,12 @@ TripleComponent Service::bindingToTripleComponent(
} else if (type == "uri") {
tc = TripleComponent::Iri::fromIrirefWithoutBrackets(value);
} else if (type == "bnode") {
throw std::runtime_error(
"Blank nodes in the result of a SERVICE are currently not "
"supported. "
"For now, consider filtering them out using the ISBLANK function "
"or "
"converting them via the STR function.");
auto [it, wasNew] = blankNodeMap.try_emplace(value, Id());
if (wasNew) {
it->second = Id::makeFromBlankNodeIndex(
localVocab->getBlankNodeIndex(blankNodeManagerPtr));
}
tc = it->second;
} else {
throw std::runtime_error(absl::StrCat("Type ", type,
" is undefined. The binding is: '",
Expand Down
6 changes: 4 additions & 2 deletions src/engine/Service.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,10 @@ class Service : public Operation {
vector<QueryExecutionTree*> getChildren() override { return {}; }

// Convert the given binding to TripleComponent.
static TripleComponent bindingToTripleComponent(
const nlohmann::json& binding);
TripleComponent bindingToTripleComponent(
const nlohmann::json& binding,
ad_utility::HashMap<std::string, Id>& blankNodeMap,
LocalVocab* localVocab) const;

// Create a value for the VALUES-clause used in `getSiblingValuesClause` from
// id. If the id is of type blank node `std::nullopt` is returned.
Expand Down
1 change: 1 addition & 0 deletions src/global/Constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#pragma once

#include <atomic>
#include <chrono>
#include <ctre.hpp>
#include <limits>
Expand Down
4 changes: 4 additions & 0 deletions src/index/Index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ auto Index::getTextVocab() const -> const TextVocab& {
return pimpl_->getTextVocab();
}

// Return a non-owning pointer to the `BlankNodeManager` held by the
// implementation object; ownership stays with `pimpl_`.
ad_utility::BlankNodeManager* Index::getBlankNodeManager() const {
  const auto& ownedManager = pimpl_->blankNodeManager_;
  return ownedManager.get();
}

// ____________________________________________________________________________
size_t Index::getCardinality(const TripleComponent& comp,
Permutation::Enum p) const {
Expand Down
3 changes: 3 additions & 0 deletions src/index/Index.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ class Index {
Vocabulary<std::string, SimpleStringComparator, WordVocabIndex>;
[[nodiscard]] const TextVocab& getTextVocab() const;

// Get a (non-owning) pointer to the BlankNodeManager of this Index.
ad_utility::BlankNodeManager* getBlankNodeManager() const;

// --------------------------------------------------------------------------
// RDF RETRIEVAL
// --------------------------------------------------------------------------
Expand Down
9 changes: 9 additions & 0 deletions src/index/IndexImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,9 @@ void IndexImpl::createFromFile(const string& filename, Index::Filetype type) {
configurationJson_["has-all-permutations"] = true;
}

configurationJson_["num-blank-nodes-total"] =
indexBuilderData.vocabularyMetaData_.getNextBlankNodeIndex();

addInternalStatisticsToConfiguration(numTriplesInternal,
numPredicatesInternal);
LOG(INFO) << "Index build completed" << std::endl;
Expand Down Expand Up @@ -1060,6 +1063,12 @@ void IndexImpl::readConfiguration() {
loadDataMember("num-objects", numObjects_, NumNormalAndInternal{});
loadDataMember("num-triples", numTriples_, NumNormalAndInternal{});

// Initialize BlankNodeManager
uint64_t numBlankNodesTotal;
loadDataMember("num-blank-nodes-total", numBlankNodesTotal);
blankNodeManager_ =
std::make_unique<ad_utility::BlankNodeManager>(numBlankNodesTotal);

// Compute unique ID for this index.
//
// TODO: This is a simplistic way. It would be better to incorporate bytes
Expand Down
3 changes: 3 additions & 0 deletions src/index/IndexImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ class IndexImpl {

using NumNormalAndInternal = Index::NumNormalAndInternal;

// BlankNodeManager, initialized during `readConfiguration`
std::unique_ptr<ad_utility::BlankNodeManager> blankNodeManager_{nullptr};

// Private data members.
private:
string onDiskBase_;
Expand Down
2 changes: 1 addition & 1 deletion src/index/VocabularyMerger.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ struct VocabularyMetaData {
Id begin() const { return begin_; }
Id end() const { return end_; }

// Return true iff the `id` belongs to this range.
// Return true if the `id` belongs to this range.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Return true if the `id` belongs to this range.
// Return true iff the `id` belongs to this range.

That is not really a typo.
In mathematics, and therefore also in computer science, you write "iff" (with two f's) for "if and only if" to be more precise.

bool contains(Id id) const { return begin_ <= id && id < end_; }

private:
Expand Down
65 changes: 65 additions & 0 deletions src/util/BlankNodeManager.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Moritz Dom ([email protected])

#include "util/BlankNodeManager.h"

namespace ad_utility {

// _____________________________________________________________________________
BlankNodeManager::BlankNodeManager(uint64_t minIndex)
: minIndex_(minIndex),
randBlockIndex_(
SlowRandomIntGenerator<uint64_t>(0, totalAvailableBlocks_ - 1)) {}

// _____________________________________________________________________________
// Reserve a currently unused block and return it. The block index is drawn at
// random and redrawn until an unused one is found. Fails the correctness
// check if too many blocks are already in use.
BlankNodeManager::Block BlankNodeManager::allocateBlock() {
  // Hold the write lock for the whole allocation. This makes the capacity
  // check and the insertion one atomic step and also serializes all calls to
  // `randBlockIndex_`, which is shared, mutable state.
  auto usedBlocks = usedBlocksSet_.wlock();

  // The rejection sampling below degrades once a noticeable fraction of all
  // blocks is in use, so refuse to allocate beyond that limit.
  AD_CORRECTNESS_CHECK(
      usedBlocks->size() < totalAvailableBlocks_ / 256,
      "Too many blank node blocks are in use; cannot allocate another block "
      "of blank node indices");

  uint64_t newBlockIndex = randBlockIndex_();
  while (usedBlocks->contains(newBlockIndex)) {
    newBlockIndex = randBlockIndex_();
  }
  usedBlocks->insert(newBlockIndex);

  // The first index of block `i` is `minIndex_ + i * blockSize_`.
  return Block(newBlockIndex, minIndex_ + newBlockIndex * blockSize_);
}

// _____________________________________________________________________________
// Mark the block with the given index as unused again. The set of used blocks
// is modified under its write lock.
void BlankNodeManager::freeBlock(uint64_t blockIndex) {
  auto lockedUsedBlocks = usedBlocksSet_.wlock();
  lockedUsedBlocks->erase(blockIndex);
}

// _____________________________________________________________________________
BlankNodeManager::Block::Block(uint64_t blockIndex, uint64_t startIndex)
: blockIdx_(blockIndex), nextIdx_(startIndex) {}

// _____________________________________________________________________________
BlankNodeManager::LocalBlankNodeManager::LocalBlankNodeManager(
BlankNodeManager* blankNodeManager)
: blankNodeManager_(blankNodeManager) {}

// _____________________________________________________________________________
// Give all blocks that were reserved by this local manager back to the
// global `BlankNodeManager`.
BlankNodeManager::LocalBlankNodeManager::~LocalBlankNodeManager() {
  // Nothing was reserved, so there is no need to take the lock at all.
  if (blocks_.empty()) {
    return;
  }
  // Acquire the write lock once for all blocks instead of once per block. As
  // a nested class, `LocalBlankNodeManager` may access the private
  // `usedBlocksSet_` of the enclosing `BlankNodeManager` directly.
  auto usedBlocks = blankNodeManager_->usedBlocksSet_.wlock();
  for (const auto& block : blocks_) {
    usedBlocks->erase(block.blockIdx_);
  }
}

// _____________________________________________________________________________
// Return a new, previously unused id. A new block is reserved from the global
// `BlankNodeManager` if no block has been reserved yet or if the most recently
// reserved block is exhausted.
uint64_t BlankNodeManager::LocalBlankNodeManager::getId() {
  bool needsNewBlock = blocks_.empty();
  if (!needsNewBlock) {
    const auto& currentBlock = blocks_.back();
    // Block `i` covers [minIndex_ + i * blockSize_,
    // minIndex_ + (i + 1) * blockSize_), matching the start index computed in
    // `allocateBlock`. `minIndex_` must NOT be multiplied by `blockSize_`.
    // NOTE(review): storing this end index explicitly in `Block` would avoid
    // recomputing it from the internals here.
    const uint64_t endIdx = blankNodeManager_->minIndex_ +
                            (currentBlock.blockIdx_ + 1) * blockSize_;
    needsNewBlock = currentBlock.nextIdx_ == endIdx;
  }
  if (needsNewBlock) {
    blocks_.emplace_back(blankNodeManager_->allocateBlock());
  }
  return blocks_.back().nextIdx_++;
}

} // namespace ad_utility
91 changes: 91 additions & 0 deletions src/util/BlankNodeManager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Moritz Dom ([email protected])

#pragma once

#include <gtest/gtest_prod.h>

#include <vector>

#include "global/ValueId.h"
#include "util/HashSet.h"
#include "util/Random.h"
#include "util/Synchronized.h"

namespace ad_utility {
/*
* Manager class for Blank node indices added after indexing time.
*/
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please elaborate a little bit, e.g. what are local blank node indices, what does this class do/manage, etc.

// Hands out indices for blank nodes that are created after indexing time.
// All managed indices lie in [`minIndex_`, `ValueId::maxIndex`]. This range is
// divided into `totalAvailableBlocks_` blocks of `blockSize_` consecutive
// indices each. A `LocalBlankNodeManager` reserves whole blocks from this
// class, hands out the indices of its blocks one by one, and releases its
// blocks again when it is destroyed. The set of blocks that are currently in
// use is kept in a `Synchronized` container.
class BlankNodeManager {
 public:
  // Minimum index, i.e. the smallest index that this manager may hand out.
  const uint64_t minIndex_;

  // Number of indices that make up a single block.
  static constexpr unsigned int blockSize_ = 1000;

  // Number of blocks available. Initialized from `minIndex_`, which is
  // declared (and therefore initialized) before this member.
  const uint64_t totalAvailableBlocks_ =
      (ValueId::maxIndex - minIndex_ + 1) / blockSize_;

 private:
  // Int Generator yielding random block indices.
  SlowRandomIntGenerator<uint64_t> randBlockIndex_;

  // Tracks blocks currently used by instances of `LocalBlankNodeManager`.
  Synchronized<HashSet<uint64_t>> usedBlocksSet_;

 public:
  // Constructor, where `minIndex` is the minimum index such that all managed
  // indices are in [`minIndex_`, `ValueId::maxIndex`]. Currently `minIndex_` is
  // determined by the number of BlankNodes in the current Index.
  explicit BlankNodeManager(uint64_t minIndex = 0);
  ~BlankNodeManager() = default;

  // A BlankNodeIndex Block of size `blockSize_`.
  class Block {
    // Intentional private constructor, allowing only the BlankNodeManager to
    // create Blocks (for a `LocalBlankNodeManager`).
    explicit Block(uint64_t blockIndex, uint64_t startIndex);
    friend class BlankNodeManager;

   public:
    ~Block() = default;
    // The index of this block.
    uint64_t blockIdx_;
    // The next free index within this block.
    uint64_t nextIdx_;
  };

  // Manages the BlankNodes used within a LocalVocab.
  class LocalBlankNodeManager {
   public:
    explicit LocalBlankNodeManager(BlankNodeManager* blankNodeManager);
    ~LocalBlankNodeManager();

    // Copying is disabled: the destructor releases every block in `blocks_`,
    // so a copy would release the same blocks a second time, possibly after
    // they have already been handed out to a different owner.
    LocalBlankNodeManager(const LocalBlankNodeManager&) = delete;
    LocalBlankNodeManager& operator=(const LocalBlankNodeManager&) = delete;

    // Get a new id.
    [[nodiscard]] uint64_t getId();

   private:
    // Reserved blocks.
    std::vector<BlankNodeManager::Block> blocks_;

    // Non-owning pointer to the BlankNodeManager from which the blocks are
    // reserved and to which they are given back.
    BlankNodeManager* const blankNodeManager_;

    FRIEND_TEST(BlankNodeManager, LocalBlankNodeManagerGetID);
  };

  // NOTE(review): no definition of this function is visible next to the other
  // member definitions; confirm whether it is still needed.
  void setInitialIndex(uint64_t idx);

  // Allocate and retrieve a block of free ids.
  [[nodiscard]] Block allocateBlock();

  // Free a block of ids.
  void freeBlock(uint64_t blockIndex);

  FRIEND_TEST(BlankNodeManager, blockAllocationAndFree);
};

} // namespace ad_utility
2 changes: 1 addition & 1 deletion src/util/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
add_subdirectory(ConfigManager)
add_subdirectory(MemorySize)
add_subdirectory(http)
add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp DateYearDuration.cpp Duration.cpp antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp LazyJsonParser.cpp)
add_library(util GeoSparqlHelpers.cpp antlr/ANTLRErrorHandling.cpp ParseException.cpp Conversions.cpp Date.cpp DateYearDuration.cpp Duration.cpp antlr/GenerateAntlrExceptionMetadata.cpp CancellationHandle.cpp StringUtils.cpp LazyJsonParser.cpp BlankNodeManager.cpp)
qlever_target_link_libraries(util re2::re2)
2 changes: 1 addition & 1 deletion src/util/Synchronized.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class Synchronized {
return f(data_);
}

/// const overload of with WriteLock
/// const overload of `withWriteLock`
template <typename F>
auto withWriteLock(F f) const {
std::lock_guard l(mutex());
Expand Down
Loading
Loading