Skip to content

Commit

Permalink
SPARQL Update Part 1: Located Triples (#1379)
Browse files Browse the repository at this point in the history
This is the first PR in a series of PRs that will implement SPARQL UPDATE for QLever.

It implements `LocatedTriples`. These are triples that know which block in a given permutation they belong to and whether they are to be inserted or deleted. They come with the functionality to find the corresponding blocks for a set of triples (to turn them into `LocatedTriples`) as well as the functionality to merge a block from a permutation with all the `LocatedTriples` that belong to this block. The latter function is the core functionality to incorporate the results of an UPDATE into subsequent queries.
  • Loading branch information
Qup42 authored Jul 18, 2024
1 parent 6a6a4b8 commit ee0e2ac
Show file tree
Hide file tree
Showing 10 changed files with 1,269 additions and 4 deletions.
11 changes: 11 additions & 0 deletions src/engine/idTable/IdTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,17 @@ class IdTableStatic
*(static_cast<Base*>(this)) = std::move(b);
return *this;
}

// This operator is only for debugging and testing. It returns a
// human-readable representation.
friend std::ostream& operator<<(std::ostream& os,
const IdTableStatic& idTable) {
os << "{ ";
std::ranges::copy(
idTable, std::ostream_iterator<columnBasedIdTable::Row<Id>>(os, " "));
os << "}";
return os;
}
};

// This was previously implemented as an alias (`using IdTable =
Expand Down
12 changes: 12 additions & 0 deletions src/engine/idTable/IdTableRow.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <variant>
#include <vector>

#include "global/Id.h"
#include "util/Enums.h"
#include "util/Exception.h"
#include "util/Forward.h"
Expand Down Expand Up @@ -93,6 +94,17 @@ class Row {
std::ranges::copy(*this, result.begin());
return result;
}

// Debugging and testing only: write a human-readable representation of the
// row to `os`. The entries are separated by single spaces and enclosed in
// parentheses, e.g. `(a b c)`. A row without columns is printed as `()`.
// Only available for rows of `Id`s.
friend std::ostream& operator<<(std::ostream& os, const Row& idTableRow)
    requires(std::is_same_v<T, Id>) {
  os << "(";
  for (size_t i = 0; i < idTableRow.numColumns(); ++i) {
    // Write the separator *before* every entry but the first. That way the
    // closing parenthesis below does not depend on the loop being entered,
    // and an empty row still yields balanced output.
    if (i != 0) {
      os << " ";
    }
    os << idTableRow[i];
  }
  os << ")";
  return os;
}
};

// The following two classes store a reference to a row in the underlying
Expand Down
59 changes: 56 additions & 3 deletions src/global/IdTriple.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,65 @@
// Copyright 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Authors: Hannah Bast <[email protected]>
// Authors:
// 2023 Hannah Bast <[email protected]>
// 2024 Julian Mundhahs <[email protected]>

#pragma once

#include <array>
#include <ostream>

#include "global/Id.h"
#include "index/CompressedRelation.h"

// Should we have an own class for this? We need this at several places.
using IdTriple = std::array<Id, 3>;
// A triple of `Id`s together with `N` additional payload `Id`s. With the
// default `N == 0` there is no payload and the object consists of exactly the
// three IDs of the triple.
template <size_t N = 0>
struct IdTriple {
  // The three IDs that define the triple.
  std::array<Id, 3> ids_;
  // Some additional payload of the triple, e.g. which graph it belongs to.
  std::array<Id, N> payload_;

  // Construct a triple without payload. Only available for `N == 0`.
  explicit IdTriple(const std::array<Id, 3>& ids) requires(N == 0)
      : ids_(ids), payload_() {}

  // Construct a triple with payload. Only available for `N != 0`.
  explicit IdTriple(const std::array<Id, 3>& ids,
                    const std::array<Id, N>& payload) requires(N != 0)
      : ids_(ids), payload_(payload) {}

  // Human-readable representation: `IdTriple(` followed by all IDs and then
  // all payload entries (each followed by `, `) and a closing `)`.
  friend std::ostream& operator<<(std::ostream& os, const IdTriple& triple) {
    os << "IdTriple(";
    std::ranges::copy(triple.ids_, std::ostream_iterator<Id>(os, ", "));
    std::ranges::copy(triple.payload_, std::ostream_iterator<Id>(os, ", "));
    os << ")";
    return os;
  }

  // Lexicographic order, first by `ids_`, then by `payload_`. The payload has
  // to take part in the comparison, because the defaulted `operator==` below
  // compares it as well; otherwise two triples with equal IDs but different
  // payloads would be equivalent according to `<=>` but unequal according to
  // `==`. For `N == 0` only the three IDs are compared.
  // TODO: default once we drop clang16 with libc++16
  std::strong_ordering operator<=>(const IdTriple& other) const {
    std::strong_ordering idsOrder =
        std::tie(ids_[0], ids_[1], ids_[2]) <=>
        std::tie(other.ids_[0], other.ids_[1], other.ids_[2]);
    if constexpr (N != 0) {
      if (idsOrder != 0) {
        return idsOrder;
      }
      // The IDs are equal, so the first differing payload entry decides.
      for (size_t i = 0; i < N; ++i) {
        std::strong_ordering payloadOrder = payload_[i] <=> other.payload_[i];
        if (payloadOrder != 0) {
          return payloadOrder;
        }
      }
    }
    return idsOrder;
  }
  bool operator==(const IdTriple& other) const = default;

  // Hash over the IDs and the payload (consistent with `operator==`).
  template <typename H>
  friend H AbslHashValue(H h, const IdTriple& c) {
    return H::combine(std::move(h), c.ids_, c.payload_);
  }

  // Return a copy of this triple where the IDs are reordered according to
  // `keyOrder`: the ID at position `i` of the result is the ID at position
  // `keyOrder[i]` of this triple. The payload is copied unchanged.
  IdTriple<N> permute(const std::array<size_t, 3>& keyOrder) const {
    std::array<Id, 3> newIds{ids_[keyOrder[0]], ids_[keyOrder[1]],
                             ids_[keyOrder[2]]};
    if constexpr (N == 0) {
      return IdTriple<N>(newIds);
    } else {
      return IdTriple<N>(newIds, payload_);
    }
  }

  // Convert to the triple type that is used by the block metadata. Only
  // available for triples without payload.
  CompressedBlockMetadata::PermutedTriple toPermutedTriple() const
      requires(N == 0) {
    return {ids_[0], ids_[1], ids_[2]};
  }
};
2 changes: 1 addition & 1 deletion src/index/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ add_subdirectory(vocabulary)
add_library(index
Index.cpp IndexImpl.cpp IndexImpl.Text.cpp
Vocabulary.cpp VocabularyOnDisk.cpp
Permutation.cpp TextMetaData.cpp
LocatedTriples.cpp Permutation.cpp TextMetaData.cpp
DocsDB.cpp FTSAlgorithms.cpp
PrefixHeuristic.cpp CompressedRelation.cpp
PatternCreator.cpp)
Expand Down
245 changes: 245 additions & 0 deletions src/index/LocatedTriples.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,245 @@
// Copyright 2023 - 2024, University of Freiburg
// Chair of Algorithms and Data Structures
// Authors:
// 2023 Hannah Bast <[email protected]>
// 2024 Julian Mundhahs <[email protected]>

#include "index/LocatedTriples.h"

#include <algorithm>

#include "absl/strings/str_join.h"
#include "index/CompressedRelation.h"
#include "util/ChunkedForLoop.h"

// ____________________________________________________________________________
// Compute, for each of the given `triples`, the index of the block of
// `blockMetadata` it belongs to. The triples are first permuted according to
// `keyOrder`. The result contains one `LocatedTriple` per input triple, in the
// order of the input, and each of them carries the flag `shouldExist`. The
// `cancellationHandle` is checked between chunks of 10'000 triples.
std::vector<LocatedTriple> LocatedTriple::locateTriplesInPermutation(
    std::span<const IdTriple<0>> triples,
    std::span<const CompressedBlockMetadata> blockMetadata,
    const std::array<size_t, 3>& keyOrder, bool shouldExist,
    ad_utility::SharedCancellationHandle cancellationHandle) {
  std::vector<LocatedTriple> out;
  out.reserve(triples.size());

  // Locate the `i`-th triple and append the result to `out`.
  auto locateSingleTriple = [&triples, &out, &blockMetadata, &keyOrder,
                             &shouldExist](size_t i) {
    auto triple = triples[i].permute(keyOrder);
    // A triple belongs to the first block that contains at least one triple
    // that is larger than or equal to the triple, i.e. the first block whose
    // last triple is not smaller. See `LocatedTriples.h` for a discussion of
    // the corner cases.
    auto firstMatchingBlock = std::ranges::lower_bound(
        blockMetadata, triple.toPermutedTriple(), std::less<>{},
        &CompressedBlockMetadata::lastTriple_);
    size_t blockIndex = firstMatchingBlock - blockMetadata.begin();
    out.emplace_back(blockIndex, triple, shouldExist);
  };

  // Throws if the operation was cancelled in the meantime.
  auto checkCancellation = [&cancellationHandle]() {
    cancellationHandle->throwIfCancelled();
  };

  ad_utility::chunkedForLoop<10'000>(0, triples.size(), locateSingleTriple,
                                     checkCancellation);
  return out;
}

// ____________________________________________________________________________
// Return true iff `map_` has an entry for the block with the given index.
bool LocatedTriplesPerBlock::hasUpdates(size_t blockIndex) const {
  return map_.find(blockIndex) != map_.end();
}

// ____________________________________________________________________________
// Return the number of located triples of the block with the given index that
// are to be inserted (first entry of the result) and that are to be deleted
// (second entry). Both numbers are zero if the block has no located triples.
NumAddedAndDeleted LocatedTriplesPerBlock::numTriples(size_t blockIndex) const {
  // If no located triples for `blockIndex` exist, there is no entry in `map_`.
  auto blockIt = map_.find(blockIndex);
  if (blockIt == map_.end()) {
    return {0, 0};
  }

  // Bind by reference: the triples are only counted, so there is no reason
  // to copy the whole container.
  const auto& blockUpdateTriples = blockIt->second;
  size_t countInserts = std::ranges::count_if(
      blockUpdateTriples, &LocatedTriple::shouldTripleExist_);
  return {countInserts, blockUpdateTriples.size() - countInserts};
}

// ____________________________________________________________________________
// Collect the first `numIndexColumns` entries of a row (anything that can be
// indexed with `operator[]`) into a tuple of references, such that rows can be
// compared lexicographically on exactly these columns.
template <size_t numIndexColumns>
requires(numIndexColumns >= 1 && numIndexColumns <= 3)
auto tieIdTableRow(auto& row) {
  auto tieColumns = [&row]<size_t... Indices>(std::index_sequence<Indices...>) {
    return std::tie(row[Indices]...);
  };
  return tieColumns(std::make_index_sequence<numIndexColumns>{});
}

// ____________________________________________________________________________
// Collect the last `numIndexColumns` IDs of a located triple into a tuple of
// references. Note that `lt` is dereferenced with `->`, so it has to be an
// iterator or pointer to an object with a member `triple_.ids_`.
template <size_t numIndexColumns>
requires(numIndexColumns >= 1 && numIndexColumns <= 3)
auto tieLocatedTriple(auto& lt) {
  auto& ids = lt->triple_.ids_;
  // The relevant IDs are the ones at the positions
  // `3 - numIndexColumns, ..., 2`.
  auto tieIds = [&ids]<size_t... Offsets>(std::index_sequence<Offsets...>) {
    return std::tie(ids[3 - numIndexColumns + Offsets]...);
  };
  return tieIds(std::make_index_sequence<numIndexColumns>{});
}

// ____________________________________________________________________________
// Merge the located triples of the block with index `blockIndex` into the
// given `block` and return the result as a new `IdTable` with the same number
// of columns and the same allocator as `block`. Only the first
// `numIndexColumns` columns of a row are compared with the last
// `numIndexColumns` IDs of a located triple. The merge is a single
// simultaneous pass over both inputs.
// NOTE(review): this presumably requires that `block` and the located triples
// are both sorted in ascending order on the compared columns - confirm against
// the definition of `LocatedTriples`.
template <size_t numIndexColumns>
IdTable LocatedTriplesPerBlock::mergeTriplesImpl(size_t blockIndex,
const IdTable& block) const {
// This method should only be called if there are located triples in the
// specified block.
AD_CONTRACT_CHECK(map_.contains(blockIndex));

// The block must have at least as many columns as are compared.
AD_CONTRACT_CHECK(numIndexColumns <= block.numColumns());

// Reserve space for the largest possible result (all rows of the block plus
// all insertions). The result is shrunk to the actual size at the end.
auto numInsertsAndDeletes = numTriples(blockIndex);
IdTable result{block.numColumns(), block.getAllocator()};
result.resize(block.numRows() + numInsertsAndDeletes.numAdded_);

const auto& locatedTriples = map_.at(blockIndex);

// Comparison of a located triple (given as an iterator) with a row of the
// block on the relevant columns.
auto lessThan = [](const auto& lt, const auto& row) {
return tieLocatedTriple<numIndexColumns>(lt) <
tieIdTableRow<numIndexColumns>(row);
};
auto equal = [](const auto& lt, const auto& row) {
return tieLocatedTriple<numIndexColumns>(lt) ==
tieIdTableRow<numIndexColumns>(row);
};

// The read positions in the two inputs and the write position in the result.
auto rowIt = block.begin();
auto locatedTripleIt = locatedTriples.begin();
auto resultIt = result.begin();

// Write the relevant IDs of `locatedTriple` to the current row of the result
// and advance the write position.
auto writeTripleToResult = [&result, &resultIt](auto& locatedTriple) {
for (size_t i = 0; i < numIndexColumns; i++) {
(*resultIt)[i] = locatedTriple.triple_.ids_[3 - numIndexColumns + i];
}
// Write UNDEF to any additional columns.
for (size_t i = numIndexColumns; i < result.numColumns(); i++) {
(*resultIt)[i] = ValueId::makeUndefined();
}
resultIt++;
};

while (rowIt != block.end() && locatedTripleIt != locatedTriples.end()) {
if (lessThan(locatedTripleIt, *rowIt)) {
// The located triple is smaller than the current row. If it is to be
// deleted, there is nothing to do and it is skipped.
if (locatedTripleIt->shouldTripleExist_) {
// Insertion of a non-existent triple.
writeTripleToResult(*locatedTripleIt);
}
locatedTripleIt++;
} else if (equal(locatedTripleIt, *rowIt)) {
// The located triple matches the current row. If it is to be inserted,
// only the located triple is skipped; the row itself stays at the read
// position and is handled by a later iteration or by the loop below.
if (!locatedTripleIt->shouldTripleExist_) {
// Deletion of an existing triple.
rowIt++;
}
locatedTripleIt++;
} else {
// The rowIt is not deleted - copy it
*resultIt++ = *rowIt++;
}
}

// At most one of the two inputs has remaining elements. Remaining located
// triples are written if they are insertions, remaining rows are copied.
if (locatedTripleIt != locatedTriples.end()) {
AD_CORRECTNESS_CHECK(rowIt == block.end());
std::ranges::for_each(
std::ranges::subrange(locatedTripleIt, locatedTriples.end()) |
std::views::filter(&LocatedTriple::shouldTripleExist_),
writeTripleToResult);
}
if (rowIt != block.end()) {
AD_CORRECTNESS_CHECK(locatedTripleIt == locatedTriples.end());
while (rowIt != block.end()) {
*resultIt++ = *rowIt++;
}
}

// Shrink the result to the number of rows that were actually written.
result.resize(resultIt - result.begin());
return result;
}

// ____________________________________________________________________________
// Merge the located triples of the block with index `blockIndex` into
// `block`. This only dispatches from the runtime value `numIndexColumns`
// (which has to be 1, 2, or 3) to the matching instantiation of
// `mergeTriplesImpl`.
IdTable LocatedTriplesPerBlock::mergeTriples(size_t blockIndex,
                                             const IdTable& block,
                                             size_t numIndexColumns) const {
  switch (numIndexColumns) {
    case 3:
      return mergeTriplesImpl<3>(blockIndex, block);
    case 2:
      return mergeTriplesImpl<2>(blockIndex, block);
    default:
      // Any value other than 1 is a bug at this point.
      AD_CORRECTNESS_CHECK(numIndexColumns == 1);
      return mergeTriplesImpl<1>(blockIndex, block);
  }
}

// ____________________________________________________________________________
// Add the given `locatedTriples`, each to the block given by its
// `blockIndex_`, and then update the augmented metadata. Return one handle
// (an iterator into the located triples of the respective block) per added
// triple, in the order of the input. The handles can be passed to `erase`.
// Each triple must not be contained yet, otherwise a correctness check fails.
std::vector<LocatedTriples::iterator> LocatedTriplesPerBlock::add(
    std::span<const LocatedTriple> locatedTriples) {
  std::vector<LocatedTriples::iterator> handles;
  handles.reserve(locatedTriples.size());
  // Iterate by reference: `emplace` below makes the only copy that is needed.
  for (const auto& triple : locatedTriples) {
    // Note: `operator[]` creates the entry for the block if there is none yet.
    LocatedTriples& locatedTriplesInBlock = map_[triple.blockIndex_];
    auto [handle, wasInserted] = locatedTriplesInBlock.emplace(triple);
    AD_CORRECTNESS_CHECK(wasInserted == true);
    AD_CORRECTNESS_CHECK(handle != locatedTriplesInBlock.end());
    ++numTriples_;
    handles.emplace_back(handle);
  }

  updateAugmentedMetadata();

  return handles;
}

// ____________________________________________________________________________
void LocatedTriplesPerBlock::erase(size_t blockIndex,
LocatedTriples::iterator iter) {
auto blockIter = map_.find(blockIndex);
AD_CONTRACT_CHECK(blockIter != map_.end(), "Block ", blockIndex,
" is not contained.");
auto& block = blockIter->second;
block.erase(iter);
numTriples_--;
if (block.empty()) {
map_.erase(blockIndex);
}
updateAugmentedMetadata();
}

// ____________________________________________________________________________
// Store `metadata` as the original block metadata (without any located
// triples) and recompute the augmented metadata from it.
void LocatedTriplesPerBlock::setOriginalMetadata(
    std::vector<CompressedBlockMetadata> metadata) {
  // `metadata` was passed by value, so it can be moved into the member.
  originalMetadata_ = std::move(metadata);
  // The augmented metadata is derived from the original metadata, so it has to
  // be rebuilt now.
  updateAugmentedMetadata();
}

// ____________________________________________________________________________
// Recompute `augmentedMetadata_` from scratch: start with a copy of
// `originalMetadata_` and then, for every block with located triples, widen
// the range `[firstTriple_, lastTriple_]` such that it also covers the
// smallest and the largest located triple of that block.
// NOTE(review): `augmentedMetadata_.value()` presumably throws if
// `originalMetadata_` holds no value yet - confirm that `setOriginalMetadata`
// is always called first.
void LocatedTriplesPerBlock::updateAugmentedMetadata() {
  // Copy to preserve originalMetadata_.
  augmentedMetadata_ = originalMetadata_;
  // TODO<C++23> use view::enumerate
  for (size_t blockIndex = 0; auto& blockMetadata : augmentedMetadata_.value()) {
    auto updatesIt = map_.find(blockIndex);
    if (updatesIt != map_.end()) {
      const auto& blockUpdates = updatesIt->second;
      const auto smallestUpdate =
          blockUpdates.begin()->triple_.toPermutedTriple();
      const auto largestUpdate =
          blockUpdates.rbegin()->triple_.toPermutedTriple();
      blockMetadata.firstTriple_ =
          std::min(blockMetadata.firstTriple_, smallestUpdate);
      blockMetadata.lastTriple_ =
          std::max(blockMetadata.lastTriple_, largestUpdate);
    }
    ++blockIndex;
  }
}

// ____________________________________________________________________________
std::ostream& operator<<(std::ostream& os, const LocatedTriples& lts) {
os << "{ ";
std::ranges::copy(lts, std::ostream_iterator<LocatedTriple>(os, " "));
os << "}";
return os;
}

// ____________________________________________________________________________
std::ostream& operator<<(std::ostream& os, const std::vector<IdTriple<0>>& v) {
std::ranges::copy(v, std::ostream_iterator<IdTriple<0>>(os, ", "));
return os;
}
Loading

0 comments on commit ee0e2ac

Please sign in to comment.