From 2a09eceb9f9a85af6d2b8def4e6d4abaa18412fc Mon Sep 17 00:00:00 2001 From: Johannes Kalmbach Date: Fri, 8 Nov 2024 14:21:21 +0100 Subject: [PATCH] Add `DeltaTriplesManager` (#1603) The already existing `DeltaTriples` class maintains a dynamically changing set of insertions and deletions relative to the original input data, together with a (single) local vocab. The class is not threadsafe and has to be used with care. In particular, concurrent update queries have to be serialized, and while a query makes use of the "delta triples", it has to be made sure that they are not changed over the course of the processing of that query. Both of these problems are solved by the new `DeltaTriplesManager` class. The index has a single object of this class. It maintains a single `DeltaTriples` object, write access to which is strictly serialized. Each new query gets a so-called *snapshot* of the current delta triples. This is a full copy (of the delta triples located in each of the permutations and of the local vocab). These snapshots are read-only and multiple queries can share the same snapshot. A snapshot lives as long as one query using it is still being processed. --- src/engine/CountAvailablePredicates.cpp | 7 +- src/engine/GroupBy.cpp | 10 +-- src/engine/HasPredicateScan.cpp | 13 +-- src/engine/IndexScan.cpp | 12 +-- src/engine/Operation.h | 4 +- src/engine/QueryExecutionContext.h | 25 +++--- src/engine/QueryExecutionTree.cpp | 9 +-- src/engine/QueryExecutionTree.h | 8 +- src/index/DeltaTriples.cpp | 62 +++++++++++--- src/index/DeltaTriples.h | 94 +++++++++++++++++----- src/index/Index.cpp | 49 +++++++----- src/index/Index.h | 36 +++++---- src/index/IndexImpl.cpp | 35 ++++---- src/index/IndexImpl.h | 33 +++++--- src/index/LocatedTriples.cpp | 16 ++++ src/index/LocatedTriples.h | 10 ++- src/index/Permutation.cpp | 58 ++++++++------ src/index/Permutation.h | 41 +++++----- test/DeltaTriplesTest.cpp | 102 ++++++++++++++++++++++++ test/DeltaTriplesTestHelpers.h | 2 +- test/IndexTest.cpp | 66 +++++++++------ test/util/IndexTestHelpers.cpp | 16 ++-- 22 files changed, 497 insertions(+), 211 deletions(-) diff --git a/src/engine/CountAvailablePredicates.cpp b/src/engine/CountAvailablePredicates.cpp index 095015d6a2..e78fcca694 100644 --- a/src/engine/CountAvailablePredicates.cpp +++ b/src/engine/CountAvailablePredicates.cpp @@ -165,9 +165,10 @@ void CountAvailablePredicates::computePatternTrickAllEntities( TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt, std::nullopt} .toScanSpecification(index); - auto fullHasPattern = index.getPermutation(Permutation::Enum::PSO) - .lazyScan(scanSpec, std::nullopt, {}, - cancellationHandle_, deltaTriples()); + auto fullHasPattern = + index.getPermutation(Permutation::Enum::PSO) + .lazyScan(scanSpec, std::nullopt, {}, cancellationHandle_, + locatedTriplesSnapshot()); for (const auto& idTable : fullHasPattern) { for (const auto& patternId : idTable.getColumn(1)) { AD_CORRECTNESS_CHECK(patternId.getDatatype() == Datatype::Int); diff --git a/src/engine/GroupBy.cpp b/src/engine/GroupBy.cpp index cc00887845..e6ff853c48 100644 --- a/src/engine/GroupBy.cpp +++ b/src/engine/GroupBy.cpp @@ -665,7 +665,7 @@ std::optional GroupBy::computeGroupByObjectWithCount() const { getExecutionContext()->getIndex().getPimpl().getPermutation( indexScan->permutation()); auto result = permutation.getDistinctCol1IdsAndCounts( - col0Id.value(), cancellationHandle_, deltaTriples()); + col0Id.value(), cancellationHandle_, locatedTriplesSnapshot()); indexScan->updateRuntimeInformationWhenOptimizedOut( {}, RuntimeInformation::Status::optimizedOut); @@ -717,8 +717,8 @@ std::optional GroupBy::computeGroupByForFullIndexScan() const { const auto& permutation = getExecutionContext()->getIndex().getPimpl().getPermutation( permutationEnum.value()); - auto table = permutation.getDistinctCol0IdsAndCounts(cancellationHandle_, - deltaTriples()); + auto table = permutation.getDistinctCol0IdsAndCounts( + cancellationHandle_, locatedTriplesSnapshot()); if (numCounts == 0) { table.setColumnSubset({{0}}); } @@ -840,7 +840,7 @@ std::optional GroupBy::computeGroupByForJoinWithFullScan() const { Id currentId = subresult->idTable()(0, columnIndex); size_t currentCount = 0; size_t currentCardinality = - index.getCardinality(currentId, permutation, deltaTriples()); + index.getCardinality(currentId, permutation, locatedTriplesSnapshot()); auto pushRow = [&]() { // If the count is 0 this means that the element with the `currentId` @@ -863,7 +863,7 @@ std::optional GroupBy::computeGroupByForJoinWithFullScan() const { // without the internally added triples, but that is not easy to // retrieve right now. currentCardinality = - index.getCardinality(id, permutation, deltaTriples()); + index.getCardinality(id, permutation, locatedTriplesSnapshot()); } currentCount += currentCardinality; } diff --git a/src/engine/HasPredicateScan.cpp b/src/engine/HasPredicateScan.cpp index 5c494ab13c..b01ede635b 100644 --- a/src/engine/HasPredicateScan.cpp +++ b/src/engine/HasPredicateScan.cpp @@ -267,9 +267,10 @@ ProtoResult HasPredicateScan::computeResult( TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), std::nullopt, std::nullopt} .toScanSpecification(index); - auto hasPattern = index.getPermutation(Permutation::Enum::PSO) - .lazyScan(scanSpec, std::nullopt, {}, - cancellationHandle_, deltaTriples()); + auto hasPattern = + index.getPermutation(Permutation::Enum::PSO) + .lazyScan(scanSpec, std::nullopt, {}, cancellationHandle_, + locatedTriplesSnapshot()); auto getId = [this](const TripleComponent tc) { std::optional id = tc.toValueId(getIndex().getVocab()); @@ -339,9 +340,9 @@ void HasPredicateScan::computeFreeO( TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE), subjectAsId, std::nullopt} .toScanSpecification(index); - auto hasPattern = - index.getPermutation(Permutation::Enum::PSO) - .scan(std::move(scanSpec), {}, cancellationHandle_, deltaTriples()); + auto hasPattern = index.getPermutation(Permutation::Enum::PSO) + .scan(std::move(scanSpec), {}, cancellationHandle_, + locatedTriplesSnapshot()); AD_CORRECTNESS_CHECK(hasPattern.numRows() <= 1); for (Id patternId : hasPattern.getColumn(0)) { const auto& pattern = patterns[patternId.getInt()]; diff --git a/src/engine/IndexScan.cpp b/src/engine/IndexScan.cpp index 9217e91ac0..dc6781caef 100644 --- a/src/engine/IndexScan.cpp +++ b/src/engine/IndexScan.cpp @@ -162,7 +162,7 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) { const auto& index = _executionContext->getIndex(); idTable = index.scan(getScanSpecification(), permutation_, additionalColumns(), - cancellationHandle_, deltaTriples(), getLimit()); + cancellationHandle_, locatedTriplesSnapshot(), getLimit()); AD_CORRECTNESS_CHECK(idTable.numColumns() == getResultWidth()); LOG(DEBUG) << "IndexScan result computation done.\n"; checkCancellation(); @@ -174,7 +174,7 @@ ProtoResult IndexScan::computeResult(bool requestLaziness) { size_t IndexScan::computeSizeEstimate() const { AD_CORRECTNESS_CHECK(_executionContext); return getIndex().getResultSizeOfScan(getScanSpecification(), permutation_, - deltaTriples()); + locatedTriplesSnapshot()); } // _____________________________________________________________________________ @@ -195,7 +195,7 @@ void IndexScan::determineMultiplicities() { return {1.0f}; } else if (numVariables_ == 2) { return idx.getMultiplicities(*getPermutedTriple()[0], permutation_, - deltaTriples()); + locatedTriplesSnapshot()); } else { AD_CORRECTNESS_CHECK(numVariables_ == 3); return idx.getMultiplicities(permutation_); @@ -245,8 +245,8 @@ Permutation::IdTableGenerator IndexScan::getLazyScan( .getImpl() .getPermutation(permutation()) .lazyScan(getScanSpecification(), std::move(actualBlocks), - additionalColumns(), cancellationHandle_, deltaTriples(), - getLimit()); + additionalColumns(), cancellationHandle_, + locatedTriplesSnapshot(), getLimit()); }; // ________________________________________________________________ @@ -254,7 +254,7 @@ std::optional IndexScan::getMetadataForScan() const { const auto& index = getExecutionContext()->getIndex().getImpl(); return index.getPermutation(permutation()) - .getMetadataAndBlocks(getScanSpecification(), deltaTriples()); + .getMetadataAndBlocks(getScanSpecification(), locatedTriplesSnapshot()); }; // ________________________________________________________________ diff --git a/src/engine/Operation.h b/src/engine/Operation.h index 61702c7766..6f95633b33 100644 --- a/src/engine/Operation.h +++ b/src/engine/Operation.h @@ -69,8 +69,8 @@ class Operation { const Index& getIndex() const { return _executionContext->getIndex(); } - const DeltaTriples& deltaTriples() const { - return _executionContext->deltaTriples(); + const auto& locatedTriplesSnapshot() const { + return _executionContext->locatedTriplesSnapshot(); } // Get a unique, not ambiguous string representation for a subtree. diff --git a/src/engine/QueryExecutionContext.h b/src/engine/QueryExecutionContext.h index 70657da59b..de7b5a4f6e 100644 --- a/src/engine/QueryExecutionContext.h +++ b/src/engine/QueryExecutionContext.h @@ -1,8 +1,7 @@ -// Copyright 2011, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: -// 2011-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) -// 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +// Copyright 2011 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Björn Buchhold [2011 - 2017] +// Johannes Kalmbach [2017 - 2024] #pragma once @@ -92,7 +91,10 @@ class QueryExecutionContext { [[nodiscard]] const Index& getIndex() const { return _index; } - const DeltaTriples& deltaTriples() const { return *deltaTriples_; } + const LocatedTriplesSnapshot& locatedTriplesSnapshot() const { + AD_CORRECTNESS_CHECK(sharedLocatedTriplesSnapshot_ != nullptr); + return *sharedLocatedTriplesSnapshot_; + } void clearCacheUnpinnedOnly() { getQueryTreeCache().clearUnpinnedOnly(); } @@ -123,10 +125,13 @@ class QueryExecutionContext { private: const Index& _index; - // TODO This has to be stored externally once we properly support - // SPARQL UPDATE, currently it is just a stub to make the interface work. - std::shared_ptr deltaTriples_{ - std::make_shared(_index)}; + + // When the `QueryExecutionContext` is constructed, get a stable read-only + // snapshot of the current (located) delta triples. These can then be used + // by the respective query without interfering with further incoming + // update operations. + SharedLocatedTriplesSnapshot sharedLocatedTriplesSnapshot_{ + _index.deltaTriplesManager().getCurrentSnapshot()}; QueryResultCache* const _subtreeCache; // allocators are copied but hold shared state ad_utility::AllocatorWithLimit _allocator; diff --git a/src/engine/QueryExecutionTree.cpp b/src/engine/QueryExecutionTree.cpp index aed58d4fde..2b2c393928 100644 --- a/src/engine/QueryExecutionTree.cpp +++ b/src/engine/QueryExecutionTree.cpp @@ -1,8 +1,7 @@ -// Copyright 2015, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: -// 2015-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) -// 2018- Johannes Kalmbach (kalmbach@informatik.uni-freiburg.de) +// Copyright 2015 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Björn Buchhold [2015 - 2017] +// Johannes Kalmbach [2017 - 2024] #include "./QueryExecutionTree.h" diff --git a/src/engine/QueryExecutionTree.h b/src/engine/QueryExecutionTree.h index 0519082b78..6a4b63c712 100644 --- a/src/engine/QueryExecutionTree.h +++ b/src/engine/QueryExecutionTree.h @@ -1,6 +1,8 @@ -// Copyright 2015, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Björn Buchhold (buchhold@informatik.uni-freiburg.de) +// Copyright 2015 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Authors: Björn Buchhold +// Johannes Kalmbach + #pragma once #include diff --git a/src/index/DeltaTriples.cpp b/src/index/DeltaTriples.cpp index a07d342ec6..0d2ac3bac9 100644 --- a/src/index/DeltaTriples.cpp +++ b/src/index/DeltaTriples.cpp @@ -1,8 +1,8 @@ // Copyright 2023 - 2024, University of Freiburg -// Chair of Algorithms and Data Structures. -// Authors: -// 2023 Hannah Bast -// 2024 Julian Mundhahs +// Chair of Algorithms and Data Structures +// Authors: Hannah Bast +// Julian Mundhahs +// Johannes Kalmbach #include "index/DeltaTriples.h" @@ -21,8 +21,7 @@ LocatedTriples::iterator& DeltaTriples::LocatedTripleHandles::forPermutation( void DeltaTriples::clear() { triplesInserted_.clear(); triplesDeleted_.clear(); - std::ranges::for_each(locatedTriplesPerBlock_, - &LocatedTriplesPerBlock::clear); + std::ranges::for_each(locatedTriples(), &LocatedTriplesPerBlock::clear); } // ____________________________________________________________________________ @@ -33,7 +32,7 @@ DeltaTriples::locateAndAddTriples(CancellationHandle cancellationHandle, std::array, Permutation::ALL.size()> intermediateHandles; for (auto permutation : Permutation::ALL) { - auto& perm = index_.getImpl().getPermutation(permutation); + auto& perm = index_.getPermutation(permutation); auto locatedTriples = LocatedTriple::locateTriplesInPermutation( // TODO: replace with `getAugmentedMetadata` once integration // is done @@ -41,7 +40,7 @@ DeltaTriples::locateAndAddTriples(CancellationHandle cancellationHandle, cancellationHandle); cancellationHandle->throwIfCancelled(); intermediateHandles[static_cast(permutation)] = - locatedTriplesPerBlock_[static_cast(permutation)].add( + this->locatedTriples()[static_cast(permutation)].add( locatedTriples); cancellationHandle->throwIfCancelled(); } @@ -60,8 +59,8 @@ void DeltaTriples::eraseTripleInAllPermutations(LocatedTripleHandles& handles) { // Erase for all permutations. for (auto permutation : Permutation::ALL) { auto ltIter = handles.forPermutation(permutation); - locatedTriplesPerBlock_[static_cast(permutation)].erase( - ltIter->blockIndex_, ltIter); + locatedTriples()[static_cast(permutation)].erase(ltIter->blockIndex_, + ltIter); } } @@ -172,7 +171,48 @@ void DeltaTriples::modifyTriplesImpl(CancellationHandle cancellationHandle, } // ____________________________________________________________________________ -const LocatedTriplesPerBlock& DeltaTriples::getLocatedTriplesPerBlock( +const LocatedTriplesPerBlock& +LocatedTriplesSnapshot::getLocatedTriplesForPermutation( Permutation::Enum permutation) const { return locatedTriplesPerBlock_[static_cast(permutation)]; } + +// ____________________________________________________________________________ +SharedLocatedTriplesSnapshot DeltaTriples::getSnapshot() const { + // NOTE: Both members of the `LocatedTriplesSnapshot` are copied, but the + // `localVocab_` has no copy constructor (in order to avoid accidental + // copies), hence the explicit `clone`. + return SharedLocatedTriplesSnapshot{std::make_shared( + locatedTriples(), localVocab_.clone())}; +} + +// ____________________________________________________________________________ +DeltaTriples::DeltaTriples(const Index& index) + : DeltaTriples(index.getImpl()) {} + +// ____________________________________________________________________________ +DeltaTriplesManager::DeltaTriplesManager(const IndexImpl& index) + : deltaTriples_{index}, + currentLocatedTriplesSnapshot_{deltaTriples_.rlock()->getSnapshot()} {} + +// _____________________________________________________________________________ +void DeltaTriplesManager::modify( + const std::function& function) { + // While holding the lock for the underlying `DeltaTriples`, perform the + // actual `function` (typically some combination of insert and delete + // operations) and (while still holding the lock) update the + // `currentLocatedTriplesSnapshot_`. + deltaTriples_.withWriteLock([this, &function](DeltaTriples& deltaTriples) { + function(deltaTriples); + auto newSnapshot = deltaTriples.getSnapshot(); + currentLocatedTriplesSnapshot_.withWriteLock( + [&newSnapshot](auto& currentSnapshot) { + currentSnapshot = std::move(newSnapshot); + }); + }); +} + +// _____________________________________________________________________________ +SharedLocatedTriplesSnapshot DeltaTriplesManager::getCurrentSnapshot() const { + return *currentLocatedTriplesSnapshot_.rlock(); +} diff --git a/src/index/DeltaTriples.h b/src/index/DeltaTriples.h index 05342a845b..afe13c7c07 100644 --- a/src/index/DeltaTriples.h +++ b/src/index/DeltaTriples.h @@ -1,8 +1,8 @@ // Copyright 2023 - 2024, University of Freiburg -// Chair of Algorithms and Data Structures. -// Authors: -// 2023 Hannah Bast -// 2024 Julian Mundhahs +// Chair of Algorithms and Data Structures +// Authors: Hannah Bast +// Julian Mundhahs +// Johannes Kalmbach #pragma once @@ -12,6 +12,29 @@ #include "index/IndexBuilderTypes.h" #include "index/LocatedTriples.h" #include "index/Permutation.h" +#include "util/Synchronized.h" + +// Typedef for one `LocatedTriplesPerBlock` object for each of the six +// permutations. +using LocatedTriplesPerBlockAllPermutations = + std::array; + +// The locations of a set of delta triples (triples that were inserted or +// deleted since the index was built) in each of the six permutations, and a +// local vocab. This is all the information that is required to perform a query +// that correctly respects these delta triples, hence the name. +struct LocatedTriplesSnapshot { + LocatedTriplesPerBlockAllPermutations locatedTriplesPerBlock_; + LocalVocab localVocab_; + // Get `TripleWithPosition` objects for given permutation. + const LocatedTriplesPerBlock& getLocatedTriplesForPermutation( + Permutation::Enum permutation) const; +}; + +// A shared pointer to a constant `LocatedTriplesSnapshot`, but as an explicit +// class, such that it can be forward-declared. +class SharedLocatedTriplesSnapshot + : public std::shared_ptr {}; // A class for maintaining triples that are inserted or deleted after index // building, we call these delta triples. How it works in principle: @@ -33,9 +56,16 @@ class DeltaTriples { FRIEND_TEST(DeltaTriplesTest, clear); FRIEND_TEST(DeltaTriplesTest, addTriplesToLocalVocab); + public: + using Triples = std::vector>; + using CancellationHandle = ad_utility::SharedCancellationHandle; + private: // The index to which these triples are added. - const Index& index_; + const IndexImpl& index_; + + // The located triples for all the 6 permutations. + LocatedTriplesPerBlockAllPermutations locatedTriples_; // The local vocabulary of the delta triples (they may have components, // which are not contained in the vocabulary of the original index). @@ -52,10 +82,6 @@ class DeltaTriples { static_assert(static_cast(Permutation::Enum::OSP) == 5); static_assert(Permutation::ALL.size() == 6); - // The positions of the delta triples in each of the six permutations. - std::array - locatedTriplesPerBlock_; - // Each delta triple needs to know where it is stored in each of the six // `LocatedTriplesPerBlock` above. struct LocatedTripleHandles { @@ -66,8 +92,6 @@ class DeltaTriples { }; using TriplesToHandlesMap = ad_utility::HashMap, LocatedTripleHandles>; - using Triples = std::vector>; - using CancellationHandle = ad_utility::SharedCancellationHandle; // The sets of triples added to and subtracted from the original index. Any // triple can be at most in one of the sets. The information whether a triple @@ -78,15 +102,26 @@ class DeltaTriples { public: // Construct for given index. - explicit DeltaTriples(const Index& index) : index_(index) {} + explicit DeltaTriples(const Index& index); + explicit DeltaTriples(const IndexImpl& index) : index_{index} {}; + + DeltaTriples(const DeltaTriples&) = delete; + DeltaTriples& operator=(const DeltaTriples&) = delete; // Get the common `LocalVocab` of the delta triples. private: LocalVocab& localVocab() { return localVocab_; } + auto& locatedTriples() { return locatedTriples_; } + const auto& locatedTriples() const { return locatedTriples_; } public: const LocalVocab& localVocab() const { return localVocab_; } + const LocatedTriplesPerBlock& getLocatedTriplesForPermutation( + Permutation::Enum permutation) const { + return locatedTriples_.at(static_cast(permutation)); + } + // Clear `triplesAdded_` and `triplesSubtracted_` and all associated data // structures. void clear(); @@ -101,9 +136,10 @@ class DeltaTriples { // Delete triples. void deleteTriples(CancellationHandle cancellationHandle, Triples triples); - // Get `TripleWithPosition` objects for given permutation. - const LocatedTriplesPerBlock& getLocatedTriplesPerBlock( - Permutation::Enum permutation) const; + // Return a deep copy of the `LocatedTriples` and the corresponding + // `LocalVocab` which form a snapshot of the current status of this + // `DeltaTriples` object. + SharedLocatedTriplesSnapshot getSnapshot() const; private: // Find the position of the given triple in the given permutation and add it @@ -144,7 +180,27 @@ class DeltaTriples { void eraseTripleInAllPermutations(LocatedTripleHandles& handles); }; -// DELTA TRIPLES AND THE CACHE -// -// Changes to the DeltaTriples invalidate all cache results that have an index -// scan in their subtree, which is almost all entries in practice. +// This class synchronizes the access to a `DeltaTriples` object, thus avoiding +// race conditions between concurrent updates and queries. +class DeltaTriplesManager { + ad_utility::Synchronized deltaTriples_; + ad_utility::Synchronized + currentLocatedTriplesSnapshot_; + + public: + using CancellationHandle = DeltaTriples::CancellationHandle; + using Triples = DeltaTriples::Triples; + + explicit DeltaTriplesManager(const IndexImpl& index); + FRIEND_TEST(DeltaTriplesTest, DeltaTriplesManager); + + // Modify the underlying `DeltaTriples` by applying `function` and then update + // the current snapshot. Concurrent calls to `modify` will be serialized, and + // each call to `getCurrentSnapshot` will either return the snapshot before or + // after a modification, but never one of an ongoing modification. + void modify(const std::function& function); + + // Return a shared pointer to a deep copy of the current snapshot. This can + // be safely used to execute a query without interfering with future updates. + SharedLocatedTriplesSnapshot getCurrentSnapshot() const; +}; diff --git a/src/index/Index.cpp b/src/index/Index.cpp index 86af95a798..47fcad9c82 100644 --- a/src/index/Index.cpp +++ b/src/index/Index.cpp @@ -57,15 +57,17 @@ ad_utility::BlankNodeManager* Index::getBlankNodeManager() const { } // ____________________________________________________________________________ -size_t Index::getCardinality(const TripleComponent& comp, Permutation::Enum p, - const DeltaTriples& deltaTriples) const { - return pimpl_->getCardinality(comp, p, deltaTriples); +size_t Index::getCardinality( + const TripleComponent& comp, Permutation::Enum p, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { + return pimpl_->getCardinality(comp, p, locatedTriplesSnapshot); } // ____________________________________________________________________________ -size_t Index::getCardinality(Id id, Permutation::Enum p, - const DeltaTriples& deltaTriples) const { - return pimpl_->getCardinality(id, p, deltaTriples); +size_t Index::getCardinality( + Id id, Permutation::Enum p, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { + return pimpl_->getCardinality(id, p, locatedTriplesSnapshot); } // ____________________________________________________________________________ @@ -254,10 +256,10 @@ vector Index::getMultiplicities(Permutation::Enum p) const { } // ____________________________________________________________________________ -vector Index::getMultiplicities(const TripleComponent& key, - Permutation::Enum p, - const DeltaTriples& deltaTriples) const { - return pimpl_->getMultiplicities(key, p, deltaTriples); +vector Index::getMultiplicities( + const TripleComponent& key, Permutation::Enum p, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { + return pimpl_->getMultiplicities(key, p, locatedTriplesSnapshot); } // ____________________________________________________________________________ @@ -265,10 +267,10 @@ IdTable Index::scan( const ScanSpecificationAsTripleComponent& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset) const { return pimpl_->scan(scanSpecification, p, additionalColumns, - cancellationHandle, deltaTriples, limitOffset); + cancellationHandle, locatedTriplesSnapshot, limitOffset); } // ____________________________________________________________________________ @@ -276,21 +278,32 @@ IdTable Index::scan( const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset) const { return pimpl_->scan(scanSpecification, p, additionalColumns, - cancellationHandle, deltaTriples, limitOffset); + cancellationHandle, locatedTriplesSnapshot, limitOffset); } // ____________________________________________________________________________ -size_t Index::getResultSizeOfScan(const ScanSpecification& scanSpecification, - const Permutation::Enum& permutation, - const DeltaTriples& deltaTriples) const { +size_t Index::getResultSizeOfScan( + const ScanSpecification& scanSpecification, + const Permutation::Enum& permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { return pimpl_->getResultSizeOfScan(scanSpecification, permutation, - deltaTriples); + locatedTriplesSnapshot); } // ____________________________________________________________________________ void Index::createFromFiles(const std::vector& files) { return pimpl_->createFromFiles(files); } + +// ____________________________________________________________________________ +const DeltaTriplesManager& Index::deltaTriplesManager() const { + return pimpl_->deltaTriplesManager(); +} + +// ____________________________________________________________________________ +DeltaTriplesManager& Index::deltaTriplesManager() { + return pimpl_->deltaTriplesManager(); +} diff --git a/src/index/Index.h b/src/index/Index.h index d42c5ced42..ec408f15df 100644 --- a/src/index/Index.h +++ b/src/index/Index.h @@ -1,4 +1,3 @@ -// Copyright 2015, University of Freiburg, // Chair of Algorithms and Data Structures. // Author: // 2014-2017 Björn Buchhold (buchhold@informatik.uni-freiburg.de) @@ -23,7 +22,8 @@ class IdTable; class TextBlockMetaData; class IndexImpl; -class DeltaTriples; +struct LocatedTriplesSnapshot; +class DeltaTriplesManager; class Index { private: @@ -116,14 +116,19 @@ class Index { // Get a (non-owning) pointer to the BlankNodeManager of this Index. ad_utility::BlankNodeManager* getBlankNodeManager() const; + // Get a (non-owning) pointer to the BlankNodeManager of this Index. + DeltaTriplesManager& deltaTriplesManager(); + const DeltaTriplesManager& deltaTriplesManager() const; + // -------------------------------------------------------------------------- // RDF RETRIEVAL // -------------------------------------------------------------------------- - [[nodiscard]] size_t getCardinality(const TripleComponent& comp, - Permutation::Enum permutation, - const DeltaTriples& deltaTriples) const; - [[nodiscard]] size_t getCardinality(Id id, Permutation::Enum permutation, - const DeltaTriples& deltaTriples) const; + [[nodiscard]] size_t getCardinality( + const TripleComponent& comp, Permutation::Enum permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; + [[nodiscard]] size_t getCardinality( + Id id, Permutation::Enum permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // TODO Once we have an overview over the folding this logic should // probably not be in the index class. @@ -217,9 +222,9 @@ class Index { bool hasAllPermutations() const; // _____________________________________________________________________________ - vector getMultiplicities(const TripleComponent& key, - Permutation::Enum permutation, - const DeltaTriples& deltaTriples) const; + vector getMultiplicities( + const TripleComponent& key, Permutation::Enum permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // ___________________________________________________________________ vector getMultiplicities(Permutation::Enum p) const; @@ -243,21 +248,22 @@ class Index { Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset = {}) const; // Similar to the overload of `scan` above, but the keys are specified as IDs. IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset = {}) const; // Similar to the previous overload of `scan`, but only get the exact size of // the scan result. - size_t getResultSizeOfScan(const ScanSpecification& scanSpecification, - const Permutation::Enum& permutation, - const DeltaTriples& deltaTriples) const; + size_t getResultSizeOfScan( + const ScanSpecification& scanSpecification, + const Permutation::Enum& permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // Get access to the implementation. This should be used rarely as it // requires including the rather expensive `IndexImpl.h` header diff --git a/src/index/IndexImpl.cpp b/src/index/IndexImpl.cpp index 7f5e479f59..1e69d9676b 100644 --- a/src/index/IndexImpl.cpp +++ b/src/index/IndexImpl.cpp @@ -44,6 +44,7 @@ static constexpr size_t NUM_EXTERNAL_SORTERS_AT_SAME_TIME = 2u; IndexImpl::IndexImpl(ad_utility::AllocatorWithLimit allocator) : allocator_{std::move(allocator)} { globalSingletonIndex_ = this; + deltaTriples_.emplace(*this); }; // _____________________________________________________________________________ @@ -1445,10 +1446,11 @@ Index::NumNormalAndInternal IndexImpl::numDistinctCol0( } // ___________________________________________________________________________ -size_t IndexImpl::getCardinality(Id id, Permutation::Enum permutation, - const DeltaTriples& deltaTriples) const { +size_t IndexImpl::getCardinality( + Id id, Permutation::Enum permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { if (const auto& meta = - getPermutation(permutation).getMetadata(id, deltaTriples); + getPermutation(permutation).getMetadata(id, locatedTriplesSnapshot); meta.has_value()) { return meta.value().numRows_; } @@ -1456,9 +1458,9 @@ size_t IndexImpl::getCardinality(Id id, Permutation::Enum permutation, } // ___________________________________________________________________________ -size_t IndexImpl::getCardinality(const TripleComponent& comp, - Permutation::Enum permutation, - const DeltaTriples& deltaTriples) const { +size_t IndexImpl::getCardinality( + const TripleComponent& comp, Permutation::Enum permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { // TODO This special case is only relevant for the `PSO` and `POS` // permutations, but this internal predicate should never appear in subjects // or objects anyway. @@ -1468,7 +1470,7 @@ size_t IndexImpl::getCardinality(const TripleComponent& comp, return TEXT_PREDICATE_CARDINALITY_ESTIMATE; } if (std::optional relId = comp.toValueId(getVocab()); relId.has_value()) { - return getCardinality(relId.value(), permutation, deltaTriples); + return getCardinality(relId.value(), permutation, locatedTriplesSnapshot); } return 0; } @@ -1491,10 +1493,10 @@ Index::Vocab::PrefixRanges IndexImpl::prefixRanges( // _____________________________________________________________________________ vector IndexImpl::getMultiplicities( const TripleComponent& key, Permutation::Enum permutation, - const DeltaTriples& deltaTriples) const { + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { if (auto keyId = key.toValueId(getVocab()); keyId.has_value()) { - auto meta = - getPermutation(permutation).getMetadata(keyId.value(), deltaTriples); + auto meta = getPermutation(permutation) + .getMetadata(keyId.value(), locatedTriplesSnapshot); if (meta.has_value()) { return {meta.value().getCol1Multiplicity(), meta.value().getCol2Multiplicity()}; @@ -1520,30 +1522,31 @@ IdTable IndexImpl::scan( const Permutation::Enum& permutation, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset) const { auto scanSpecification = scanSpecificationAsTc.toScanSpecification(*this); return scan(scanSpecification, permutation, additionalColumns, - cancellationHandle, deltaTriples, limitOffset); + cancellationHandle, locatedTriplesSnapshot, limitOffset); } // _____________________________________________________________________________ IdTable IndexImpl::scan( const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset) const { return getPermutation(p).scan(scanSpecification, additionalColumns, - cancellationHandle, deltaTriples, limitOffset); + cancellationHandle, locatedTriplesSnapshot, + limitOffset); } // _____________________________________________________________________________ size_t IndexImpl::getResultSizeOfScan( const ScanSpecification& scanSpecification, const Permutation::Enum& permutation, - const DeltaTriples& deltaTriples) const { + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { return getPermutation(permutation) - .getResultSizeOfScan(scanSpecification, deltaTriples); + .getResultSizeOfScan(scanSpecification, locatedTriplesSnapshot); } // _____________________________________________________________________________ diff --git a/src/index/IndexImpl.h b/src/index/IndexImpl.h index d62f4a7e13..0d5b396ccc 100644 --- a/src/index/IndexImpl.h +++ b/src/index/IndexImpl.h @@ -18,6 +18,7 @@ #include "global/SpecialIds.h" #include "index/CompressedRelation.h" #include "index/ConstantsIndexBuilding.h" +#include "index/DeltaTriples.h" #include "index/DocsDB.h" #include "index/Index.h" #include "index/IndexBuilderTypes.h" @@ -188,6 +189,8 @@ class IndexImpl { // BlankNodeManager, initialized during `readConfiguration` std::unique_ptr blankNodeManager_{nullptr}; + std::optional deltaTriples_; + public: explicit IndexImpl(ad_utility::AllocatorWithLimit allocator); @@ -261,6 +264,11 @@ class IndexImpl { ad_utility::BlankNodeManager* getBlankNodeManager() const; + DeltaTriplesManager& deltaTriplesManager() { return deltaTriples_.value(); } + const DeltaTriplesManager& deltaTriplesManager() const { + return deltaTriples_.value(); + } + // -------------------------------------------------------------------------- // -- RETRIEVAL --- // -------------------------------------------------------------------------- @@ -283,12 +291,12 @@ class IndexImpl { // ___________________________________________________________________________ size_t getCardinality(Id id, Permutation::Enum permutation, - const DeltaTriples&) const; + const LocatedTriplesSnapshot&) const; // ___________________________________________________________________________ - size_t getCardinality(const TripleComponent& comp, - Permutation::Enum permutation, - const DeltaTriples& deltaTriples) const; + size_t getCardinality( + const TripleComponent& comp, Permutation::Enum permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // ___________________________________________________________________________ std::string indexToString(VocabIndex id) const; @@ -420,9 +428,9 @@ class IndexImpl { bool hasAllPermutations() const { return SPO().isLoaded(); } // _____________________________________________________________________________ - vector getMultiplicities(const TripleComponent& key, - Permutation::Enum permutation, - const DeltaTriples&) const; + vector getMultiplicities( + const TripleComponent& key, Permutation::Enum permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // ___________________________________________________________________ vector getMultiplicities(Permutation::Enum permutation) const; @@ -432,20 +440,21 @@ class IndexImpl { const Permutation::Enum& permutation, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset = {}) const; // _____________________________________________________________________________ IdTable scan(const ScanSpecification& scanSpecification, Permutation::Enum p, Permutation::ColumnIndicesRef additionalColumns, const ad_utility::SharedCancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset = {}) const; // _____________________________________________________________________________ - size_t getResultSizeOfScan(const ScanSpecification& scanSpecification, - const Permutation::Enum& permutation, - const DeltaTriples& deltaTriples) const; + size_t getResultSizeOfScan( + const ScanSpecification& scanSpecification, + const Permutation::Enum& permutation, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; private: // Private member functions diff --git a/src/index/LocatedTriples.cpp b/src/index/LocatedTriples.cpp index 9bcad6838b..e898327a9a 100644 --- a/src/index/LocatedTriples.cpp +++ b/src/index/LocatedTriples.cpp @@ -283,3 +283,19 @@ std::ostream& operator<<(std::ostream& os, const std::vector>& v) { std::ranges::copy(v, std::ostream_iterator>(os, ", ")); return os; } + +// ____________________________________________________________________________ +bool LocatedTriplesPerBlock::containsTriple(const IdTriple<0>& triple, + bool shouldExist) const { + auto blockContains = [&triple, shouldExist](const LocatedTriples& lt, + size_t blockIndex) { + LocatedTriple locatedTriple{blockIndex, triple, shouldExist}; + locatedTriple.blockIndex_ = blockIndex; + return ad_utility::contains(lt, locatedTriple); + }; + + return std::ranges::any_of(map_, [&blockContains](auto& indexAndBlock) { + const auto& [index, block] = indexAndBlock; + return blockContains(block, index); + }); +} diff --git a/src/index/LocatedTriples.h b/src/index/LocatedTriples.h index c9d82d6745..c1b612a775 100644 --- a/src/index/LocatedTriples.h +++ b/src/index/LocatedTriples.h @@ -127,14 +127,14 @@ class LocatedTriplesPerBlock { IdTable mergeTriples(size_t blockIndex, const IdTable& block, size_t numIndexColumns, bool includeGraphColumn) const; - // Add `locatedTriples` to the `LocatedTriplesPerBlock`. + // Add `getLocatedTriplesForPermutation` to the `LocatedTriplesPerBlock`. // Return handles to where they were added (`LocatedTriples` is a sorted set, // see above). We need the handles so that we can easily remove the - // `locatedTriples` from the set again in case we need to. + // `getLocatedTriplesForPermutation` from the set again in case we need to. // // PRECONDITIONS: // - // 1. The `locatedTriples` must not already exist in + // 1. The `getLocatedTriplesForPermutation` must not already exist in // `LocatedTriplesPerBlock`. std::vector add( std::span locatedTriples); @@ -167,6 +167,10 @@ class LocatedTriplesPerBlock { augmentedMetadata_ = originalMetadata_; } + // Only used for testing. Return `true` iff a `LocatedTriple` with the given + // value for `shouldExist` is contained in any block. + bool containsTriple(const IdTriple<0>& triple, bool shouldExist) const; + // This operator is only for debugging and testing. It returns a // human-readable representation. friend std::ostream& operator<<(std::ostream& os, diff --git a/src/index/Permutation.cpp b/src/index/Permutation.cpp index 16f5113d68..cbe4b5dd1f 100644 --- a/src/index/Permutation.cpp +++ b/src/index/Permutation.cpp @@ -55,7 +55,7 @@ void Permutation::loadFromDisk(const std::string& onDiskBase, IdTable Permutation::scan(const ScanSpecification& scanSpec, ColumnIndicesRef additionalColumns, const CancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset) const { if (!isLoaded_) { throw std::runtime_error("This query requires the permutation " + @@ -64,35 +64,38 @@ IdTable Permutation::scan(const ScanSpecification& scanSpec, const auto& p = getActualPermutation(scanSpec); - return p.reader().scan(scanSpec, p.meta_.blockData(), additionalColumns, - cancellationHandle, locatedTriples(deltaTriples), - limitOffset); + return p.reader().scan( + scanSpec, p.meta_.blockData(), additionalColumns, cancellationHandle, + getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset); } // _____________________________________________________________________ size_t Permutation::getResultSizeOfScan( - const ScanSpecification& scanSpec, const DeltaTriples& deltaTriples) const { + const ScanSpecification& scanSpec, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { const auto& p = getActualPermutation(scanSpec); - return p.reader().getResultSizeOfScan(scanSpec, p.meta_.blockData(), - locatedTriples(deltaTriples)); + return p.reader().getResultSizeOfScan( + scanSpec, p.meta_.blockData(), + getLocatedTriplesForPermutation(locatedTriplesSnapshot)); } // ____________________________________________________________________________ IdTable Permutation::getDistinctCol1IdsAndCounts( Id col0Id, const CancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples) const { + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { const auto& p = getActualPermutation(col0Id); - return p.reader().getDistinctCol1IdsAndCounts(col0Id, p.meta_.blockData(), - cancellationHandle, - locatedTriples(deltaTriples)); + return p.reader().getDistinctCol1IdsAndCounts( + col0Id, p.meta_.blockData(), cancellationHandle, + getLocatedTriplesForPermutation(locatedTriplesSnapshot)); } // ____________________________________________________________________________ IdTable Permutation::getDistinctCol0IdsAndCounts( const CancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples) const { + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { return reader().getDistinctCol0IdsAndCounts( - meta_.blockData(), cancellationHandle, locatedTriples(deltaTriples)); + meta_.blockData(), cancellationHandle, + getLocatedTriplesForPermutation(locatedTriplesSnapshot)); } // _____________________________________________________________________ @@ -137,25 +140,27 @@ std::string_view Permutation::toString(Permutation::Enum permutation) { // _____________________________________________________________________ std::optional Permutation::getMetadata( - Id col0Id, const DeltaTriples& deltaTriples) const { + Id col0Id, const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { const auto& p = getActualPermutation(col0Id); if (p.meta_.col0IdExists(col0Id)) { return p.meta_.getMetaData(col0Id); } - return p.reader().getMetadataForSmallRelation(p.meta_.blockData(), col0Id, - locatedTriples(deltaTriples)); + return p.reader().getMetadataForSmallRelation( + p.meta_.blockData(), col0Id, + getLocatedTriplesForPermutation(locatedTriplesSnapshot)); } // _____________________________________________________________________ std::optional Permutation::getMetadataAndBlocks( - const ScanSpecification& scanSpec, const DeltaTriples& deltaTriples) const { + const ScanSpecification& scanSpec, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { const auto& p = getActualPermutation(scanSpec); CompressedRelationReader::ScanSpecAndBlocks mb{ scanSpec, CompressedRelationReader::getRelevantBlocks( scanSpec, p.meta_.blockData())}; - auto firstAndLastTriple = - p.reader().getFirstAndLastTriple(mb, locatedTriples(deltaTriples)); + auto firstAndLastTriple = p.reader().getFirstAndLastTriple( + mb, getLocatedTriplesForPermutation(locatedTriplesSnapshot)); if (!firstAndLastTriple.has_value()) { return std::nullopt; } @@ -169,7 +174,7 @@ Permutation::IdTableGenerator Permutation::lazyScan( std::optional> blocks, ColumnIndicesRef additionalColumns, ad_utility::SharedCancellationHandle cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset) const { const auto& p = getActualPermutation(scanSpec); if (!blocks.has_value()) { @@ -178,9 +183,10 @@ Permutation::IdTableGenerator Permutation::lazyScan( blocks = std::vector(blockSpan.begin(), blockSpan.end()); } ColumnIndices columns{additionalColumns.begin(), additionalColumns.end()}; - return p.reader().lazyScan(scanSpec, std::move(blocks.value()), - std::move(columns), std::move(cancellationHandle), - locatedTriples(deltaTriples), limitOffset); + return p.reader().lazyScan( + scanSpec, std::move(blocks.value()), std::move(columns), + std::move(cancellationHandle), + getLocatedTriplesForPermutation(locatedTriplesSnapshot), limitOffset); } // ______________________________________________________________________ @@ -210,7 +216,7 @@ const Permutation& Permutation::getActualPermutation(Id id) const { } // ______________________________________________________________________ -const LocatedTriplesPerBlock& Permutation::locatedTriples( - const DeltaTriples& deltaTriples) const { - return deltaTriples.getLocatedTriplesPerBlock(permutation_); +const LocatedTriplesPerBlock& Permutation::getLocatedTriplesForPermutation( + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const { + return locatedTriplesSnapshot.getLocatedTriplesForPermutation(permutation_); } diff --git a/src/index/Permutation.h b/src/index/Permutation.h index 93cad7e775..118153708b 100644 --- a/src/index/Permutation.h +++ b/src/index/Permutation.h @@ -1,6 +1,7 @@ -// Copyright 2018, University of Freiburg, -// Chair of Algorithms and Data Structures. -// Author: Johannes Kalmbach (johannes.kalmbach@gmail.com) +// Copyright 2018 - 2024, University of Freiburg +// Chair of Algorithms and Data Structures +// Author: Johannes Kalmbach + #pragma once #include @@ -18,7 +19,8 @@ class IdTable; // Forward declaration of `LocatedTriplesPerBlock` class LocatedTriplesPerBlock; -class DeltaTriples; +class SharedLocatedTriplesSnapshot; +struct LocatedTriplesSnapshot; // Helper class to store static properties of the different permutations to // avoid code duplication. The first template parameter is a search functor for @@ -66,7 +68,7 @@ class Permutation { IdTable scan(const ScanSpecification& scanSpec, ColumnIndicesRef additionalColumns, const CancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset = {}) const; // For a given relation, determine the `col1Id`s and their counts. This is @@ -74,11 +76,11 @@ class Permutation { // in `meta_`. IdTable getDistinctCol1IdsAndCounts( Id col0Id, const CancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples) const; + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; IdTable getDistinctCol0IdsAndCounts( const CancellationHandle& cancellationHandle, - const DeltaTriples& deltaTriples) const; + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // Typedef to propagate the `MetadataAndblocks` and `IdTableGenerator` type. using MetadataAndBlocks = @@ -102,11 +104,11 @@ class Permutation { const ScanSpecification& scanSpec, std::optional> blocks, ColumnIndicesRef additionalColumns, CancellationHandle cancellationHandle, - const DeltaTriples& deltaTriples, + const LocatedTriplesSnapshot& locatedTriplesSnapshot, const LimitOffsetClause& limitOffset = {}) const; std::optional getMetadata( - Id col0Id, const DeltaTriples& deltaTriples) const; + Id col0Id, const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // Return the metadata for the scan specified by the `scanSpecification` // along with the metadata for all the blocks that are relevant for this scan. @@ -114,12 +116,13 @@ class Permutation { // empty) return `nullopt`. std::optional getMetadataAndBlocks( const ScanSpecification& scanSpec, - const DeltaTriples& deltaTriples) const; + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; /// Similar to the previous `scan` function, but only get the size of the /// result - size_t getResultSizeOfScan(const ScanSpecification& scanSpec, - const DeltaTriples& deltaTriples) const; + size_t getResultSizeOfScan( + const ScanSpecification& scanSpec, + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; // _______________________________________________________ void setKbName(const string& name) { meta_.setName(name); } @@ -146,19 +149,21 @@ class Permutation { const Permutation& getActualPermutation(const ScanSpecification& spec) const; const Permutation& getActualPermutation(Id id) const; - const LocatedTriplesPerBlock& locatedTriples(const DeltaTriples&) const; + // From the given snapshot, get the located triples for this permutation. + const LocatedTriplesPerBlock& getLocatedTriplesForPermutation( + const LocatedTriplesSnapshot& locatedTriplesSnapshot) const; const CompressedRelationReader& reader() const { return reader_.value(); } private: - // for Log output, e.g. "POS" + // Readable name for this permutation, e.g., `POS`. std::string readableName_; - // e.g. ".pos" + // File name suffix for this permutation, e.g., `.pos`. std::string fileSuffix_; - // order of the 3 keys S(0), P(1), and O(2) for which this permutation is - // sorted, for example {1, 0, 2} for PSO. + // The order of the three components (S=0, P=1, O=2) in this permutation, + // e.g., `{1, 0, 2}` for `PSO`. array keyOrder_; - + // The metadata for this permutation. MetaData meta_; // This member is `optional` because we initialize it in a deferred way in the diff --git a/test/DeltaTriplesTest.cpp b/test/DeltaTriplesTest.cpp index e9e858d727..cb481b43b8 100644 --- a/test/DeltaTriplesTest.cpp +++ b/test/DeltaTriplesTest.cpp @@ -319,3 +319,105 @@ TEST_F(DeltaTriplesTest, rewriteLocalVocabEntriesAndBlankNodes) { auto s4 = triples[0].ids_[0]; EXPECT_EQ(s4.getBits(), blank0.getBits()); } + +// _____________________________________________________________________________ +TEST_F(DeltaTriplesTest, DeltaTriplesManager) { + // Preparation. + DeltaTriplesManager deltaTriplesManager(testQec->getIndex().getImpl()); + auto& vocab = testQec->getIndex().getVocab(); + auto cancellationHandle = + std::make_shared>(); + std::vector threads; + static constexpr size_t numThreads = 18; + static constexpr size_t numIterations = 21; + + // Insert and delete a well-defined set of triples, some independent and some + // dependent on the thread index. Check that the snapshot before in the + // middle of these updates is as expected. + auto insertAndDelete = [&](size_t threadIdx) { + LocalVocab localVocab; + SharedLocatedTriplesSnapshot beforeUpdate = + deltaTriplesManager.getCurrentSnapshot(); + for (size_t i = 0; i < numIterations; ++i) { + // The first triple in both vectors is the same for all threads, the + // others are exclusive to this thread via the `threadIdx`. + auto triplesToInsert = makeIdTriples( + vocab, localVocab, + {" ", absl::StrCat(" "), + absl::StrCat(" ")}); + auto triplesToDelete = makeIdTriples( + vocab, localVocab, + {" ", absl::StrCat(" "), + absl::StrCat(" ")}); + // Insert the `triplesToInsert`. + deltaTriplesManager.modify([&](DeltaTriples& deltaTriples) { + deltaTriples.insertTriples(cancellationHandle, triplesToInsert); + }); + // We should have successfully completed an update, so the snapshot + // pointer should have changed. + EXPECT_NE(beforeUpdate, deltaTriplesManager.getCurrentSnapshot()); + // Delete the `triplesToDelete`. + deltaTriplesManager.modify([&](DeltaTriples& deltaTriples) { + deltaTriples.deleteTriples(cancellationHandle, triplesToDelete); + }); + + // Make some checks in the middle of these updates (while the other + // threads are likely to be in the middle of their updates as well). + if (i == numIterations / 2) { + { + // None of the thread-exclusive triples should be contained in the + // original snapshot and this should not change over time. The + // Boolean argument specifies whether the triple was inserted (`true`) + // or deleted (`false`). + const auto& locatedSPO = + beforeUpdate->getLocatedTriplesForPermutation(Permutation::SPO); + EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(1), true)); + EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(1), false)); + EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(2), true)); + EXPECT_FALSE(locatedSPO.containsTriple(triplesToInsert.at(2), false)); + EXPECT_FALSE(locatedSPO.containsTriple(triplesToDelete.at(2), true)); + EXPECT_FALSE(locatedSPO.containsTriple(triplesToDelete.at(2), false)); + } + { + // Check for several of the thread-exclusive triples that they are + // properly contained in the current snapshot. + // + auto p = deltaTriplesManager.getCurrentSnapshot(); + const auto& locatedSPO = + p->getLocatedTriplesForPermutation(Permutation::SPO); + EXPECT_TRUE(locatedSPO.containsTriple(triplesToInsert.at(1), true)); + // This triple is exclusive to the thread and is inserted and then + // immediately deleted again. The `DeltaTriples` thus only store it as + // deleted. It might be contained in the original input, hence we + // cannot simply drop it. + EXPECT_TRUE(locatedSPO.containsTriple(triplesToInsert.at(2), false)); + EXPECT_TRUE(locatedSPO.containsTriple(triplesToDelete.at(2), false)); + } + } + } + }; + + // Run the above for each of `numThreads` threads, where each thread knows + // its index (used to create the thread-exclusive triples). + for (size_t i = 0; i < numThreads; ++i) { + threads.emplace_back(insertAndDelete, i); + } + threads.clear(); + + // Check that without updates, the snapshot pointer does not change. + auto p1 = deltaTriplesManager.getCurrentSnapshot(); + auto p2 = deltaTriplesManager.getCurrentSnapshot(); + EXPECT_EQ(p1, p2); + + // Each of the threads above inserts on thread-exclusive triple, deletes one + // thread-exclusive triple and inserts one thread-exclusive triple that is + // deleted right after (This triple is stored as deleted in the `DeltaTriples` + // because it might be contained in the original input). Additionally, there + // is one common triple inserted by// all the threads and one common triple + // that is deleted by all the threads. + // + + auto deltaImpl = deltaTriplesManager.deltaTriples_.rlock(); + EXPECT_THAT(*deltaImpl, NumTriples(numThreads + 1, 2 * numThreads + 1, + 3 * numThreads + 2)); +} diff --git a/test/DeltaTriplesTestHelpers.h b/test/DeltaTriplesTestHelpers.h index 586a54196a..bf64175a17 100644 --- a/test/DeltaTriplesTestHelpers.h +++ b/test/DeltaTriplesTestHelpers.h @@ -25,7 +25,7 @@ inline auto InAllPermutations = absl::StrCat(".getLocatedTriplesPerBlock(", Permutation::toString(perm), ")"), [perm](const DeltaTriples& deltaTriples) { - return deltaTriples.getLocatedTriplesPerBlock(perm); + return deltaTriples.getLocatedTriplesForPermutation(perm); }, InnerMatcher); })); diff --git a/test/IndexTest.cpp b/test/IndexTest.cpp index b707b11111..6ce7f6f732 100644 --- a/test/IndexTest.cpp +++ b/test/IndexTest.cpp @@ -41,7 +41,7 @@ auto makeTestScanWidthOne = [](const IndexImpl& index, IdTable result = index.scan({c0, c1, std::nullopt}, permutation, additionalColumns, std::make_shared>(), - qec.deltaTriples()); + qec.locatedTriplesSnapshot()); ASSERT_EQ(result.numColumns(), 1 + additionalColumns.size()); ASSERT_EQ(result, makeIdTableFromVector(expected)); }; @@ -62,7 +62,7 @@ auto makeTestScanWidthTwo = [](const IndexImpl& index, index.scan({c0, std::nullopt, std::nullopt}, permutation, Permutation::ColumnIndicesRef{}, std::make_shared>(), - qec.deltaTriples()); + qec.locatedTriplesSnapshot()); ASSERT_EQ(wol, makeIdTableFromVector(expected)); }; }; @@ -92,7 +92,7 @@ TEST(IndexTest, createFromTurtleTest) { return; } const auto& [index, qec] = getIndex(); - const auto& deltaTriples = qec.deltaTriples(); + const auto& locatedTriplesSnapshot = qec.locatedTriplesSnapshot(); auto getId = makeGetId(getQec(kb)->getIndex()); Id a = getId(""); @@ -103,33 +103,49 @@ TEST(IndexTest, createFromTurtleTest) { Id c2 = getId(""); // TODO We could also test the multiplicities here. - ASSERT_TRUE(index.PSO().getMetadata(b, deltaTriples).has_value()); - ASSERT_TRUE(index.PSO().getMetadata(b2, deltaTriples).has_value()); - ASSERT_FALSE(index.PSO().getMetadata(a2, deltaTriples).has_value()); - ASSERT_FALSE(index.PSO().getMetadata(c, deltaTriples).has_value()); + ASSERT_TRUE( + index.PSO().getMetadata(b, locatedTriplesSnapshot).has_value()); + ASSERT_TRUE( + index.PSO().getMetadata(b2, locatedTriplesSnapshot).has_value()); + ASSERT_FALSE( + index.PSO().getMetadata(a2, locatedTriplesSnapshot).has_value()); + ASSERT_FALSE( + index.PSO().getMetadata(c, locatedTriplesSnapshot).has_value()); ASSERT_FALSE( index.PSO() .getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)), - deltaTriples) + locatedTriplesSnapshot) .has_value()); - ASSERT_FALSE( - index.PSO().getMetadata(b, deltaTriples).value().isFunctional()); - ASSERT_TRUE( - index.PSO().getMetadata(b2, deltaTriples).value().isFunctional()); + ASSERT_FALSE(index.PSO() + .getMetadata(b, locatedTriplesSnapshot) + .value() + .isFunctional()); + ASSERT_TRUE(index.PSO() + .getMetadata(b2, locatedTriplesSnapshot) + .value() + .isFunctional()); - ASSERT_TRUE(index.POS().getMetadata(b, deltaTriples).has_value()); - ASSERT_TRUE(index.POS().getMetadata(b2, deltaTriples).has_value()); - ASSERT_FALSE(index.POS().getMetadata(a2, deltaTriples).has_value()); - ASSERT_FALSE(index.POS().getMetadata(c, deltaTriples).has_value()); + ASSERT_TRUE( + index.POS().getMetadata(b, locatedTriplesSnapshot).has_value()); + ASSERT_TRUE( + index.POS().getMetadata(b2, locatedTriplesSnapshot).has_value()); + ASSERT_FALSE( + index.POS().getMetadata(a2, locatedTriplesSnapshot).has_value()); + ASSERT_FALSE( + index.POS().getMetadata(c, locatedTriplesSnapshot).has_value()); ASSERT_FALSE( index.POS() .getMetadata(Id::makeFromVocabIndex(VocabIndex::make(735)), - deltaTriples) + locatedTriplesSnapshot) .has_value()); - ASSERT_TRUE( - index.POS().getMetadata(b, deltaTriples).value().isFunctional()); - ASSERT_TRUE( - index.POS().getMetadata(b2, deltaTriples).value().isFunctional()); + ASSERT_TRUE(index.POS() + .getMetadata(b, locatedTriplesSnapshot) + .value() + .isFunctional()); + ASSERT_TRUE(index.POS() + .getMetadata(b2, locatedTriplesSnapshot) + .value() + .isFunctional()); // Relation b // Pair index @@ -167,7 +183,7 @@ TEST(IndexTest, createFromTurtleTest) { const auto& qec = *getQec(kb); const IndexImpl& index = qec.getIndex().getImpl(); - const auto& deltaTriples = qec.deltaTriples(); + const auto& deltaTriples = qec.locatedTriplesSnapshot(); auto getId = makeGetId(getQec(kb)->getIndex()); Id zero = getId("<0>"); @@ -224,7 +240,7 @@ TEST(IndexTest, createFromOnDiskIndexTest) { " ."; const auto& qec = *getQec(kb); const IndexImpl& index = qec.getIndex().getImpl(); - const auto& deltaTriples = qec.deltaTriples(); + const auto& deltaTriples = qec.locatedTriplesSnapshot(); auto getId = makeGetId(getQec(kb)->getIndex()); Id b = getId(""); @@ -465,8 +481,8 @@ TEST(IndexTest, NumDistinctEntities) { EXPECT_FLOAT_EQ(multiplicities[1], 7.0 / 2.0); EXPECT_FLOAT_EQ(multiplicities[2], 7.0 / 7.0); - multiplicities = - index.getMultiplicities(iri(""), Permutation::SPO, qec.deltaTriples()); + multiplicities = index.getMultiplicities(iri(""), Permutation::SPO, + qec.locatedTriplesSnapshot()); EXPECT_FLOAT_EQ(multiplicities[0], 2.5); EXPECT_FLOAT_EQ(multiplicities[1], 1); } diff --git a/test/util/IndexTestHelpers.cpp b/test/util/IndexTestHelpers.cpp index ca8d1f09ee..79eb77b0d5 100644 --- a/test/util/IndexTestHelpers.cpp +++ b/test/util/IndexTestHelpers.cpp @@ -55,17 +55,19 @@ namespace { // folded into the permutations as additional columns. void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( const Index& index) { - DeltaTriples deltaTriples(index); + DeltaTriplesManager deltaTriplesManager(index.getImpl()); + auto sharedLocatedTriplesSnapshot = deltaTriplesManager.getCurrentSnapshot(); + const auto& locatedTriplesSnapshot = *sharedLocatedTriplesSnapshot; static constexpr size_t col0IdTag = 43; auto cancellationDummy = std::make_shared>(); auto iriOfHasPattern = TripleComponent::Iri::fromIriref(HAS_PATTERN_PREDICATE); auto checkSingleElement = [&cancellationDummy, &iriOfHasPattern, - &deltaTriples](const Index& index, - size_t patternIdx, Id id) { + &locatedTriplesSnapshot]( + const Index& index, size_t patternIdx, Id id) { auto scanResultHasPattern = index.scan( ScanSpecificationAsTripleComponent{iriOfHasPattern, id, std::nullopt}, - Permutation::Enum::PSO, {}, cancellationDummy, deltaTriples); + Permutation::Enum::PSO, {}, cancellationDummy, locatedTriplesSnapshot); // Each ID has at most one pattern, it can have none if it doesn't // appear as a subject in the knowledge graph. AD_CORRECTNESS_CHECK(scanResultHasPattern.numRows() <= 1); @@ -86,7 +88,7 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( ScanSpecification{col0Id, std::nullopt, std::nullopt}, permutation, std::array{ColumnIndex{ADDITIONAL_COLUMN_INDEX_SUBJECT_PATTERN}, ColumnIndex{ADDITIONAL_COLUMN_INDEX_OBJECT_PATTERN}}, - cancellationDummy, deltaTriples); + cancellationDummy, locatedTriplesSnapshot); ASSERT_EQ(scanResult.numColumns(), 4u); for (const auto& row : scanResult) { auto patternIdx = row[2].getInt(); @@ -112,12 +114,12 @@ void checkConsistencyBetweenPatternPredicateAndAdditionalColumn( auto cancellationHandle = std::make_shared>(); auto predicates = index.getImpl().PSO().getDistinctCol0IdsAndCounts( - cancellationHandle, deltaTriples); + cancellationHandle, locatedTriplesSnapshot); for (const auto& predicate : predicates.getColumn(0)) { checkConsistencyForPredicate(predicate); } auto objects = index.getImpl().OSP().getDistinctCol0IdsAndCounts( - cancellationHandle, deltaTriples); + cancellationHandle, locatedTriplesSnapshot); for (const auto& object : objects.getColumn(0)) { checkConsistencyForObject(object); }