Skip to content

Commit

Permalink
Extend PrefilterExpression for Literal and Iri (#1653)
Browse files Browse the repository at this point in the history
With this PR, the prefilter expressions implemented in #1619 also apply to literals and IRIs. For example, the following query only extracts the relevant, prefiltered blocks from the `IndexScan`:
```
SELECT * {
?s ?p ?o FILTER (?o >= "hallo" && ?o <= "hello")
}
```
  • Loading branch information
realHannes authored Dec 6, 2024
1 parent ec806f0 commit 9d9bab0
Show file tree
Hide file tree
Showing 9 changed files with 403 additions and 225 deletions.
35 changes: 35 additions & 0 deletions src/engine/sparqlExpressions/LiteralExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ struct SingleUseExpression : public SparqlExpression {

std::span<SparqlExpression::Ptr> childrenImpl() override { return {}; }
};

} // namespace detail

/// The actual instantiations and aliases of LiteralExpressions.
Expand All @@ -224,4 +225,38 @@ using IdExpression = detail::LiteralExpression<ValueId>;
using VectorIdExpression =
detail::LiteralExpression<VectorWithMemoryLimit<ValueId>>;
using SingleUseExpression = detail::SingleUseExpression;

namespace detail {

//______________________________________________________________________________
using IdOrLocalVocabEntry = prefilterExpressions::IdOrLocalVocabEntry;
// Given a `SparqlExpression*` pointing to a `LiteralExpression`, this helper
// function retrieves a corresponding `IdOrLocalVocabEntry` variant
// (`std::variant<ValueId, LocalVocabEntry>`) for `LiteralExpression`s that
// contain a suitable type.
// Given the boolean flag `stringAndIriOnly` is set to `true`, only `Literal`s,
// `Iri`s and `ValueId`s of type `VocabIndex`/`LocalVocabIndex` are returned. If
// `stringAndIriOnly` is set to `false` (default), all `ValueId` types retrieved
// from `LiteralExpression<ValueId>` will be returned.
inline std::optional<IdOrLocalVocabEntry>
getIdOrLocalVocabEntryFromLiteralExpression(const SparqlExpression* child,
bool stringAndIriOnly = false) {
using enum Datatype;
if (const auto* idExpr = dynamic_cast<const IdExpression*>(child)) {
auto idType = idExpr->value().getDatatype();
if (stringAndIriOnly && idType != VocabIndex && idType != LocalVocabIndex) {
return std::nullopt;
}
return idExpr->value();
} else if (const auto* literalExpr =
dynamic_cast<const StringLiteralExpression*>(child)) {
return LocalVocabEntry{literalExpr->value()};
} else if (const auto* iriExpr = dynamic_cast<const IriExpression*>(child)) {
return LocalVocabEntry{iriExpr->value()};
} else {
return std::nullopt;
}
}
} // namespace detail

} // namespace sparqlExpression
121 changes: 93 additions & 28 deletions src/engine/sparqlExpressions/PrefilterExpressionIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include <ranges>

#include "global/ValueIdComparators.h"
#include "util/ConstexprMap.h"
#include "util/OverloadCallOperator.h"

namespace prefilterExpressions {

Expand Down Expand Up @@ -207,34 +209,37 @@ std::vector<BlockMetadata> PrefilterExpression::evaluateAndCheckImpl(
return relevantBlocks;
}

//______________________________________________________________________________
// Converts `referenceValue` into a `ValueId`:
//  - If the variant holds a `ValueId`, that id is returned unchanged.
//  - If it holds a `LocalVocabEntry`, the entry is looked up in `vocab` (and
//    added if it is not contained yet); the resulting index is turned into a
//    `ValueId` via `Id::makeFromLocalVocabIndex`.
ValueId PrefilterExpression::getValueIdFromIdOrLocalVocabEntry(
    const IdOrLocalVocabEntry& referenceValue, LocalVocab& vocab) {
  if (const auto* id = std::get_if<ValueId>(&referenceValue)) {
    return *id;
  }
  const auto& localVocabEntry = std::get<LocalVocabEntry>(referenceValue);
  return Id::makeFromLocalVocabIndex(
      vocab.getIndexAndAddIfNotContained(localVocabEntry));
}

// SECTION RELATIONAL OPERATIONS
//______________________________________________________________________________
template <CompOp Comparison>
std::unique_ptr<PrefilterExpression>
RelationalExpression<Comparison>::logicalComplement() const {
using enum CompOp;
switch (Comparison) {
case LT:
// Complement X < Y: X >= Y
return std::make_unique<GreaterEqualExpression>(referenceId_);
case LE:
// Complement X <= Y: X > Y
return std::make_unique<GreaterThanExpression>(referenceId_);
case EQ:
// Complement X == Y: X != Y
return std::make_unique<NotEqualExpression>(referenceId_);
case NE:
// Complement X != Y: X == Y
return std::make_unique<EqualExpression>(referenceId_);
case GE:
// Complement X >= Y: X < Y
return std::make_unique<LessThanExpression>(referenceId_);
case GT:
// Complement X > Y: X <= Y
return std::make_unique<LessEqualExpression>(referenceId_);
default:
AD_FAIL();
}
using namespace ad_utility;
using P = std::pair<CompOp, CompOp>;
// The complementation logic is implemented with the following mapping:
// (1) ?var < referenceValue -> ?var >= referenceValue
// (2) ?var <= referenceValue -> ?var > referenceValue
// (3) ?var >= referenceValue -> ?var < referenceValue
// (4) ?var > referenceValue -> ?var <= referenceValue
// (5) ?var = referenceValue -> ?var != referenceValue
// (6) ?var != referenceValue -> ?var = referenceValue
constexpr ConstexprMap<CompOp, CompOp, 6> complementMap(
{P{LT, GE}, P{LE, GT}, P{GE, LT}, P{GT, LE}, P{EQ, NE}, P{NE, EQ}});
return std::make_unique<RelationalExpression<complementMap.at(Comparison)>>(
rightSideReferenceValue_);
};

//______________________________________________________________________________
Expand All @@ -261,18 +266,21 @@ std::vector<BlockMetadata> RelationalExpression<Comparison>::evaluateImpl(
}
}

LocalVocab vocab{};
auto referenceId =
getValueIdFromIdOrLocalVocabEntry(rightSideReferenceValue_, vocab);
// Use getRangesForId (from valueIdComparators) to extract the ranges
// containing the relevant ValueIds.
// For pre-filtering with CompOp::EQ, we have to consider empty ranges.
// Reason: The referenceId_ could be contained within the bounds formed by
// Reason: The referenceId could be contained within the bounds formed by
// the IDs of firstTriple_ and lastTriple_ (set false flag to keep
// empty ranges).
auto relevantIdRanges =
Comparison != CompOp::EQ
? getRangesForId(valueIdsInput.begin(), valueIdsInput.end(),
referenceId_, Comparison)
referenceId, Comparison)
: getRangesForId(valueIdsInput.begin(), valueIdsInput.end(),
referenceId_, Comparison, false);
referenceId, Comparison, false);

// The vector for relevant BlockMetadata values which contain ValueIds
// defined as relevant by relevantIdRanges.
Expand Down Expand Up @@ -312,23 +320,37 @@ bool RelationalExpression<Comparison>::operator==(
if (!otherRelational) {
return false;
}
return referenceId_ == otherRelational->referenceId_;
return rightSideReferenceValue_ == otherRelational->rightSideReferenceValue_;
};

//______________________________________________________________________________
template <CompOp Comparison>
std::unique_ptr<PrefilterExpression> RelationalExpression<Comparison>::clone()
const {
return std::make_unique<RelationalExpression<Comparison>>(referenceId_);
return std::make_unique<RelationalExpression<Comparison>>(
rightSideReferenceValue_);
};

//______________________________________________________________________________
template <CompOp Comparison>
std::string RelationalExpression<Comparison>::asString(
[[maybe_unused]] size_t depth) const {
auto referenceValueToString = [](std::stringstream& stream,
const IdOrLocalVocabEntry& referenceValue) {
std::visit(
ad_utility::OverloadCallOperator{
[&stream](const ValueId& referenceId) { stream << referenceId; },
[&stream](const LocalVocabEntry& referenceValue) {
stream << referenceValue.toStringRepresentation();
}},
referenceValue);
};

std::stringstream stream;
stream << "Prefilter RelationalExpression<" << getRelationalOpStr(Comparison)
<< ">\nValueId: " << referenceId_ << std::endl;
<< ">\nreferenceValue_ : ";
referenceValueToString(stream, rightSideReferenceValue_);
stream << " ." << std::endl;
return stream.str();
};

Expand Down Expand Up @@ -456,6 +478,23 @@ template class LogicalExpression<LogicalOperator::OR>;

namespace detail {

//______________________________________________________________________________
// Returns the corresponding mirrored `RelationalExpression<mirrored
// comparison>` for the given `CompOp comparison` template argument. For
// example, the mirroring procedure will transform the relational expression
// `referenceValue > ?var` into `?var < referenceValue`.
template <CompOp comparison>
static std::unique_ptr<PrefilterExpression> makeMirroredExpression(
const IdOrLocalVocabEntry& referenceValue) {
using enum CompOp;
using namespace ad_utility;
using P = std::pair<CompOp, CompOp>;
constexpr ConstexprMap<CompOp, CompOp, 6> mirrorMap(
{P{LT, GT}, P{LE, GE}, P{GE, LE}, P{GT, LT}, P{EQ, EQ}, P{NE, NE}});
return std::make_unique<RelationalExpression<mirrorMap.at(comparison)>>(
referenceValue);
}

//______________________________________________________________________________
void checkPropertiesForPrefilterConstruction(
const std::vector<PrefilterExprVariablePair>& vec) {
Expand All @@ -473,5 +512,31 @@ void checkPropertiesForPrefilterConstruction(
}
}

//______________________________________________________________________________
template <CompOp comparison>
std::vector<PrefilterExprVariablePair> makePrefilterExpressionVec(
const IdOrLocalVocabEntry& referenceValue, const Variable& variable,
const bool mirrored) {
std::vector<PrefilterExprVariablePair> resVec{};
resVec.emplace_back(
mirrored
? makeMirroredExpression<comparison>(referenceValue)
: std::make_unique<RelationalExpression<comparison>>(referenceValue),
variable);
return resVec;
}

//______________________________________________________________________________
// Explicit instantiations of `makePrefilterExpressionVec` for the comparison
// operations `LT`, `LE`, `GE`, `GT`, `EQ` and `NE`. The template is defined in
// this translation unit, so these instantiations provide the definitions for
// the declaration in the header.
// The macro body deliberately has no trailing `;`: the semicolon at each use
// terminates the instantiation, which avoids stray empty declarations
// (`-Wextra-semi`). The helper macro is undefined again right after its last
// use so that it does not leak into the rest of the translation unit.
#define INSTANTIATE_MAKE_PREFILTER(Comparison)                       \
  template std::vector<PrefilterExprVariablePair>                    \
  makePrefilterExpressionVec<Comparison>(const IdOrLocalVocabEntry&, \
                                         const Variable&, const bool)
INSTANTIATE_MAKE_PREFILTER(CompOp::LT);
INSTANTIATE_MAKE_PREFILTER(CompOp::LE);
INSTANTIATE_MAKE_PREFILTER(CompOp::GE);
INSTANTIATE_MAKE_PREFILTER(CompOp::GT);
INSTANTIATE_MAKE_PREFILTER(CompOp::EQ);
INSTANTIATE_MAKE_PREFILTER(CompOp::NE);
#undef INSTANTIATE_MAKE_PREFILTER

} // namespace detail
} // namespace prefilterExpressions
53 changes: 40 additions & 13 deletions src/engine/sparqlExpressions/PrefilterExpressionIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

namespace prefilterExpressions {

using IdOrLocalVocabEntry = std::variant<ValueId, LocalVocabEntry>;

//______________________________________________________________________________
// The maximum recursion depth for `info()` / `operator<<()`. A depth of `3`
// should be sufficient for most `PrefilterExpressions` in our use case.
Expand Down Expand Up @@ -101,16 +103,21 @@ class PrefilterExpression {
return str;
}

// Static helper to retrieve the reference `ValueId` from the
// `IdOrLocalVocabEntry` variant `referenceValue`. A contained `ValueId` is
// returned as is. A contained `LocalVocabEntry` is added to `vocab` (if it is
// not contained yet) and the `ValueId` created from the resulting local vocab
// index is returned.
// NOTE(review): in the second case the returned `ValueId` presumably stays
// valid only as long as `vocab` is alive -- confirm against `LocalVocab`.
static ValueId getValueIdFromIdOrLocalVocabEntry(
    const IdOrLocalVocabEntry& referenceValue, LocalVocab& vocab);

private:
// Performs the following conditional checks on the provided `BlockMetadata`
// values:
// (1) uniqueness of blocks
// (2) sorted (order)
// (3) Constant values for all columns `< evaluationColumn`
// This function subsequently invokes the `evaluateImpl` method and
// checks the corresponding result for those conditions again.
// If a respective condition is violated, the function performing the checks
// will throw a `std::runtime_error`.
// Note: Use `evaluate` for general evaluation of `PrefilterExpression`
// instead of this method.
// Performs the following conditional checks on the provided `BlockMetadata`
// values:
// (1) uniqueness of blocks
// (2) sorted (order)
// (3) Constant values for all columns `< evaluationColumn`
// This function subsequently invokes the `evaluateImpl` method and checks the
// corresponding result for those conditions again. If a respective condition
// is violated, the function performing the checks will throw a
// `std::runtime_error`.
std::vector<BlockMetadata> evaluateAndCheckImpl(
std::span<const BlockMetadata> input, size_t evaluationColumn) const;

Expand All @@ -130,12 +137,18 @@ using CompOp = valueIdComparators::Comparison;
template <CompOp Comparison>
class RelationalExpression : public PrefilterExpression {
private:
// The ValueId on which we perform the relational comparison on.
ValueId referenceId_;
// This is the right hand side value of the relational expression. The left
// hand value is indirectly supplied during the evaluation process via the
// `evaluationColumn` argument. `evaluationColumn` represents the column index
// associated with the `Variable` column of the `IndexScan`.
// E.g., a less-than expression with a value of 3 will represent the logical
// relation ?var < 3. An equal-to expression with a value of "Freiburg" will
// represent ?var = "Freiburg".
IdOrLocalVocabEntry rightSideReferenceValue_;

public:
explicit RelationalExpression(const ValueId referenceId)
: referenceId_(referenceId) {}
explicit RelationalExpression(const IdOrLocalVocabEntry& referenceValue)
: rightSideReferenceValue_(referenceValue) {}

std::unique_ptr<PrefilterExpression> logicalComplement() const override;
bool operator==(const PrefilterExpression& other) const override;
Expand Down Expand Up @@ -239,5 +252,19 @@ using PrefilterExprVariablePair =
void checkPropertiesForPrefilterConstruction(
const std::vector<PrefilterExprVariablePair>& vec);

//______________________________________________________________________________
// Creates a `RelationalExpression<comparison>` prefilter expression from the
// templated `CompOp` comparison operation and the reference value
// `referenceValue` (an `IdOrLocalVocabEntry`). The expression is paired with
// `variable`, and the resulting `<PrefilterExpression, Variable>` pair is
// returned as the single element of a vector.
// The `mirrored` flag indicates whether the comparison has to be mirrored
// before the expression is created. Mirroring swaps the sides of the
// (asymmetrical) comparison operations, e.g. `5 < ?x` is changed to `?x > 5`;
// `EQ` and `NE` are unaffected.
// The template is defined in the corresponding `.cpp` file, where it is
// explicitly instantiated for `LT`, `LE`, `GE`, `GT`, `EQ` and `NE`.
template <CompOp comparison>
std::vector<PrefilterExprVariablePair> makePrefilterExpressionVec(
const IdOrLocalVocabEntry& referenceValue, const Variable& variable,
bool mirrored);

} // namespace detail
} // namespace prefilterExpressions
Loading

0 comments on commit 9d9bab0

Please sign in to comment.