Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow unbound variables in compliance with the SPARQL standard #1586

Merged
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/ServerMain.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ int main(int argc, char** argv) {
optionFactory.getProgramOption<"service-max-value-rows">(),
"The maximal number of result rows to be passed to a SERVICE operation "
"as a VALUES clause to optimize its computation.");
add("throw-on-unbound-variables",
optionFactory.getProgramOption<"throw-on-unbound-variables">(),
"If set to true, the queries that use GROUP BY, BIND, or ORDER BY with "
"variables that are unbound in the query throw an exception. These "
"queries technically are allowed by the SPARQL standard, but typically "
"are the result of typos and unintended by the user");
po::variables_map optionsMap;

try {
Expand Down
6 changes: 6 additions & 0 deletions src/engine/GroupBy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ GroupBy::GroupBy(QueryExecutionContext* qec, vector<Variable> groupByVariables,
: Operation{qec},
_groupByVariables{std::move(groupByVariables)},
_aliases{std::move(aliases)} {
AD_CORRECTNESS_CHECK(subtree != nullptr);
// Ignore all variables that are not contained in the subtree.
std::erase_if(_groupByVariables,
[&map = subtree->getVariableColumns()](const auto& var) {
return !map.contains(var);
});
// Sort the groupByVariables to ensure that the cache key is order
// invariant.
// Note: It is tempting to do the same also for the aliases, but that would
Expand Down
6 changes: 6 additions & 0 deletions src/engine/Operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ class Operation {
// recursively collect all Warnings generated by all descendants
vector<string> collectWarnings() const;

// Add a warning to the `Operation`. The warning will be returned by
// `collectWarnings()` above.
void addWarning(std::string warning) {
_warnings.push_back(std::move(warning));
}

/**
* @return A list of columns on which the result of this operation is sorted.
*/
Expand Down
13 changes: 11 additions & 2 deletions src/engine/QueryExecutionTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,20 @@ std::string QueryExecutionTree::getCacheKey() const {

// _____________________________________________________________________________
size_t QueryExecutionTree::getVariableColumn(const Variable& variable) const {
auto optIdx = getVariableColumnOrNullopt(variable);
if (!optIdx.has_value()) {
AD_THROW("Variable could not be mapped to result column. Var: " +
variable.name());
}
return optIdx.value();
}
// _____________________________________________________________________________
std::optional<size_t> QueryExecutionTree::getVariableColumnOrNullopt(
const Variable& variable) const {
AD_CONTRACT_CHECK(rootOperation_);
const auto& varCols = getVariableColumns();
if (!varCols.contains(variable)) {
AD_THROW("Variable could not be mapped to result column. Var: " +
variable.name());
return std::nullopt;
}
return varCols.at(variable).columnIndex_;
}
Expand Down
7 changes: 7 additions & 0 deletions src/engine/QueryExecutionTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,15 @@ class QueryExecutionTree {

bool isEmpty() const { return !rootOperation_; }

// Get the column index that the given `variable` will have in the result of
// this query. Throw if the variable is not part of the `VariableToColumnMap`.
size_t getVariableColumn(const Variable& variable) const;

// Similar to `getVariableColumn` above, but return `nullopt` if the variable
// is not part of the `VariableToColumnMap`.
std::optional<size_t> getVariableColumnOrNullopt(
const Variable& variable) const;

size_t getResultWidth() const { return rootOperation_->getResultWidth(); }

std::shared_ptr<const Result> getResult(bool requestLaziness = false) const {
Expand Down
28 changes: 25 additions & 3 deletions src/engine/QueryPlanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ std::vector<QueryPlanner::SubtreePlan> QueryPlanner::createExecutionTrees(

checkCancellation();

for (const auto& warning : pq.warnings()) {
warnings_.push_back(warning);
}
return lastRow;
}

Expand All @@ -201,7 +204,15 @@ QueryExecutionTree QueryPlanner::createExecutionTree(ParsedQuery& pq,
auto lastRow = createExecutionTrees(pq, isSubquery);
auto minInd = findCheapestExecutionTree(lastRow);
LOG(DEBUG) << "Done creating execution plan.\n";
return *lastRow[minInd]._qet;
auto result = std::move(*lastRow[minInd]._qet);
auto& rootOperation = *result.getRootOperation();
// Collect all the warnings and pass them to the created tree, s.t. they
// become visible to the user once the query is executed.
for (const auto& warning : warnings_) {
rootOperation.addWarning(warning);
}
warnings_.clear();
return result;
} catch (ad_utility::CancellationException& e) {
e.setOperation("Query planning");
throw;
Expand Down Expand Up @@ -388,8 +399,19 @@ vector<QueryPlanner::SubtreePlan> QueryPlanner::getOrderByRow(
plan.idsOfIncludedTextLimits_ = parent.idsOfIncludedTextLimits_;
vector<pair<ColumnIndex, bool>> sortIndices;
for (auto& ord : pq._orderBy) {
sortIndices.emplace_back(parent._qet->getVariableColumn(ord.variable_),
ord.isDescending_);
auto idx = parent._qet->getVariableColumnOrNullopt(ord.variable_);
if (!idx.has_value()) {
// Skip variables that are not visible in the query execution tree so
// far.
continue;
}
sortIndices.emplace_back(idx.value(), ord.isDescending_);
}

// If none of the sort variables was bound in the query we don't actually
// have to add an ORDER BY or INTERNAL SORT BY.
if (sortIndices.empty()) {
return previous;
}

if (pq._isInternalSort == IsInternalSort::True) {
Expand Down
5 changes: 5 additions & 0 deletions src/engine/QueryPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,11 @@ class QueryPlanner {

std::optional<size_t> textLimit_ = std::nullopt;

// Used to collect warnings (created by the parser or the query planner) which
// are then passed on to the created `QueryExecutionTree` s.t. they can be
// reported alongside the result of the query once it is executed.
std::vector<std::string> warnings_;

[[nodiscard]] std::vector<QueryPlanner::SubtreePlan> optimize(
ParsedQuery::GraphPattern* rootPattern);

Expand Down
18 changes: 12 additions & 6 deletions src/engine/sparqlExpressions/LiteralExpression.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class LiteralExpression : public SparqlExpression {
string getCacheKey(const VariableToColumnMap& varColMap) const override {
if constexpr (std::is_same_v<T, ::Variable>) {
if (!varColMap.contains(_value)) {
AD_THROW(absl::StrCat("Variable ", _value.name(), " not found"));
return "Unbound Variable";
}
return {"#column_" + std::to_string(varColMap.at(_value).columnIndex_) +
"#"};
Expand Down Expand Up @@ -158,18 +158,24 @@ class LiteralExpression : public SparqlExpression {
return std::move(resultFromSameRow.value());
}
}

// If the variable is not part of the input, then it is always UNDEF.
auto column = context->getColumnIndexForVariable(variable);
if (!column.has_value()) {
return Id::makeUndefined();
}
// If a variable is grouped, then we know that it always has the same
// value and can treat it as a constant. This is not possible however when
// we are inside an aggregate, because for example `SUM(?variable)` must
// still compute the sum over the whole group.
if (context->_groupedVariables.contains(variable) && !isInsideAggregate()) {
auto column = context->getColumnIndexForVariable(variable);
const auto& table = context->_inputTable;
auto constantValue = table.at(context->_beginIndex, column);
assert((std::ranges::all_of(
auto constantValue = table.at(context->_beginIndex, column.value());
AD_EXPENSIVE_CHECK((std::ranges::all_of(
table.begin() + context->_beginIndex,
table.begin() + context->_endIndex,
[&](const auto& row) { return row[column] == constantValue; })));
table.begin() + context->_endIndex, [&](const auto& row) {
return row[column.value()] == constantValue;
})));
return constantValue;
} else {
return variable;
Expand Down
6 changes: 5 additions & 1 deletion src/engine/sparqlExpressions/RegexExpression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,11 @@ ExpressionResult RegexExpression::evaluatePrefixRegex(
// of binary searches and the result is a set of intervals.
std::vector<ad_utility::SetOfIntervals> resultSetOfIntervals;
if (context->isResultSortedBy(variable)) {
auto column = context->getColumnIndexForVariable(variable);
auto optColumn = context->getColumnIndexForVariable(variable);
AD_CORRECTNESS_CHECK(optColumn.has_value(),
"We have previously asserted that the input is sorted "
"by the variable, so we expect it to exist");
const auto& column = optColumn.value();
for (auto [lowerId, upperId] : lowerAndUpperIds) {
// Two binary searches to find the lower and upper bounds of the range.
auto lower = std::lower_bound(
Expand Down
6 changes: 5 additions & 1 deletion src/engine/sparqlExpressions/RelationalExpressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,13 @@ ad_utility::SetOfIntervals evaluateWithBinarySearch(
// Set up iterators into the `IdTable` that only access the column where the
// `variable` is stored.
auto columnIndex = context->getColumnIndexForVariable(variable);
AD_CORRECTNESS_CHECK(
columnIndex.has_value(),
"The input must be sorted to evaluate a relational expression using "
"binary search. This should never happen for unbound variables");

auto getIdFromColumn = ad_utility::makeAssignableLambda(
[columnIndex](const auto& idTable, auto i) {
[columnIndex = columnIndex.value()](const auto& idTable, auto i) {
return idTable(i, columnIndex);
});

Expand Down
7 changes: 3 additions & 4 deletions src/engine/sparqlExpressions/SparqlExpressionTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,11 @@ bool EvaluationContext::isResultSortedBy(const Variable& variable) {
}

// _____________________________________________________________________________
[[nodiscard]] ColumnIndex EvaluationContext::getColumnIndexForVariable(
const Variable& var) const {
[[nodiscard]] std::optional<ColumnIndex>
EvaluationContext::getColumnIndexForVariable(const Variable& var) const {
const auto& map = _variableToColumnMap;
if (!map.contains(var)) {
throw std::runtime_error(absl::StrCat(
"Variable ", var.name(), " was not found in input to expression."));
return std::nullopt;
}
return map.at(var).columnIndex_;
}
Expand Down
2 changes: 1 addition & 1 deletion src/engine/sparqlExpressions/SparqlExpressionTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ struct EvaluationContext {
[[nodiscard]] size_t size() const;

// ____________________________________________________________________________
[[nodiscard]] ColumnIndex getColumnIndexForVariable(
[[nodiscard]] std::optional<ColumnIndex> getColumnIndexForVariable(
const Variable& var) const;

// _____________________________________________________________________________
Expand Down
3 changes: 2 additions & 1 deletion src/global/RuntimeParameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ inline auto& RuntimeParameters() {
Bool<"group-by-hash-map-enabled">{false},
Bool<"group-by-disable-index-scan-optimizations">{false},
SizeT<"service-max-value-rows">{10'000},
SizeT<"query-planning-budget">{1500}};
SizeT<"query-planning-budget">{1500},
Bool<"throw-on-unbound-variables">{false}};
}();
return params;
}
Expand Down
48 changes: 29 additions & 19 deletions src/parser/ParsedQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <vector>

#include "engine/sparqlExpressions/SparqlExpressionPimpl.h"
#include "global/RuntimeParameters.h"
#include "parser/RdfEscaping.h"
#include "parser/sparqlParser/SparqlQleverVisitor.h"
#include "util/Conversions.h"
Expand Down Expand Up @@ -341,13 +342,13 @@ const std::vector<Alias>& ParsedQuery::getAliases() const {
void ParsedQuery::checkVariableIsVisible(
const Variable& variable, const std::string& locationDescription,
const ad_utility::HashSet<Variable>& additionalVisibleVariables,
std::string_view otherPossibleLocationDescription) const {
std::string_view otherPossibleLocationDescription) {
if (!ad_utility::contains(getVisibleVariables(), variable) &&
!additionalVisibleVariables.contains(variable)) {
throw InvalidSparqlQueryException(absl::StrCat(
"Variable ", variable.name(), " was used by " + locationDescription,
", but is not defined in the query body",
otherPossibleLocationDescription, "."));
addWarningOrThrow(absl::StrCat("Variable ", variable.name(),
" was used by " + locationDescription,
", but is not defined in the query body",
otherPossibleLocationDescription, "."));
}
}

Expand All @@ -356,7 +357,7 @@ void ParsedQuery::checkUsedVariablesAreVisible(
const sparqlExpression::SparqlExpressionPimpl& expression,
const std::string& locationDescription,
const ad_utility::HashSet<Variable>& additionalVisibleVariables,
std::string_view otherPossibleLocationDescription) const {
std::string_view otherPossibleLocationDescription) {
for (const auto* var : expression.containedVariables()) {
checkVariableIsVisible(*var,
locationDescription + " in expression \"" +
Expand Down Expand Up @@ -447,20 +448,19 @@ void ParsedQuery::addOrderByClause(OrderClause orderClause, bool isGroupBy,
auto processVariableOrderKey = [this, isGroupBy, &noteForImplicitGroupBy,
&variablesFromAliases,
additionalError](VariableOrderKey orderKey) {
checkVariableIsVisible(orderKey.variable_, "ORDER BY", variablesFromAliases,
additionalError);

// Check whether grouping is done. The variable being ordered by
// must then be either grouped or the result of an alias in the select
// clause.
if (isGroupBy &&
!ad_utility::contains(_groupByVariables, orderKey.variable_) &&
(!variablesFromAliases.contains(orderKey.variable_))) {
throw InvalidSparqlQueryException(
"Variable " + orderKey.variable_.name() +
if (!isGroupBy) {
checkVariableIsVisible(orderKey.variable_, "ORDER BY",
variablesFromAliases, additionalError);
} else if (!ad_utility::contains(_groupByVariables, orderKey.variable_) &&
(!variablesFromAliases.contains(orderKey.variable_))) {
// Check whether grouping is done. The variable being ordered by
// must then be either grouped or the result of an alias in the select
// clause.
addWarningOrThrow(absl::StrCat(
"Variable " + orderKey.variable_.name(),
" was used in an ORDER BY clause, but is neither grouped nor "
"created as an alias in the SELECT clause." +
noteForImplicitGroupBy);
"created as an alias in the SELECT clause.",
noteForImplicitGroupBy));
}
_orderBy.push_back(std::move(orderKey));
};
Expand Down Expand Up @@ -499,8 +499,18 @@ Variable ParsedQuery::getNewInternalVariable() {
return variable;
}

// _____________________________________________________________________________
Variable ParsedQuery::blankNodeToInternalVariable(std::string_view blankNode) {
AD_CONTRACT_CHECK(blankNode.starts_with("_:"));
return Variable{absl::StrCat(QLEVER_INTERNAL_BLANKNODE_VARIABLE_PREFIX,
blankNode.substr(2))};
}

// _____________________________________________________________________________
void ParsedQuery::addWarningOrThrow(std::string warning) {
if (RuntimeParameters().get<"throw-on-unbound-variables">()) {
throw InvalidSparqlQueryException(std::move(warning));
} else {
addWarning(std::move(warning));
}
}
28 changes: 24 additions & 4 deletions src/parser/ParsedQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ class ParsedQuery {
LimitOffsetClause _limitOffset{};
string _originalString;

// Contains warnings about queries that are valid according to the SPARQL
// standard, but are probably semantically wrong.
std::vector<std::string> warnings_;

using HeaderClause =
std::variant<SelectClause, ConstructClause, UpdateClause, AskClause>;
// Use explicit default initialization for `SelectClause` because its
Expand Down Expand Up @@ -158,6 +162,21 @@ class ParsedQuery {
// Add variables, that were found in the query body.
void registerVariablesVisibleInQueryBody(const vector<Variable>& variables);

// Return all the warnings that have been added via `addWarning()` or
// `addWarningOrThrow`.
const std::vector<std::string>& warnings() const { return warnings_; }

// Add a warning to the query. The warning becomes part of the return value of
// the `warnings()` function above.
void addWarning(std::string warning) {
warnings_.push_back(std::move(warning));
}

// If unbound variables that are used in a query are supposed to throw because
// the corresponding `RuntimeParameter` is set, then throw. Else add a
// warning.
void addWarningOrThrow(std::string warning);

// Returns all variables that are visible in the Query Body.
const std::vector<Variable>& getVisibleVariables() const;

Expand Down Expand Up @@ -196,20 +215,21 @@ class ParsedQuery {
Variable addInternalAlias(sparqlExpression::SparqlExpressionPimpl expression);

// If the `variable` is neither visible in the query body nor contained in the
// `additionalVisibleVariables`, throw an `InvalidQueryException` that uses
// the `locationDescription` inside the message.
// `additionalVisibleVariables`, add a warning or throw an exception (see
// `addWarningOrThrow`) that uses the `locationDescription` inside the
// message.
void checkVariableIsVisible(
const Variable& variable, const std::string& locationDescription,
const ad_utility::HashSet<Variable>& additionalVisibleVariables = {},
std::string_view otherPossibleLocationDescription = "") const;
std::string_view otherPossibleLocationDescription = "");

// Similar to `checkVariableIsVisible` above, but performs the check for each
// of the variables that are used inside the `expression`.
void checkUsedVariablesAreVisible(
const sparqlExpression::SparqlExpressionPimpl& expression,
const std::string& locationDescription,
const ad_utility::HashSet<Variable>& additionalVisibleVariables = {},
std::string_view otherPossibleLocationDescription = "") const;
std::string_view otherPossibleLocationDescription = "");

// Add the `groupKeys` (either variables or expressions) to the query and
// check whether all the variables are visible inside the query body.
Expand Down
Loading
Loading