Skip to content

Commit

Permalink
Allow selecting all variables as payload in spatial search SERVICE (a…
Browse files Browse the repository at this point in the history
…d-freiburg#1656)

In the spatial search service (triggered via `SERVICE spatialSearch:`) all variables from the right hand side of the join (except for the join column) that shall be part of the result have to be explicitly selected via the `<payload>` parameter. This PR adds a special value `<all>` for this parameter, which automatically selects all the variables from the right side as payload.
  • Loading branch information
ullingerc authored Dec 6, 2024
1 parent c76f24b commit 5b27eeb
Show file tree
Hide file tree
Showing 12 changed files with 413 additions and 101 deletions.
75 changes: 32 additions & 43 deletions src/engine/SpatialJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,30 +175,24 @@ string SpatialJoin::getDescriptor() const {
// ____________________________________________________________________________
size_t SpatialJoin::getResultWidth() const {
if (childLeft_ && childRight_) {
// Helper visitor lambda to determine the size of the right child after
// selecting only the join column and explicitly requested payload columns
auto sizeRight = [this]<typename T>(const T& value) {
if constexpr (std::is_same_v<T, PayloadAllVariables>) {
return childRight_->getResultWidth();
} else {
static_assert(std::is_same_v<T, std::vector<Variable>>);

// We convert to a set here, because we allow multiple occurrences of
// variables in payloadVariables_
absl::flat_hash_set<Variable> pvSet{value.begin(), value.end()};

// The payloadVariables_ may contain the right join variable
return pvSet.size() + (pvSet.contains(config_.right_) ? 0 : 1);
}
};

// don't subtract anything because of a common join column. In the case
// of the spatial join, the join column is different for both sides (e.g.
// objects with a distance of at most 500m to each other, then each object
// might contain different positions, which should be kept).
// For the right join table we only use the selected columns.
auto widthChildren = childLeft_->getResultWidth() +
std::visit(sizeRight, config_.payloadVariables_);
size_t sizeRight;
if (config_.payloadVariables_.isAll()) {
sizeRight = childRight_->getResultWidth();
} else {
// We convert to a set here, because we allow multiple occurrences of
// variables in payloadVariables_
std::vector<Variable> pv = config_.payloadVariables_.getVariables();
absl::flat_hash_set<Variable> pvSet{pv.begin(), pv.end()};

// The payloadVariables_ may contain the right join variable
return pvSet.size() + (pvSet.contains(config_.right_) ? 0 : 1);
}
auto widthChildren = childLeft_->getResultWidth() + sizeRight;

if (config_.distanceVariable_.has_value()) {
return widthChildren + 1;
Expand Down Expand Up @@ -324,34 +318,29 @@ VariableToColumnMap SpatialJoin::getVarColMapPayloadVars() const {
"Child right must be added before payload variable to "
"column map can be computed.");

auto payloadVariablesVisitor = [this]<typename T>(const T& value) {
const auto& varColMap = childRight_->getVariableColumns();
VariableToColumnMap newVarColMap;
const auto& varColMap = childRight_->getVariableColumns();
VariableToColumnMap newVarColMap;

if constexpr (std::is_same_v<T, PayloadAllVariables>) {
newVarColMap = VariableToColumnMap{varColMap};
} else {
static_assert(std::is_same_v<T, std::vector<Variable>>);
for (const auto& var : value) {
if (varColMap.contains(var)) {
newVarColMap.insert_or_assign(var, varColMap.at(var));
} else {
addWarningOrThrow(
absl::StrCat("Variable '", var.name(),
"' selected as payload to spatial join but not "
"present in right child."));
}
if (config_.payloadVariables_.isAll()) {
newVarColMap = VariableToColumnMap{varColMap};
} else {
for (const auto& var : config_.payloadVariables_.getVariables()) {
if (varColMap.contains(var)) {
newVarColMap.insert_or_assign(var, varColMap.at(var));
} else {
addWarningOrThrow(
absl::StrCat("Variable '", var.name(),
"' selected as payload to spatial join but not "
"present in right child."));
}
AD_CONTRACT_CHECK(
varColMap.contains(config_.right_),
"Right variable is not defined in right child of spatial join.");
newVarColMap.insert_or_assign(config_.right_,
varColMap.at(config_.right_));
}
return newVarColMap;
};
AD_CONTRACT_CHECK(
varColMap.contains(config_.right_),
"Right variable is not defined in right child of spatial join.");
newVarColMap.insert_or_assign(config_.right_, varColMap.at(config_.right_));
}

return std::visit(payloadVariablesVisitor, config_.payloadVariables_);
return newVarColMap;
}

// ____________________________________________________________________________
Expand Down
15 changes: 2 additions & 13 deletions src/engine/SpatialJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "engine/Operation.h"
#include "global/Id.h"
#include "parser/PayloadVariables.h"
#include "parser/data/Variable.h"

// A nearest neighbor search with optionally a maximum distance.
Expand All @@ -27,18 +28,6 @@ struct MaxDistanceConfig {
// Configuration to restrict the results provided by the SpatialJoin
using SpatialJoinTask = std::variant<NearestNeighborsConfig, MaxDistanceConfig>;

// Represents the selection of all variables of the right join table as payload
struct PayloadAllVariables : std::monostate {
bool operator==([[maybe_unused]] const std::vector<Variable>& other) const {
return false;
};
};

// Configuration to select which columns of the right join table should be
// part of the result.
using PayloadVariables =
std::variant<PayloadAllVariables, std::vector<Variable>>;

// Selection of a SpatialJoin algorithm
enum class SpatialJoinAlgorithm { BASELINE, S2_GEOMETRY, BOUNDING_BOX };
const SpatialJoinAlgorithm SPATIAL_JOIN_DEFAULT_ALGORITHM =
Expand All @@ -60,7 +49,7 @@ struct SpatialJoinConfiguration {
// If given a vector of variables, the selected variables will be part of the
// result table - the join column will automatically be part of the result.
// You may use PayloadAllVariables to select all columns of the right table.
PayloadVariables payloadVariables_ = PayloadAllVariables{};
PayloadVariables payloadVariables_ = PayloadVariables::all();

// Choice of algorithm. Both algorithms have equal results, but different
// runtime characteristics.
Expand Down
1 change: 1 addition & 0 deletions src/parser/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_library(parser
MagicServiceQuery.cpp
PathQuery.cpp
SpatialQuery.cpp
PayloadVariables.cpp
PropertyPath.cpp
data/SparqlFilter.cpp
SelectClause.cpp
Expand Down
72 changes: 72 additions & 0 deletions src/parser/PayloadVariables.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Christoph Ullinger <[email protected]>

#include "parser/PayloadVariables.h"

// ____________________________________________________________________________
PayloadVariables::PayloadVariables(std::vector<Variable> variables)
: variables_{std::move(variables)} {};

// ____________________________________________________________________________
PayloadVariables PayloadVariables::all() {
PayloadVariables pv{};
pv.setToAll();
return pv;
};

// ____________________________________________________________________________
void PayloadVariables::addVariable(const Variable& variable) {
// Helper visitor to check if the payload variables has not been set to all
// and a variable can be added. If yes, add it.
auto addVarVisitor = [&]<typename T>(T& value) {
if constexpr (std::is_same_v<T, std::vector<Variable>>) {
value.push_back(variable);
}
};

std::visit(addVarVisitor, variables_);
};

// ____________________________________________________________________________
void PayloadVariables::setToAll() {
variables_ = detail::PayloadAllVariables{};
};

// ____________________________________________________________________________
bool PayloadVariables::empty() const {
// Helper visitor to check if the payload variables are empty
auto emptyVisitor = []<typename T>(const T& value) -> bool {
if constexpr (std::is_same_v<T, detail::PayloadAllVariables>) {
return false;
} else {
static_assert(std::is_same_v<T, std::vector<Variable>>);
return value.empty();
}
};

return std::visit(emptyVisitor, variables_);
};

// ____________________________________________________________________________
bool PayloadVariables::isAll() const {
return std::holds_alternative<detail::PayloadAllVariables>(variables_);
};

// ____________________________________________________________________________
const std::vector<Variable>& PayloadVariables::getVariables() const {
// Helper visitor to check if the payload variables has been set to all: then
// throw, otherwise return the vector
auto getVarVisitor =
[]<typename T>(const T& value) -> const std::vector<Variable>& {
if constexpr (std::is_same_v<T, std::vector<Variable>>) {
return value;
} else {
AD_THROW(
"getVariables may only be called on a non-all PayloadVariables "
"object.");
}
};

return std::visit(getVarVisitor, variables_);
};
57 changes: 57 additions & 0 deletions src/parser/PayloadVariables.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2024, University of Freiburg,
// Chair of Algorithms and Data Structures.
// Author: Christoph Ullinger <[email protected]>

#pragma once

#include <vector>

#include "parser/data/Variable.h"

namespace detail {
// Represents the selection of all variables as payload
struct PayloadAllVariables : std::monostate {
bool operator==([[maybe_unused]] const std::vector<Variable>& other) const {
return false;
};
};
} // namespace detail

// This class represents a list of variables to be included in the result of
// an operation. This is currently used in the spatial search.
class PayloadVariables {
public:
// Construct an empty payload variables object
PayloadVariables() = default;

// Construct a payload variables object from a vector of variables
PayloadVariables(std::vector<Variable> variables);

// Construct a payload variables object that is set to all
static PayloadVariables all();

// Add a variable to the payload variables or do nothing if all variables are
// already selected
void addVariable(const Variable& variable);

// Select all variables.
void setToAll();

// Returns whether the payload variables object is empty.
bool empty() const;

// Returns whether all variables have been selected.
bool isAll() const;

// Returns a vector of variables if all has not been set. Otherwise throws.
const std::vector<Variable>& getVariables() const;

// For testing: equality operator
bool operator==(const PayloadVariables& other) const {
return variables_ == other.variables_;
};

private:
std::variant<detail::PayloadAllVariables, std::vector<Variable>> variables_ =
std::vector<Variable>{};
};
26 changes: 23 additions & 3 deletions src/parser/SpatialQuery.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@

#include "parser/SpatialQuery.h"

#include <type_traits>
#include <variant>

#include "engine/SpatialJoin.h"
#include "parser/MagicServiceIriConstants.h"
#include "parser/PayloadVariables.h"
#include "parser/data/Variable.h"

namespace parsedQuery {

Expand Down Expand Up @@ -60,7 +65,22 @@ void SpatialQuery::addParameter(const SparqlTriple& triple) {
"<baseline>, <s2> or <boundingBox>.");
}
} else if (predString == "payload") {
payloadVariables_.push_back(getVariable("payload", object));
if (object.isVariable()) {
// Single selected variable

// If we have already selected all payload variables, we can ignore
// another explicitly selected variable.
payloadVariables_.addVariable(getVariable("payload", object));
} else if (object.isIri() &&
extractParameterName(object, SPATIAL_SEARCH_IRI) == "all") {
// All variables selected
payloadVariables_.setToAll();
} else {
throw SpatialSearchException(
"The argument to the <payload> parameter must be either a variable "
"to be selected or <all>.");
}

} else {
throw SpatialSearchException(absl::StrCat(
"Unsupported argument ", predString,
Expand Down Expand Up @@ -95,7 +115,7 @@ SpatialJoinConfiguration SpatialQuery::toSpatialJoinConfiguration() const {
"spatialSearch: { [Config Triples] { <Something> <ThatSelects> ?right "
"} }.");
} else if (!ignoreMissingRightChild_ && !childGraphPattern_.has_value() &&
!payloadVariables_.empty()) {
!payloadVariables_.isAll() && !payloadVariables_.empty()) {
throw SpatialSearchException(
"The right variable for the spatial search is declared outside the "
"SERVICE, but the <payload> parameter was set. Please move the "
Expand All @@ -112,7 +132,7 @@ SpatialJoinConfiguration SpatialQuery::toSpatialJoinConfiguration() const {
// Payload variables
PayloadVariables pv;
if (!childGraphPattern_.has_value()) {
pv = PayloadAllVariables{};
pv = PayloadVariables::all();
} else {
pv = payloadVariables_;
}
Expand Down
3 changes: 2 additions & 1 deletion src/parser/SpatialQuery.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "parser/GraphPattern.h"
#include "parser/MagicServiceQuery.h"
#include "parser/PayloadVariables.h"

namespace parsedQuery {

Expand Down Expand Up @@ -39,7 +40,7 @@ struct SpatialQuery : MagicServiceQuery {
// this vector is required to be empty - the user may not specify the payload
// configuration parameter. It will then be automatically set to
// `PayloadAllVariables` to ensure appropriate semantics.
std::vector<Variable> payloadVariables_;
PayloadVariables payloadVariables_;

// Optional further argument: the join algorithm. If it is not given, the
// default algorithm is used implicitly.
Expand Down
Loading

0 comments on commit 5b27eeb

Please sign in to comment.