Skip to content

Commit

Permalink
PR feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan24680 committed Nov 22, 2024
1 parent 8743986 commit 23f2ed2
Show file tree
Hide file tree
Showing 4 changed files with 174 additions and 31 deletions.
7 changes: 3 additions & 4 deletions src/engine/SpatialJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,12 +336,13 @@ PreparedSpatialJoinParams SpatialJoin::prepareJoin() const {
// ____________________________________________________________________________
Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) {
SpatialJoinAlgorithms algorithms{_executionContext, prepareJoin(),
addDistToResult_, config_};
addDistToResult_, config_, this};
if (algorithm_ == Algorithm::Baseline) {
return algorithms.BaselineAlgorithm();
} else if (algorithm_ == Algorithm::S2Geometry) {
return algorithms.S2geometryAlgorithm();
} else if (algorithm_ == Algorithm::BoundingBox) {
} else {
AD_CORRECTNESS_CHECK(algorithm_ == Algorithm::BoundingBox);
// as the BoundingBoxAlgorithms only works for max distance and not for
// nearest neighbors, S2geometry gets called as a backup, if the query is
// asking for the nearest neighbors
Expand All @@ -350,8 +351,6 @@ Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) {
} else {
return algorithms.S2geometryAlgorithm();
}
} else {
AD_THROW("choose a valid Algorithm");
}
}

Expand Down
43 changes: 25 additions & 18 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ using namespace BoostGeometryNamespace;
SpatialJoinAlgorithms::SpatialJoinAlgorithms(
QueryExecutionContext* qec, PreparedSpatialJoinParams params,
bool addDistToResult,
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config)
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config,
std::optional<SpatialJoin*> spatialJoin)
: qec_{qec},
params_{std::move(params)},
addDistToResult_{addDistToResult},
config_{std::move(config)} {}
config_{std::move(config)},
spatialJoin_{spatialJoin} {}

// ____________________________________________________________________________
std::optional<GeoPoint> SpatialJoinAlgorithms::getPoint(const IdTable* restable,
Expand Down Expand Up @@ -230,7 +232,7 @@ Result SpatialJoinAlgorithms::S2geometryAlgorithm() {

// ____________________________________________________________________________
std::vector<Box> SpatialJoinAlgorithms::computeBoundingBox(
const Point& startPoint) {
const Point& startPoint) const {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
AD_CORRECTNESS_CHECK(maxDist.has_value(),
Expand Down Expand Up @@ -391,7 +393,7 @@ std::vector<Box> SpatialJoinAlgorithms::computeBoundingBoxForLargeDistances(

// ____________________________________________________________________________
bool SpatialJoinAlgorithms::isContainedInBoundingBoxes(
const std::vector<Box>& boundingBox, Point point) {
const std::vector<Box>& boundingBox, Point point) const {
convertToNormalCoordinates(point);

return std::ranges::any_of(boundingBox, [point](const Box& aBox) {
Expand Down Expand Up @@ -431,16 +433,21 @@ std::array<bool, 2> SpatialJoinAlgorithms::isAPoleTouched(

// ____________________________________________________________________________
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
bool alreadyWarned = false;

auto printWarning = [&]() {
if (!alreadyWarned) {
AD_LOG_WARN << "expected a point here, but no point is given. Skipping "
"this point and future 'non points' of this query"
<< std::endl;
alreadyWarned = true;
}
};
auto printWarning =
[alreadyWarned = false](std::optional<SpatialJoin*> spatialJoin) mutable {
if (!alreadyWarned) {
std::string warning =
"The input to a spatial join contained at least one element, "
"that is not a point geometry and is thus skipped. Note that "
"QLever currently only accepts point geometries for the "
"spatial joins";
AD_LOG_WARN << warning << std::endl;
alreadyWarned = true;
if (spatialJoin) {
spatialJoin.value()->addWarning(warning);
}
}
};

const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
Expand All @@ -467,7 +474,7 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol);

if (!geopoint) {
printWarning();
printWarning(spatialJoin_);
continue;
}

Expand All @@ -482,7 +489,7 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
auto geopoint1 = getPoint(otherResult, i, otherResJoinCol);

if (!geopoint1) {
printWarning();
printWarning(spatialJoin_);
continue;
}

Expand All @@ -504,8 +511,8 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
}
auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight,
leftJoinCol, rightJoinCol);
if (distance.getDatatype() == Datatype::Int &&
distance.getInt() <= maxDist) {
if (distance.getInt() <= maxDist) {
AD_CORRECTNESS_CHECK(distance.getDatatype() == Datatype::Int);
addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft,
rowRight, distance);
}
Expand Down
12 changes: 7 additions & 5 deletions src/engine/SpatialJoinAlgorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,21 @@ class SpatialJoinAlgorithms {
SpatialJoinAlgorithms(
QueryExecutionContext* qec, PreparedSpatialJoinParams params,
bool addDistToResult,
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config);
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config,
std::optional<SpatialJoin*> spatialJoin = std::nullopt);
Result BaselineAlgorithm();
Result S2geometryAlgorithm();
Result BoundingBoxAlgorithm();

std::vector<BoostGeometryNamespace::Box>
OnlyForTestingWrapperComputeBoundingBox(
const BoostGeometryNamespace::Point& startPoint) {
const BoostGeometryNamespace::Point& startPoint) const {
return computeBoundingBox(startPoint);
}

bool OnlyForTestingWrapperContainedInBoundingBoxes(
const std::vector<BoostGeometryNamespace::Box>& boundingBox,
const BoostGeometryNamespace::Point& point) {
const BoostGeometryNamespace::Point& point) const {
return isContainedInBoundingBoxes(boundingBox, point);
}

Expand Down Expand Up @@ -70,7 +71,7 @@ class SpatialJoinAlgorithms {
// the right, which when seen on the sphere look like a single box, but on the
// map and in the internal representation it looks like two/more boxes)
std::vector<BoostGeometryNamespace::Box> computeBoundingBox(
const BoostGeometryNamespace::Point& startPoint);
const BoostGeometryNamespace::Point& startPoint) const;

// This helper function calculates the bounding boxes based on a box, where
// definitely no match can occur. This means every element in the anti
Expand All @@ -87,12 +88,13 @@ class SpatialJoinAlgorithms {
// bounding boxes
bool isContainedInBoundingBoxes(
const std::vector<BoostGeometryNamespace::Box>& boundingBox,
BoostGeometryNamespace::Point point);
BoostGeometryNamespace::Point point) const;

QueryExecutionContext* qec_;
PreparedSpatialJoinParams params_;
bool addDistToResult_;
std::variant<NearestNeighborsConfig, MaxDistanceConfig> config_;
std::optional<SpatialJoin*> spatialJoin_;

// circumference in meters at the equator (max) and the pole (min) (as the
// earth is not exactly a sphere the circumference is different. Note that
Expand Down
143 changes: 139 additions & 4 deletions test/engine/SpatialJoinAlgorithmsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class SpatialJoinParamTest
std::shared_ptr<QueryExecutionTree> leftChild,
std::shared_ptr<QueryExecutionTree> rightChild, bool addLeftChildFirst,
std::vector<std::vector<std::string>> expectedOutputUnorderedRows,
std::vector<std::string> columnNames) {
std::vector<std::string> columnNames,
bool isWrongPointInputTest = false) {
// this function is like transposing a matrix. An entry which has been
// stored at (i, k) is now stored at (k, i). The reason this is needed is
// the following: this function receives the input as a vector of vector of
Expand Down Expand Up @@ -107,6 +108,23 @@ class SpatialJoinParamTest
}*/

EXPECT_THAT(vec, ::testing::UnorderedElementsAreArray(expectedOutput));

if (isWrongPointInputTest &&
GetParam() == SpatialJoin::Algorithm::BoundingBox) {
auto warnings = spatialJoin->collectWarnings();
bool containsWrongPointWarning = false;
std::string warningMessage =
"The input to a spatial join contained at least one element, "
"that is not a point geometry and is thus skipped. Note that "
"QLever currently only accepts point geometries for the "
"spatial joins";
for (const auto& warning : warnings) {
if (warning == warningMessage) {
containsWrongPointWarning = true;
}
}
ASSERT_TRUE(containsWrongPointWarning);
}
}

// build the test using the small dataset. Let the SpatialJoin operation be
Expand Down Expand Up @@ -224,15 +242,14 @@ class SpatialJoinParamTest
auto qec = buildTestQEC();
auto numTriples = qec->getIndex().numTriples().normal;
ASSERT_EQ(numTriples, 15);
// ====================== build small input
// =================================
// ====================== build small input ==============================
TripleComponent point1{Variable{"?point1"}};
TripleComponent subject{
ad_utility::triple_component::Iri::fromIriref("<geometry1>")};
auto smallChild = ad_utility::makeExecutionTree<IndexScan>(
qec, Permutation::Enum::PSO,
SparqlTriple{subject, std::string{"<asWKT>"}, point1});
// ====================== build big input ==================================
// ====================== build big input ================================
auto bigChild =
buildIndexScan(qec, {"?obj2", std::string{"<asWKT>"}, "?point2"});

Expand All @@ -249,6 +266,33 @@ class SpatialJoinParamTest
columnNames);
}

void testWrongPointInInput(
std::string specialPredicate, bool addLeftChildFirst,
std::vector<std::vector<std::string>> expectedOutput,
std::vector<std::string> columnNames) {
auto kg = createSmallDatasetWithPoints();
// make first point wrong:
auto pos = kg.find("POINT(");
kg = kg.insert(pos + 7, "wrongStuff");

ad_utility::MemorySize blocksizePermutations = 128_MB;
auto qec = ad_utility::testing::getQec(kg, true, true, false,
blocksizePermutations, false);
auto numTriples = qec->getIndex().numTriples().normal;
ASSERT_EQ(numTriples, 15);
// ====================== build inputs ================================
TripleComponent point1{Variable{"?point1"}};
TripleComponent point2{Variable{"?point2"}};
auto leftChild =
buildIndexScan(qec, {"?obj1", std::string{"<asWKT>"}, "?point1"});
auto rightChild =
buildIndexScan(qec, {"?obj2", std::string{"<asWKT>"}, "?point2"});

createAndTestSpatialJoin(
qec, SparqlTriple{point1, specialPredicate, point2}, leftChild,
rightChild, addLeftChildFirst, expectedOutput, columnNames, true);
}

protected:
bool useBaselineAlgorithm_;
};
Expand Down Expand Up @@ -518,6 +562,63 @@ std::vector<std::vector<std::string>> expectedMaxDist10000000_rows_small{
mergeToRow(sEif, sEye, expectedDistEyeEif),
mergeToRow(sEif, sLib, expectedDistEifLib)};

std::vector<std::vector<std::string>> expectedMaxDist1_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf),
};

std::vector<std::vector<std::string>>
expectedMaxDist5000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf)};

std::vector<std::vector<std::string>>
expectedMaxDist500000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sMun, sEif, expectedDistMunEif),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sEye, sEif, expectedDistEyeEif),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf),
mergeToRow(sEif, sMun, expectedDistMunEif),
mergeToRow(sEif, sEye, expectedDistEyeEif)};

std::vector<std::vector<std::string>>
expectedMaxDist1000000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sMun, sEif, expectedDistMunEif),
mergeToRow(sMun, sEye, expectedDistMunEye),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sEye, sEif, expectedDistEyeEif),
mergeToRow(sEye, sMun, expectedDistMunEye),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sEif, sEif, expectedDistSelf),
mergeToRow(sEif, sMun, expectedDistMunEif),
mergeToRow(sEif, sEye, expectedDistEyeEif)};

std::vector<std::vector<std::string>>
expectedMaxDist10000000_rows_small_wrong_point{
mergeToRow(sMun, sMun, expectedDistSelf),
mergeToRow(sMun, sEif, expectedDistMunEif),
mergeToRow(sMun, sEye, expectedDistMunEye),
mergeToRow(sMun, sLib, expectedDistMunLib),
mergeToRow(sEye, sEye, expectedDistSelf),
mergeToRow(sEye, sEif, expectedDistEyeEif),
mergeToRow(sEye, sMun, expectedDistMunEye),
mergeToRow(sEye, sLib, expectedDistEyeLib),
mergeToRow(sLib, sLib, expectedDistSelf),
mergeToRow(sLib, sMun, expectedDistMunLib),
mergeToRow(sLib, sEye, expectedDistEyeLib),
mergeToRow(sLib, sEif, expectedDistEifLib),
mergeToRow(sEif, sEif, expectedDistSelf),
mergeToRow(sEif, sMun, expectedDistMunEif),
mergeToRow(sEif, sEye, expectedDistEyeEif),
mergeToRow(sEif, sLib, expectedDistEifLib)};

std::vector<std::vector<std::string>> expectedMaxDist1_rows_diff{
mergeToRow(TF, sTF, expectedDistSelf),
mergeToRow(Mun, sMun, expectedDistSelf),
Expand Down Expand Up @@ -895,6 +996,40 @@ TEST_P(SpatialJoinParamTest, diffSizeIdTables) {
false);
}

TEST_P(SpatialJoinParamTest, wrongPointInInput) {
// expected behavior: point is skipped
std::vector<std::string> columnNames{"?obj1", "?point1", "?obj2", "?point2",
"?distOfTheTwoObjectsAddedInternally"};
testWrongPointInInput("<max-distance-in-meters:1>", true,
expectedMaxDist1_rows_small_wrong_point, columnNames);
testWrongPointInInput("<max-distance-in-meters:1>", false,
expectedMaxDist1_rows_small_wrong_point, columnNames);
testWrongPointInInput("<max-distance-in-meters:5000>", true,
expectedMaxDist5000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:5000>", false,
expectedMaxDist5000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:500000>", true,
expectedMaxDist500000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:500000>", false,
expectedMaxDist500000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:1000000>", true,
expectedMaxDist1000000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:1000000>", false,
expectedMaxDist1000000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:10000000>", true,
expectedMaxDist10000000_rows_small_wrong_point,
columnNames);
testWrongPointInInput("<max-distance-in-meters:10000000>", false,
expectedMaxDist10000000_rows_small_wrong_point,
columnNames);
}

INSTANTIATE_TEST_SUITE_P(
SpatialJoin, SpatialJoinParamTest,
::testing::Values(SpatialJoin::Algorithm::Baseline,
Expand Down

0 comments on commit 23f2ed2

Please sign in to comment.