From 23f2ed2ff9a11913a712ea5919ce69361be6dd64 Mon Sep 17 00:00:00 2001 From: Jonathan Zeller Date: Fri, 22 Nov 2024 17:43:05 +0100 Subject: [PATCH] PR feedback --- src/engine/SpatialJoin.cpp | 7 +- src/engine/SpatialJoinAlgorithms.cpp | 43 ++++--- src/engine/SpatialJoinAlgorithms.h | 12 +- test/engine/SpatialJoinAlgorithmsTest.cpp | 143 +++++++++++++++++++++- 4 files changed, 174 insertions(+), 31 deletions(-) diff --git a/src/engine/SpatialJoin.cpp b/src/engine/SpatialJoin.cpp index 4bf1eb372d..1ad7d8179a 100644 --- a/src/engine/SpatialJoin.cpp +++ b/src/engine/SpatialJoin.cpp @@ -336,12 +336,13 @@ PreparedSpatialJoinParams SpatialJoin::prepareJoin() const { // ____________________________________________________________________________ Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) { SpatialJoinAlgorithms algorithms{_executionContext, prepareJoin(), - addDistToResult_, config_}; + addDistToResult_, config_, this}; if (algorithm_ == Algorithm::Baseline) { return algorithms.BaselineAlgorithm(); } else if (algorithm_ == Algorithm::S2Geometry) { return algorithms.S2geometryAlgorithm(); - } else if (algorithm_ == Algorithm::BoundingBox) { + } else { + AD_CORRECTNESS_CHECK(algorithm_ == Algorithm::BoundingBox); // as the BoundingBoxAlgorithms only works for max distance and not for // nearest neighbors, S2geometry gets called as a backup, if the query is // asking for the nearest neighbors @@ -350,8 +351,6 @@ Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) { } else { return algorithms.S2geometryAlgorithm(); } - } else { - AD_THROW("choose a valid Algorithm"); } } diff --git a/src/engine/SpatialJoinAlgorithms.cpp b/src/engine/SpatialJoinAlgorithms.cpp index 8f455156f3..f036be5e07 100644 --- a/src/engine/SpatialJoinAlgorithms.cpp +++ b/src/engine/SpatialJoinAlgorithms.cpp @@ -16,11 +16,13 @@ using namespace BoostGeometryNamespace; SpatialJoinAlgorithms::SpatialJoinAlgorithms( QueryExecutionContext* qec, PreparedSpatialJoinParams params, bool addDistToResult, - std::variant config) + std::variant config, + std::optional spatialJoin) : qec_{qec}, params_{std::move(params)}, addDistToResult_{addDistToResult}, - config_{std::move(config)} {} + config_{std::move(config)}, + spatialJoin_{spatialJoin} {} // ____________________________________________________________________________ std::optional SpatialJoinAlgorithms::getPoint(const IdTable* restable, @@ -230,7 +232,7 @@ Result SpatialJoinAlgorithms::S2geometryAlgorithm() { // ____________________________________________________________________________ std::vector SpatialJoinAlgorithms::computeBoundingBox( - const Point& startPoint) { + const Point& startPoint) const { const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, rightJoinCol, numColumns, maxDist, maxResults] = params_; AD_CORRECTNESS_CHECK(maxDist.has_value(), @@ -391,7 +393,7 @@ std::vector SpatialJoinAlgorithms::computeBoundingBoxForLargeDistances( // ____________________________________________________________________________ bool SpatialJoinAlgorithms::isContainedInBoundingBoxes( - const std::vector& boundingBox, Point point) { + const std::vector& boundingBox, Point point) const { convertToNormalCoordinates(point); return std::ranges::any_of(boundingBox, [point](const Box& aBox) { @@ -431,16 +433,21 @@ std::array SpatialJoinAlgorithms::isAPoleTouched( // ____________________________________________________________________________ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { - bool alreadyWarned = false; - - auto printWarning = [&]() { - if (!alreadyWarned) { - AD_LOG_WARN << "expected a point here, but no point is given. Skipping " - "this point and future 'non points' of this query" - << std::endl; - alreadyWarned = true; - } - }; + auto printWarning = + [alreadyWarned = false](std::optional spatialJoin) mutable { + if (!alreadyWarned) { + std::string warning = + "The input to a spatial join contained at least one element, " + "that is not a point geometry and is thus skipped. Note that " + "QLever currently only accepts point geometries for the " + "spatial joins"; + AD_LOG_WARN << warning << std::endl; + alreadyWarned = true; + if (spatialJoin) { + spatialJoin.value()->addWarning(warning); + } + } + }; const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, rightJoinCol, numColumns, maxDist, maxResults] = params_; @@ -467,7 +474,7 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { auto geopoint = getPoint(smallerResult, i, smallerResJoinCol); if (!geopoint) { - printWarning(); + printWarning(spatialJoin_); continue; } @@ -482,7 +489,7 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { auto geopoint1 = getPoint(otherResult, i, otherResJoinCol); if (!geopoint1) { - printWarning(); + printWarning(spatialJoin_); continue; } @@ -504,8 +511,8 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { } auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight, leftJoinCol, rightJoinCol); - if (distance.getDatatype() == Datatype::Int && - distance.getInt() <= maxDist) { + if (distance.getInt() <= maxDist) { + AD_CORRECTNESS_CHECK(distance.getDatatype() == Datatype::Int); addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft, rowRight, distance); } diff --git a/src/engine/SpatialJoinAlgorithms.h b/src/engine/SpatialJoinAlgorithms.h index 886b1bc583..0719b769b4 100644 --- a/src/engine/SpatialJoinAlgorithms.h +++ b/src/engine/SpatialJoinAlgorithms.h @@ -23,20 +23,21 @@ class SpatialJoinAlgorithms { SpatialJoinAlgorithms( QueryExecutionContext* qec, PreparedSpatialJoinParams params, bool addDistToResult, - std::variant config); + std::variant config, + std::optional spatialJoin = std::nullopt); Result BaselineAlgorithm(); Result S2geometryAlgorithm(); Result BoundingBoxAlgorithm(); std::vector OnlyForTestingWrapperComputeBoundingBox( - const BoostGeometryNamespace::Point& startPoint) { + const BoostGeometryNamespace::Point& startPoint) const { return computeBoundingBox(startPoint); } bool OnlyForTestingWrapperContainedInBoundingBoxes( const std::vector& boundingBox, - const BoostGeometryNamespace::Point& point) { + const BoostGeometryNamespace::Point& point) const { return isContainedInBoundingBoxes(boundingBox, point); } @@ -70,7 +71,7 @@ class SpatialJoinAlgorithms { // the right, which when seen on the sphere look like a single box, but on the // map and in the internal representation it looks like two/more boxes) std::vector computeBoundingBox( - const BoostGeometryNamespace::Point& startPoint); + const BoostGeometryNamespace::Point& startPoint) const; // This helper function calculates the bounding boxes based on a box, where // definitely no match can occur. This means every element in the anti @@ -87,12 +88,13 @@ class SpatialJoinAlgorithms { // bounding boxes bool isContainedInBoundingBoxes( const std::vector& boundingBox, - BoostGeometryNamespace::Point point); + BoostGeometryNamespace::Point point) const; QueryExecutionContext* qec_; PreparedSpatialJoinParams params_; bool addDistToResult_; std::variant config_; + std::optional spatialJoin_; // circumference in meters at the equator (max) and the pole (min) (as the // earth is not exactly a sphere the circumference is different. Note that diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp index 02c88b70c0..d8fdf2d219 100644 --- a/test/engine/SpatialJoinAlgorithmsTest.cpp +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -33,7 +33,8 @@ class SpatialJoinParamTest std::shared_ptr leftChild, std::shared_ptr rightChild, bool addLeftChildFirst, std::vector> expectedOutputUnorderedRows, - std::vector columnNames) { + std::vector columnNames, + bool isWrongPointInputTest = false) { // this function is like transposing a matrix. An entry which has been // stored at (i, k) is now stored at (k, i). The reason this is needed is // the following: this function receives the input as a vector of vector of @@ -107,6 +108,23 @@ class SpatialJoinParamTest }*/ EXPECT_THAT(vec, ::testing::UnorderedElementsAreArray(expectedOutput)); + + if (isWrongPointInputTest && + GetParam() == SpatialJoin::Algorithm::BoundingBox) { + auto warnings = spatialJoin->collectWarnings(); + bool containsWrongPointWarning = false; + std::string warningMessage = + "The input to a spatial join contained at least one element, " + "that is not a point geometry and is thus skipped. Note that " + "QLever currently only accepts point geometries for the " + "spatial joins"; + for (const auto& warning : warnings) { + if (warning == warningMessage) { + containsWrongPointWarning = true; + } + } + ASSERT_TRUE(containsWrongPointWarning); + } } // build the test using the small dataset. Let the SpatialJoin operation be @@ -224,15 +242,14 @@ class SpatialJoinParamTest auto qec = buildTestQEC(); auto numTriples = qec->getIndex().numTriples().normal; ASSERT_EQ(numTriples, 15); - // ====================== build small input - // ================================= + // ====================== build small input ============================== TripleComponent point1{Variable{"?point1"}}; TripleComponent subject{ ad_utility::triple_component::Iri::fromIriref("")}; auto smallChild = ad_utility::makeExecutionTree( qec, Permutation::Enum::PSO, SparqlTriple{subject, std::string{""}, point1}); - // ====================== build big input ================================== + // ====================== build big input ================================ auto bigChild = buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); @@ -249,6 +266,33 @@ class SpatialJoinParamTest columnNames); } + void testWrongPointInInput( + std::string specialPredicate, bool addLeftChildFirst, + std::vector> expectedOutput, + std::vector columnNames) { + auto kg = createSmallDatasetWithPoints(); + // make first point wrong: + auto pos = kg.find("POINT("); + kg = kg.insert(pos + 7, "wrongStuff"); + + ad_utility::MemorySize blocksizePermutations = 128_MB; + auto qec = ad_utility::testing::getQec(kg, true, true, false, + blocksizePermutations, false); + auto numTriples = qec->getIndex().numTriples().normal; + ASSERT_EQ(numTriples, 15); + // ====================== build inputs ================================ + TripleComponent point1{Variable{"?point1"}}; + TripleComponent point2{Variable{"?point2"}}; + auto leftChild = + buildIndexScan(qec, {"?obj1", std::string{""}, "?point1"}); + auto rightChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); + + createAndTestSpatialJoin( + qec, SparqlTriple{point1, specialPredicate, point2}, leftChild, + rightChild, addLeftChildFirst, expectedOutput, columnNames, true); + } + protected: bool useBaselineAlgorithm_; }; @@ -518,6 +562,63 @@ std::vector> expectedMaxDist10000000_rows_small{ mergeToRow(sEif, sEye, expectedDistEyeEif), mergeToRow(sEif, sLib, expectedDistEifLib)}; +std::vector> expectedMaxDist1_rows_small_wrong_point{ + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), +}; + +std::vector> + expectedMaxDist5000_rows_small_wrong_point{ + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf)}; + +std::vector> + expectedMaxDist500000_rows_small_wrong_point{ + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}; + +std::vector> + expectedMaxDist1000000_rows_small_wrong_point{ + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif)}; + +std::vector> + expectedMaxDist10000000_rows_small_wrong_point{ + mergeToRow(sMun, sMun, expectedDistSelf), + mergeToRow(sMun, sEif, expectedDistMunEif), + mergeToRow(sMun, sEye, expectedDistMunEye), + mergeToRow(sMun, sLib, expectedDistMunLib), + mergeToRow(sEye, sEye, expectedDistSelf), + mergeToRow(sEye, sEif, expectedDistEyeEif), + mergeToRow(sEye, sMun, expectedDistMunEye), + mergeToRow(sEye, sLib, expectedDistEyeLib), + mergeToRow(sLib, sLib, expectedDistSelf), + mergeToRow(sLib, sMun, expectedDistMunLib), + mergeToRow(sLib, sEye, expectedDistEyeLib), + mergeToRow(sLib, sEif, expectedDistEifLib), + mergeToRow(sEif, sEif, expectedDistSelf), + mergeToRow(sEif, sMun, expectedDistMunEif), + mergeToRow(sEif, sEye, expectedDistEyeEif), + mergeToRow(sEif, sLib, expectedDistEifLib)}; + std::vector> expectedMaxDist1_rows_diff{ mergeToRow(TF, sTF, expectedDistSelf), mergeToRow(Mun, sMun, expectedDistSelf), @@ -895,6 +996,40 @@ TEST_P(SpatialJoinParamTest, diffSizeIdTables) { false); } +TEST_P(SpatialJoinParamTest, wrongPointInInput) { + // expected behavior: point is skipped + std::vector columnNames{"?obj1", "?point1", "?obj2", "?point2", + "?distOfTheTwoObjectsAddedInternally"}; + testWrongPointInInput("", true, + expectedMaxDist1_rows_small_wrong_point, columnNames); + testWrongPointInInput("", false, + expectedMaxDist1_rows_small_wrong_point, columnNames); + testWrongPointInInput("", true, + expectedMaxDist5000_rows_small_wrong_point, + columnNames); + testWrongPointInInput("", false, + expectedMaxDist5000_rows_small_wrong_point, + columnNames); + testWrongPointInInput("", true, + expectedMaxDist500000_rows_small_wrong_point, + columnNames); + testWrongPointInInput("", false, + expectedMaxDist500000_rows_small_wrong_point, + columnNames); + testWrongPointInInput("", true, + expectedMaxDist1000000_rows_small_wrong_point, + columnNames); + testWrongPointInInput("", false, + expectedMaxDist1000000_rows_small_wrong_point, + columnNames); + testWrongPointInInput("", true, + expectedMaxDist10000000_rows_small_wrong_point, + columnNames); + testWrongPointInInput("", false, + expectedMaxDist10000000_rows_small_wrong_point, + columnNames); +} + INSTANTIATE_TEST_SUITE_P( SpatialJoin, SpatialJoinParamTest, ::testing::Values(SpatialJoin::Algorithm::Baseline,