From 89491a1fd5aef96b0d07638269750af6be10138a Mon Sep 17 00:00:00 2001 From: Jonathan Zeller Date: Thu, 21 Nov 2024 21:52:34 +0100 Subject: [PATCH] PR feedback --- src/engine/SpatialJoinAlgorithms.cpp | 29 ++--- test/engine/SpatialJoinAlgorithmsTest.cpp | 145 ++++++++++++++-------- 2 files changed, 108 insertions(+), 66 deletions(-) diff --git a/src/engine/SpatialJoinAlgorithms.cpp b/src/engine/SpatialJoinAlgorithms.cpp index d01ac1642c..8f455156f3 100644 --- a/src/engine/SpatialJoinAlgorithms.cpp +++ b/src/engine/SpatialJoinAlgorithms.cpp @@ -7,6 +7,7 @@ #include #include + #include "util/GeoSparqlHelpers.h" using namespace BoostGeometryNamespace; @@ -232,7 +233,8 @@ std::vector SpatialJoinAlgorithms::computeBoundingBox( const Point& startPoint) { const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, rightJoinCol, numColumns, maxDist, maxResults] = params_; - AD_CORRECTNESS_CHECK(maxDist.has_value(), "Max distance must have a value for this operation"); + AD_CORRECTNESS_CHECK(maxDist.has_value(), + "Max distance must have a value for this operation"); // haversine function auto haversine = [](double theta) { return (1 - std::cos(theta)) / 2; }; @@ -313,7 +315,8 @@ std::vector SpatialJoinAlgorithms::computeBoundingBoxForLargeDistances( const Point& startPoint) const { const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, rightJoinCol, numColumns, maxDist, maxResults] = params_; - AD_CORRECTNESS_CHECK(maxDist.has_value(), "Max distance must have a value for this operation"); + AD_CORRECTNESS_CHECK(maxDist.has_value(), + "Max distance must have a value for this operation"); // point on the opposite side of the globe Point antiPoint(startPoint.get<0>() + 180, startPoint.get<1>() * -1); @@ -429,16 +432,16 @@ std::array SpatialJoinAlgorithms::isAPoleTouched( // ____________________________________________________________________________ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { bool alreadyWarned = false; - + auto printWarning = [&]() { if (!alreadyWarned) { - AD_LOG_WARN - << "expected a point here, but no point is given. Skipping this point and future 'non points' of this query" - << std::endl; + AD_LOG_WARN << "expected a point here, but no point is given. Skipping " + "this point and future 'non points' of this query" + << std::endl; alreadyWarned = true; } }; - + const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol, rightJoinCol, numColumns, maxDist, maxResults] = params_; IdTable result{numColumns, qec_->getAllocator()}; @@ -455,12 +458,10 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { std::swap(smallerResJoinCol, otherResJoinCol); } - // bgi::rtree> rtree; - bgi::rtree, bgi::indexable, bgi::equal_to, ad_utility::AllocatorWithLimit> - rtree(bgi::quadratic<16>{}, bgi::indexable{}, bgi::equal_to{}, qec_->getAllocator()); - // bgi::rtree, ad_utility::AllocatorWithLimit> rtree{qec_->getAllocator()}; - //bgi::rtree> dummyRtree; - //bgi::rtree, ad_utility::AllocatorWithLimit> rtree(dummyRtree, qec_->getAllocator()); + bgi::rtree, bgi::indexable, + bgi::equal_to, ad_utility::AllocatorWithLimit> + rtree(bgi::quadratic<16>{}, bgi::indexable{}, + bgi::equal_to{}, qec_->getAllocator()); for (size_t i = 0; i < smallerResult->numRows(); i++) { // get point of row i auto geopoint = getPoint(smallerResult, i, smallerResJoinCol); @@ -476,7 +477,7 @@ Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() { rtree.insert(std::make_pair(std::move(p), i)); } std::vector> results{ - qec_->getAllocator()}; + qec_->getAllocator()}; for (size_t i = 0; i < otherResult->numRows(); i++) { auto geopoint1 = getPoint(otherResult, i, otherResJoinCol); diff --git a/test/engine/SpatialJoinAlgorithmsTest.cpp b/test/engine/SpatialJoinAlgorithmsTest.cpp index a203b2b849..02c88b70c0 100644 --- a/test/engine/SpatialJoinAlgorithmsTest.cpp +++ b/test/engine/SpatialJoinAlgorithmsTest.cpp @@ -217,32 +217,37 @@ class SpatialJoinParamTest columnNames); } - void testDiffSizeIdTables(std::string specialPredicate, bool addLeftChildFirst, + void testDiffSizeIdTables( + std::string specialPredicate, bool addLeftChildFirst, std::vector> expectedOutput, std::vector columnNames, bool bigChildLeft) { - auto qec = buildTestQEC(); - auto numTriples = qec->getIndex().numTriples().normal; - ASSERT_EQ(numTriples, 15); - // ====================== build small input ================================= - TripleComponent point1{Variable{"?point1"}}; - TripleComponent subject{ad_utility::triple_component::Iri::fromIriref("")}; - auto smallChild = ad_utility::makeExecutionTree( - qec, Permutation::Enum::PSO, SparqlTriple{subject, std::string{""}, point1}); - // ====================== build big input ================================== - auto bigChild = buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); - - auto firstChild = bigChildLeft ? bigChild : smallChild; - auto secondChild = bigChildLeft ? smallChild : bigChild; - auto firstVariable = + auto qec = buildTestQEC(); + auto numTriples = qec->getIndex().numTriples().normal; + ASSERT_EQ(numTriples, 15); + // ====================== build small input + // ================================= + TripleComponent point1{Variable{"?point1"}}; + TripleComponent subject{ + ad_utility::triple_component::Iri::fromIriref("")}; + auto smallChild = ad_utility::makeExecutionTree( + qec, Permutation::Enum::PSO, + SparqlTriple{subject, std::string{""}, point1}); + // ====================== build big input ================================== + auto bigChild = + buildIndexScan(qec, {"?obj2", std::string{""}, "?point2"}); + + auto firstChild = bigChildLeft ? bigChild : smallChild; + auto secondChild = bigChildLeft ? smallChild : bigChild; + auto firstVariable = bigChildLeft ? TripleComponent{Variable{"?point2"}} : point1; auto secondVariable = bigChildLeft ? point1 : TripleComponent{Variable{"?point2"}}; - - createAndTestSpatialJoin( + + createAndTestSpatialJoin( qec, SparqlTriple{firstVariable, specialPredicate, secondVariable}, firstChild, secondChild, addLeftChildFirst, expectedOutput, columnNames); -} + } protected: bool useBaselineAlgorithm_; @@ -592,25 +597,29 @@ std::vector> expectedMaxDist10000000_rows_diff{ mergeToRow(Eif, sLib, expectedDistEifLib)}; std::vector> expectedMaxDist1_rows_diffIDTable{ - mergeToRow({sTF.at(1)}, sTF, expectedDistSelf) -}; + mergeToRow({sTF.at(1)}, sTF, expectedDistSelf)}; std::vector> expectedMaxDist5000_rows_diffIDTable{ - mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), - mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun) -}; + mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun)}; std::vector> expectedMaxDist500000_rows_diffIDTable{ - mergeToRow({sTF.at(1)}, sTF, expectedDistSelf) -}; + mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), + mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif)}; std::vector> expectedMaxDist1000000_rows_diffIDTable{ - mergeToRow({sTF.at(1)}, sTF, expectedDistSelf) -}; + mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), + mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif), + mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye)}; std::vector> expectedMaxDist10000000_rows_diffIDTable{ - mergeToRow({sTF.at(1)}, sTF, expectedDistSelf) -}; + mergeToRow({sTF.at(1)}, sTF, expectedDistSelf), + mergeToRow({sTF.at(1)}, sMun, expectedDistUniMun), + mergeToRow({sTF.at(1)}, sEif, expectedDistUniEif), + mergeToRow({sTF.at(1)}, sEye, expectedDistUniEye), + mergeToRow({sTF.at(1)}, sLib, expectedDistUniLib)}; std::vector> expectedNearestNeighbors1{ mergeToRow(TF, TF, expectedDistSelf), @@ -828,30 +837,62 @@ TEST_P(SpatialJoinParamTest, computeResultSmallDatasetDifferentSizeChildren) { } TEST_P(SpatialJoinParamTest, diffSizeIdTables) { - std::vector columnNames{"?point1", - "?obj2", - "?point2", + std::vector columnNames{"?point1", "?obj2", "?point2", "?distOfTheTwoObjectsAddedInternally"}; - /*testDiffSizeIdTables("", true, expectedMaxDist1_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", true, expectedMaxDist1_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", false, expectedMaxDist1_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", false, expectedMaxDist1_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", true, expectedMaxDist5000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", true, expectedMaxDist5000_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", false, expectedMaxDist5000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", false, expectedMaxDist5000_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", true, expectedMaxDist500000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", true, expectedMaxDist500000_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", false, expectedMaxDist500000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", false, expectedMaxDist500000_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", true, expectedMaxDist1000000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", true, expectedMaxDist1000000_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", false, expectedMaxDist1000000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", false, expectedMaxDist1000000_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", true, expectedMaxDist10000000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", true, expectedMaxDist10000000_rows_diffIDTable, columnNames, false); - testDiffSizeIdTables("", false, expectedMaxDist10000000_rows_diffIDTable, columnNames, true); - testDiffSizeIdTables("", false, expectedMaxDist10000000_rows_diffIDTable, columnNames, false);*/ + testDiffSizeIdTables("", true, + expectedMaxDist1_rows_diffIDTable, columnNames, true); + testDiffSizeIdTables("", true, + expectedMaxDist1_rows_diffIDTable, columnNames, false); + testDiffSizeIdTables("", false, + expectedMaxDist1_rows_diffIDTable, columnNames, true); + testDiffSizeIdTables("", false, + expectedMaxDist1_rows_diffIDTable, columnNames, false); + testDiffSizeIdTables("", true, + expectedMaxDist5000_rows_diffIDTable, columnNames, true); + testDiffSizeIdTables("", true, + expectedMaxDist5000_rows_diffIDTable, columnNames, + false); + testDiffSizeIdTables("", false, + expectedMaxDist5000_rows_diffIDTable, columnNames, true); + testDiffSizeIdTables("", false, + expectedMaxDist5000_rows_diffIDTable, columnNames, + false); + testDiffSizeIdTables("", true, + expectedMaxDist500000_rows_diffIDTable, columnNames, + true); + testDiffSizeIdTables("", true, + expectedMaxDist500000_rows_diffIDTable, columnNames, + false); + testDiffSizeIdTables("", false, + expectedMaxDist500000_rows_diffIDTable, columnNames, + true); + testDiffSizeIdTables("", false, + expectedMaxDist500000_rows_diffIDTable, columnNames, + false); + testDiffSizeIdTables("", true, + expectedMaxDist1000000_rows_diffIDTable, columnNames, + true); + testDiffSizeIdTables("", true, + expectedMaxDist1000000_rows_diffIDTable, columnNames, + false); + testDiffSizeIdTables("", false, + expectedMaxDist1000000_rows_diffIDTable, columnNames, + true); + testDiffSizeIdTables("", false, + expectedMaxDist1000000_rows_diffIDTable, columnNames, + false); + testDiffSizeIdTables("", true, + expectedMaxDist10000000_rows_diffIDTable, columnNames, + true); + testDiffSizeIdTables("", true, + expectedMaxDist10000000_rows_diffIDTable, columnNames, + false); + testDiffSizeIdTables("", false, + expectedMaxDist10000000_rows_diffIDTable, columnNames, + true); + testDiffSizeIdTables("", false, + expectedMaxDist10000000_rows_diffIDTable, columnNames, + false); } INSTANTIATE_TEST_SUITE_P(