Skip to content

Commit

Permalink
add BoundingBox Algorithm
Browse files Browse the repository at this point in the history
  • Loading branch information
Jonathan24680 committed Oct 18, 2024
1 parent 2b4e6b3 commit e9ed3e4
Show file tree
Hide file tree
Showing 5 changed files with 607 additions and 9 deletions.
17 changes: 14 additions & 3 deletions src/engine/SpatialJoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ size_t SpatialJoin::getCostEstimate() {
if (childLeft_ && childRight_) {
size_t inputEstimate =
childLeft_->getSizeEstimate() * childRight_->getSizeEstimate();
if (useBaselineAlgorithm_) {
if (algorithm_ == Algorithm::Baseline) {
return inputEstimate * inputEstimate;
} else {
// Let n be the size of the left table and m the size of the right table.
Expand Down Expand Up @@ -337,10 +337,21 @@ PreparedSpatialJoinParams SpatialJoin::prepareJoin() const {
Result SpatialJoin::computeResult([[maybe_unused]] bool requestLaziness) {
SpatialJoinAlgorithms algorithms{_executionContext, prepareJoin(),
addDistToResult_, config_};
if (useBaselineAlgorithm_) {
if (algorithm_ == Algorithm::Baseline) {
return algorithms.BaselineAlgorithm();
} else {
} else if (algorithm_ == Algorithm::S2Geometry) {
return algorithms.S2geometryAlgorithm();
} else if (algorithm_ == Algorithm::BoundingBox) {
// as the BoundingBoxAlgorithms only works for max distance and not for
// nearest neighbors, S2geometry gets called as a backup, if the query is
// asking for the nearest neighbors
if (std::get_if<MaxDistanceConfig>(&config_)) {
return algorithms.BoundingBoxAlgorithm();
} else {
return algorithms.S2geometryAlgorithm();
}
} else {
AD_THROW("choose a valid Algorithm");

Check warning on line 354 in src/engine/SpatialJoin.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoin.cpp#L354

Added line #L354 was not covered by tests
}
}

Expand Down
14 changes: 10 additions & 4 deletions src/engine/SpatialJoin.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,14 +96,20 @@ class SpatialJoin : public Operation {
// purposes
std::optional<size_t> getMaxResults() const;

void selectAlgorithm(bool useBaselineAlgorithm) {
useBaselineAlgorithm_ = useBaselineAlgorithm;
}
// options which can be used for the algorithm, which calculates the result
enum Algorithm { Baseline, S2Geometry, BoundingBox };

void selectAlgorithm(Algorithm algorithm) { algorithm_ = algorithm; }

std::pair<size_t, size_t> onlyForTestingGetConfig() const {
return std::pair{getMaxDist().value_or(-1), getMaxResults().value_or(-1)};
}

std::variant<NearestNeighborsConfig, MaxDistanceConfig>
onlyForTestingGetRealConfig() const {
return config_;
}

std::shared_ptr<QueryExecutionTree> onlyForTestingGetLeftChild() const {
return childLeft_;
}
Expand Down Expand Up @@ -135,5 +141,5 @@ class SpatialJoin : public Operation {
// between the two objects
bool addDistToResult_ = true;
const string nameDistanceInternal_ = "?distOfTheTwoObjectsAddedInternally";
bool useBaselineAlgorithm_ = false;
Algorithm algorithm_ = BoundingBox;
};
264 changes: 264 additions & 0 deletions src/engine/SpatialJoinAlgorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,3 +223,267 @@ Result SpatialJoinAlgorithms::S2geometryAlgorithm() {
return Result(std::move(result), std::vector<ColumnIndex>{},
Result::getMergedLocalVocab(*resultLeft, *resultRight));
}

// ____________________________________________________________________________
std::vector<box> SpatialJoinAlgorithms::computeBoundingBox(
const point& startPoint) {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}

Check warning on line 234 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L233-L234

Added lines #L233 - L234 were not covered by tests
// haversine function
auto haversine = [](double theta) { return (1 - std::cos(theta)) / 2; };

// inverse haversine function
auto archaversine = [](double theta) { return std::acos(1 - 2 * theta); };

// safety buffer for numerical inaccuracies
double maxDistInMetersBuffer = static_cast<double>(maxDist.value());
if (maxDist.value() < 10) {
maxDistInMetersBuffer = 10;
} else if (maxDist.value() < std::numeric_limits<long long>::max() / 1.02) {
maxDistInMetersBuffer = 1.01 * maxDist.value();
} else {
maxDistInMetersBuffer = std::numeric_limits<long long>::max();
}

Check warning on line 249 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L248-L249

Added lines #L248 - L249 were not covered by tests

// for large distances, where the lower calculation would just result in
// a single bounding box for the whole planet, do an optimized version
if (maxDist.value() > circumferenceMax_ / 4.0 &&
maxDist.value() < circumferenceMax_ / 2.01) {
return computeAntiBoundingBox(startPoint);
}

// compute latitude bound
double upperLatBound =
startPoint.get<1>() + maxDistInMetersBuffer * (360 / circumferenceMax_);
double lowerLatBound =
startPoint.get<1>() - maxDistInMetersBuffer * (360 / circumferenceMax_);
bool poleReached = false;
// test for "overflows"
if (lowerLatBound <= -90) {
lowerLatBound = -90;
poleReached = true; // south pole reached
}
if (upperLatBound >= 90) {
upperLatBound = 90;
poleReached = true; // north pole reached
}
if (poleReached) {
return {box(point(-180.0f, lowerLatBound), point(180.0f, upperLatBound))};
}

// compute longitude bound. For an explanation of the calculation and the
// naming convention see my master thesis
double alpha = maxDistInMetersBuffer / radius_;
double gamma =
(90 - std::abs(startPoint.get<1>())) * (2 * std::numbers::pi / 360);
double beta = std::acos((std::cos(gamma) / std::cos(alpha)));
double delta = 0;
if (maxDistInMetersBuffer > circumferenceMax_ / 20) {
// use law of cosines
delta = std::acos((std::cos(alpha) - std::cos(gamma) * std::cos(beta)) /
(std::sin(gamma) * std::sin(beta)));
} else {
// use law of haversines for numerical stability
delta = archaversine((haversine(alpha - haversine(gamma - beta))) /
(std::sin(gamma) * std::sin(beta)));
}
double lonRange = delta * 360 / (2 * std::numbers::pi);
double leftLonBound = startPoint.get<0>() - lonRange;
double rightLonBound = startPoint.get<0>() + lonRange;
// test for "overflows" and create two bounding boxes if necessary
if (leftLonBound < -180) {
box box1 =
box(point(-180, lowerLatBound), point(rightLonBound, upperLatBound));
box box2 = box(point(leftLonBound + 360, lowerLatBound),
point(180, upperLatBound));
return {box1, box2};
} else if (rightLonBound > 180) {
box box1 =
box(point(leftLonBound, lowerLatBound), point(180, upperLatBound));
box box2 = box(point(-180, lowerLatBound),
point(rightLonBound - 360, upperLatBound));
return {box1, box2};
}
// default case, when no bound has an "overflow"
return {box(point(leftLonBound, lowerLatBound),
point(rightLonBound, upperLatBound))};
}

// ____________________________________________________________________________
std::vector<box> SpatialJoinAlgorithms::computeAntiBoundingBox(
const point& startPoint) {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
if (!maxDist.has_value()) {
AD_THROW("Max distance must have a value for this operation");
}

Check warning on line 322 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L321-L322

Added lines #L321 - L322 were not covered by tests
// point on the opposite side of the globe
point antiPoint(startPoint.get<0>() + 180, startPoint.get<1>() * -1);
if (antiPoint.get<0>() > 180) {
antiPoint.set<0>(antiPoint.get<0>() - 360);
}
// for an explanation of the formula see the master thesis. Divide by two two
// only consider the distance from the point to the antiPoint, subtract
// maxDist and a safety margine from that
double antiDist =
(circumferenceMin_ / 2.0) - maxDist.value() * 1.01; // safety margin
// use the bigger circumference as an additional safety margin, use 2.01
// instead of 2.0 because of rounding inaccuracies in floating point
// operations
double distToAntiPoint = (360 / circumferenceMax_) * (antiDist / 2.01);
double upperBound = antiPoint.get<1>() + distToAntiPoint;
double lowerBound = antiPoint.get<1>() - distToAntiPoint;
double leftBound = antiPoint.get<0>() - distToAntiPoint;
double rightBound = antiPoint.get<0>() + distToAntiPoint;
bool northPoleTouched = false;
bool southPoleTouched = false;
bool boxCrosses180Longitude = false; // if the 180 to -180 line is touched
// if a pole is crossed, ignore the part after the crossing
if (upperBound > 90) {
upperBound = 90;
northPoleTouched = true;
}
if (lowerBound < -90) {
lowerBound = -90;
southPoleTouched = true;
}
if (leftBound < -180) {
leftBound += 360;
}
if (rightBound > 180) {
rightBound -= 360;
}
if (rightBound < leftBound) {
boxCrosses180Longitude = true;
}
// compute bounding boxes using the anti bounding box from above
std::vector<box> boxes;
if (!northPoleTouched) {
// add upper bounding box(es)
if (boxCrosses180Longitude) {
boxes.push_back(box(point(leftBound, upperBound), point(180, 90)));
boxes.push_back(box(point(-180, upperBound), point(rightBound, 90)));
} else {
boxes.push_back(box(point(leftBound, upperBound), point(rightBound, 90)));
}
}
if (!southPoleTouched) {
// add lower bounding box(es)
if (boxCrosses180Longitude) {
boxes.push_back(box(point(leftBound, -90), point(180, lowerBound)));
boxes.push_back(box(point(-180, -90), point(rightBound, lowerBound)));
} else {
boxes.push_back(
box(point(leftBound, -90), point(rightBound, lowerBound)));
}
}
// add the box(es) inbetween the longitude lines
if (boxCrosses180Longitude) {
// only one box needed to cover the longitudes
boxes.push_back(box(point(rightBound, -90), point(leftBound, 90)));
} else {
// two boxes needed, one left and one right of the anti bounding box
boxes.push_back(box(point(-180, -90), point(leftBound, 90)));
boxes.push_back(box(point(rightBound, -90), point(180, 90)));
}
return boxes;
}

// ____________________________________________________________________________
bool SpatialJoinAlgorithms::containedInBoundingBoxes(
const std::vector<box>& bbox, point point1) {
// correct lon bounds if necessary
while (point1.get<0>() < -180) {
point1.set<0>(point1.get<0>() + 360);
}
while (point1.get<0>() > 180) {
point1.set<0>(point1.get<0>() - 360);
}
if (point1.get<1>() < -90) {
point1.set<1>(-90);
} else if (point1.get<1>() > 90) {
point1.set<1>(90);
}

for (size_t i = 0; i < bbox.size(); i++) {
if (boost::geometry::covered_by(point1, bbox.at(i))) {
return true;
}
}
return false;
}

// ____________________________________________________________________________
Result SpatialJoinAlgorithms::BoundingBoxAlgorithm() {
const auto [idTableLeft, resultLeft, idTableRight, resultRight, leftJoinCol,
rightJoinCol, numColumns, maxDist, maxResults] = params_;
IdTable result{numColumns, qec_->getAllocator()};

// create r-tree for smaller result table
auto smallerResult = idTableLeft;
auto otherResult = idTableRight;
bool leftResSmaller = true;
/*auto smallerChild = childLeft_;
auto otherChild = childRight_;
auto smallerVariable = leftChildVariable_;
auto otherVariable = rightChildVariable_;*/
auto smallerResJoinCol = leftJoinCol;
auto otherResJoinCol = rightJoinCol;
if (idTableLeft->numRows() > idTableRight->numRows()) {
smallerResult = idTableRight;
otherResult = idTableLeft;
leftResSmaller = false;

Check warning on line 438 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L436-L438

Added lines #L436 - L438 were not covered by tests
/*smallerChild = childRight_;
otherChild = childLeft_;
smallerVariable = rightChildVariable_;
otherVariable = leftChildVariable_; */
smallerResJoinCol = rightJoinCol;
otherResJoinCol = leftJoinCol;
}

Check warning on line 445 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L443-L445

Added lines #L443 - L445 were not covered by tests

// Todo in the benchmark: use different algorithms and compare their
// performance
bgi::rtree<value, bgi::quadratic<16>> rtree;
for (size_t i = 0; i < smallerResult->numRows(); i++) {
// get point of row i
// ColumnIndex smallerJoinCol = getJoinCol(smallerChild, smallerVariable);
auto geopoint = getPoint(smallerResult, i, smallerResJoinCol);
point p(geopoint.value().getLng(), geopoint.value().getLat());

// add every point together with the row number into the rtree
rtree.insert(std::make_pair(p, i));
}
for (size_t i = 0; i < otherResult->numRows(); i++) {
// ColumnIndex otherJoinCol = getJoinCol(otherChild, otherVariable);
auto geopoint1 = getPoint(otherResult, i, otherResJoinCol);
point p(geopoint1.value().getLng(), geopoint1.value().getLat());

// query the other rtree for every point using the following bounding box
std::vector<box> bbox = computeBoundingBox(p);
std::vector<value> results;
for (size_t k = 0; k < bbox.size(); k++) {
rtree.query(bgi::intersects(bbox.at(k)), std::back_inserter(results));
}
for (size_t k = 0; k < results.size(); k++) {
size_t rowLeft = results.at(k).second;
size_t rowRight = i;
if (!leftResSmaller) {
rowLeft = i;
rowRight = results.at(k).second;
}

Check warning on line 476 in src/engine/SpatialJoinAlgorithms.cpp

View check run for this annotation

Codecov / codecov/patch

src/engine/SpatialJoinAlgorithms.cpp#L474-L476

Added lines #L474 - L476 were not covered by tests
auto distance = computeDist(idTableLeft, idTableRight, rowLeft, rowRight,
leftJoinCol, rightJoinCol);
if (distance.getDatatype() == Datatype::Int &&
distance.getInt() <= maxDist) {
addResultTableEntry(&result, idTableLeft, idTableRight, rowLeft,
rowRight, distance);
}
}
}
Result resTable =
Result(std::move(result), std::vector<ColumnIndex>{}, LocalVocab{});
return resTable;
}
Loading

0 comments on commit e9ed3e4

Please sign in to comment.