From 7f8f63ae0d4fb595a28b5a54b02738c5d22e6500 Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Sun, 3 Nov 2024 21:09:17 +0100 Subject: [PATCH] RFC: Provide selection methods based on internal iteration (#164) - [x] I agree to follow the project's [code of conduct](https://github.com/georust/geo/blob/master/CODE_OF_CONDUCT.md). - [x] I added an entry to `rstar/CHANGELOG.md` if knowledge of this change could be valuable to users. --- This avoids the overhead of allocating an internal buffer to keep track of upcoming nodes when implementing the `Iterator` trait. I also found a mistake in the old code from #37 (lack of early return in the parent case) and now the benchmarks also look somewhat nicer, i.e. directly comparing internal and external iteration on the same data set: ```console locate_at_point (successful) time: [115.62 ns 116.51 ns 117.43 ns] locate_at_point_int (successful) time: [66.831 ns 67.264 ns 67.653 ns] locate_at_point (unsuccessful) time: [167.70 ns 168.03 ns 168.34 ns] locate_at_point_int (unsuccessful) time: [167.90 ns 168.28 ns 168.64 ns] ``` Closes #163 --- rstar-benches/Cargo.toml | 10 +-- rstar-benches/benches/benchmarks.rs | 22 ++++- rstar/src/algorithm/iterators.rs | 101 +++++++++++++++++++++ rstar/src/rtree.rs | 130 +++++++++++++++++++++++++++- 4 files changed, 256 insertions(+), 7 deletions(-) diff --git a/rstar-benches/Cargo.toml b/rstar-benches/Cargo.toml index 431fb88..d3da8fc 100644 --- a/rstar-benches/Cargo.toml +++ b/rstar-benches/Cargo.toml @@ -4,11 +4,11 @@ version = "0.1.1" authors = ["Stefan Altmayer ", "The Georust Developers "] [dev-dependencies] -criterion = { version = "0.4.0", features = ["html_reports"] } -geo = "0.26.0" -geo-types = { version = "0.7.9", features = ["use-rstar_0_10"] } -rand = "0.7" -rand_hc = "0.2" +criterion = { version = "0.5.0", features = ["html_reports"] } +geo = "0.28.0" +geo-types = { version = "0.7.9", features = ["use-rstar_0_12"] } +rand = "0.8" +rand_hc = "0.3" rstar = { path = "../rstar" } [[bench]] diff --git a/rstar-benches/benches/benchmarks.rs b/rstar-benches/benches/benchmarks.rs index 8347410..c871da4 100644 --- a/rstar-benches/benches/benchmarks.rs +++ b/rstar-benches/benches/benchmarks.rs @@ -123,6 +123,24 @@ fn locate_unsuccessful(c: &mut Criterion) { }); } +fn locate_successful_internal(c: &mut Criterion) { + let points: Vec<_> = create_random_points(100_000, SEED_1); + let query_point = points[500]; + let tree = RTree::<_, Params>::bulk_load_with_params(points); + c.bench_function("locate_at_point_int (successful)", move |b| { + b.iter(|| tree.locate_at_point_int(&query_point).is_some()) + }); +} + +fn locate_unsuccessful_internal(c: &mut Criterion) { + let points: Vec<_> = create_random_points(100_000, SEED_1); + let tree = RTree::<_, Params>::bulk_load_with_params(points); + let query_point = [0.7, 0.7]; + c.bench_function("locate_at_point_int (unsuccessful)", move |b| { + b.iter(|| tree.locate_at_point(&query_point).is_none()) + }); +} + criterion_group!( benches, bulk_load_baseline, @@ -131,7 +149,9 @@ criterion_group!( bulk_load_complex_geom_cached, tree_creation_quality, locate_successful, - locate_unsuccessful + locate_unsuccessful, + locate_successful_internal, + locate_unsuccessful_internal, ); criterion_main!(benches); diff --git a/rstar/src/algorithm/iterators.rs b/rstar/src/algorithm/iterators.rs index 4c82552..5a241f0 100644 --- a/rstar/src/algorithm/iterators.rs +++ b/rstar/src/algorithm/iterators.rs @@ -1,6 +1,7 @@ use crate::algorithm::selection_functions::*; use crate::node::{ParentNode, RTreeNode}; use crate::object::RTreeObject; +use core::ops::ControlFlow; #[cfg(doc)] use crate::RTree; @@ -91,6 +92,56 @@ where } } +/// Internal iteration variant of [`SelectionIterator`] +pub fn select_nodes<'a, T, Func, V, B>( + root: &'a ParentNode, + func: &Func, + visitor: &mut V, +) -> ControlFlow +where + T: RTreeObject, + Func: SelectionFunction, + V: FnMut(&'a T) -> ControlFlow, +{ + struct Args<'a, Func, V> { + func: &'a Func, + visitor: &'a mut V, + } + + fn inner<'a, T, Func, V, B>( + parent: &'a ParentNode, + args: &mut Args<'_, Func, V>, + ) -> ControlFlow + where + T: RTreeObject, + Func: SelectionFunction, + V: FnMut(&'a T) -> ControlFlow, + { + for node in parent.children.iter() { + match node { + RTreeNode::Leaf(ref t) => { + if args.func.should_unpack_leaf(t) { + (args.visitor)(t)?; + } + } + RTreeNode::Parent(ref data) => { + if args.func.should_unpack_parent(&data.envelope()) { + inner(data, args)?; + } + } + } + } + + ControlFlow::Continue(()) + } + + if func.should_unpack_parent(&root.envelope()) { + inner(root, &mut Args { func, visitor })?; + } + + ControlFlow::Continue(()) +} + /// Iterator type returned by `RTree::locate_*_mut` methods. pub struct SelectionIteratorMut<'a, T, Func> where @@ -146,6 +197,56 @@ where } } +/// Internal iteration variant of [`SelectionIteratorMut`] +pub fn select_nodes_mut<'a, T, Func, V, B>( + root: &'a mut ParentNode, + func: &Func, + visitor: &mut V, +) -> ControlFlow +where + T: RTreeObject, + Func: SelectionFunction, + V: FnMut(&'a mut T) -> ControlFlow, +{ + struct Args<'a, Func, V> { + func: &'a Func, + visitor: &'a mut V, + } + + fn inner<'a, T, Func, V, B>( + parent: &'a mut ParentNode, + args: &mut Args<'_, Func, V>, + ) -> ControlFlow + where + T: RTreeObject, + Func: SelectionFunction, + V: FnMut(&'a mut T) -> ControlFlow, + { + for node in parent.children.iter_mut() { + match node { + RTreeNode::Leaf(ref mut t) => { + if args.func.should_unpack_leaf(t) { + (args.visitor)(t)?; + } + } + RTreeNode::Parent(ref mut data) => { + if args.func.should_unpack_parent(&data.envelope()) { + inner(data, args)?; + } + } + } + } + + ControlFlow::Continue(()) + } + + if func.should_unpack_parent(&root.envelope()) { + inner(root, &mut Args { func, visitor })?; + } + + ControlFlow::Continue(()) +} + #[cfg(test)] mod test { use crate::aabb::AABB; diff --git a/rstar/src/rtree.rs b/rstar/src/rtree.rs index 0d87d53..862afa9 100644 --- a/rstar/src/rtree.rs +++ b/rstar/src/rtree.rs @@ -10,6 +10,7 @@ use crate::node::ParentNode; use crate::object::{PointDistance, RTreeObject}; use crate::params::{verify_parameters, DefaultParams, InsertionStrategy, RTreeParams}; use crate::Point; +use core::ops::ControlFlow; #[cfg(not(test))] use alloc::vec::Vec; @@ -94,6 +95,18 @@ where /// A naive sequential algorithm would take `O(n)` time. However, r-trees incur higher /// build up times: inserting an element into an r-tree costs `O(log(n))` time. /// +/// Most of the selection methods, meaning those with names beginning with `locate_`, +/// return iterators which are driven externally and can therefore be combined into +/// more complex pipelines using the combinators defined on the [`Iterator`] trait. +/// +/// This flexiblity does come at the cost of temporary heap allocations required +/// to keep track of the iteration state. Alternative methods using internal iteration +/// are provided to avoid this overhead, their names ending in `_int` or `_int_mut`. +/// +/// They use a callback-based interface to pass the selected objects on to the caller +/// thereby being able to use the stack to keep track of the state required for +/// traversing the tree. +/// /// # Bulk loading /// In many scenarios, insertion is only carried out once for many points. In this case, /// [RTree::bulk_load] will be considerably faster. Its total run time @@ -331,6 +344,38 @@ where ) } + /// Variant of [`locate_in_envelope`][Self::locate_in_envelope] using internal iteration. + pub fn locate_in_envelope_int<'a, V, B>( + &'a self, + envelope: &T::Envelope, + mut visitor: V, + ) -> ControlFlow + where + V: FnMut(&'a T) -> ControlFlow, + { + select_nodes( + self.root(), + &SelectInEnvelopeFunction::new(envelope.clone()), + &mut visitor, + ) + } + + /// Mutable variant of [`locate_in_envelope_mut`][Self::locate_in_envelope_mut]. + pub fn locate_in_envelope_int_mut<'a, V, B>( + &'a mut self, + envelope: &T::Envelope, + mut visitor: V, + ) -> ControlFlow + where + V: FnMut(&'a mut T) -> ControlFlow, + { + select_nodes_mut( + self.root_mut(), + &SelectInEnvelopeFunction::new(envelope.clone()), + &mut visitor, + ) + } + /// Returns a draining iterator over all elements contained in the tree. /// /// The order in which the elements are returned is not specified. @@ -407,6 +452,38 @@ where ) } + /// Variant of [`locate_in_envelope_intersecting`][Self::locate_in_envelope_intersecting] using internal iteration. + pub fn locate_in_envelope_intersecting_int<'a, V, B>( + &'a self, + envelope: &T::Envelope, + mut visitor: V, + ) -> ControlFlow + where + V: FnMut(&'a T) -> ControlFlow, + { + select_nodes( + self.root(), + &SelectInEnvelopeFuncIntersecting::new(envelope.clone()), + &mut visitor, + ) + } + + /// Mutable variant of [`locate_in_envelope_intersecting_int`][Self::locate_in_envelope_intersecting_int]. + pub fn locate_in_envelope_intersecting_int_mut<'a, V, B>( + &'a mut self, + envelope: &T::Envelope, + mut visitor: V, + ) -> ControlFlow + where + V: FnMut(&'a mut T) -> ControlFlow, + { + select_nodes_mut( + self.root_mut(), + &SelectInEnvelopeFuncIntersecting::new(envelope.clone()), + &mut visitor, + ) + } + /// Locates elements in the r-tree defined by a selection function. /// /// Refer to the documentation of [`SelectionFunction`] for @@ -540,6 +617,25 @@ where self.locate_all_at_point_mut(point).next() } + /// Variant of [`locate_at_point`][Self::locate_at_point] using internal iteration. + pub fn locate_at_point_int(&self, point: &::Point) -> Option<&T> { + match self.locate_all_at_point_int(point, ControlFlow::Break) { + ControlFlow::Break(node) => Some(node), + ControlFlow::Continue(()) => None, + } + } + + /// Mutable variant of [`locate_at_point_int`][Self::locate_at_point_int]. + pub fn locate_at_point_int_mut( + &mut self, + point: &::Point, + ) -> Option<&mut T> { + match self.locate_all_at_point_int_mut(point, ControlFlow::Break) { + ControlFlow::Break(node) => Some(node), + ControlFlow::Continue(()) => None, + } + } + /// Locate all elements containing a given point. /// /// Method [PointDistance::contains_point] is used @@ -565,7 +661,7 @@ where LocateAllAtPoint::new(&self.root, SelectAtPointFunction::new(point.clone())) } - /// Mutable variant of [locate_all_at_point](#method.locate_all_at_point). + /// Mutable variant of [`locate_all_at_point`][Self::locate_all_at_point]. pub fn locate_all_at_point_mut( &mut self, point: &::Point, @@ -573,6 +669,38 @@ where LocateAllAtPointMut::new(&mut self.root, SelectAtPointFunction::new(point.clone())) } + /// Variant of [`locate_all_at_point`][Self::locate_all_at_point] using internal iteration. + pub fn locate_all_at_point_int<'a, V, B>( + &'a self, + point: &::Point, + mut visitor: V, + ) -> ControlFlow + where + V: FnMut(&'a T) -> ControlFlow, + { + select_nodes( + &self.root, + &SelectAtPointFunction::new(point.clone()), + &mut visitor, + ) + } + + /// Mutable variant of [`locate_all_at_point_int`][Self::locate_all_at_point_int]. + pub fn locate_all_at_point_int_mut<'a, V, B>( + &'a mut self, + point: &::Point, + mut visitor: V, + ) -> ControlFlow + where + V: FnMut(&'a mut T) -> ControlFlow, + { + select_nodes_mut( + &mut self.root, + &SelectAtPointFunction::new(point.clone()), + &mut visitor, + ) + } + /// Removes an element containing a given point. /// /// The removed element, if any, is returned. If multiple elements cover the given point,