From 6812c6f54e30c1f1993b1fc95fe62482256576ba Mon Sep 17 00:00:00 2001 From: Adam Reichold Date: Wed, 8 May 2024 19:12:04 +0200 Subject: [PATCH] Defend against numerical instability when computing number of clusters Apparently, it can happen that n.log(k) yields j+eps even though k^j=n exactly. After the call to ceil, this ends up with `depth` being one too large so that the number of clusters can also end up as one in which case the bulk-loading algorithm enters infinite recursion eventually overflowing the stack. This change defends against this by ensuring that we always build at least two clusters per recursion step thereby actually reducing the number of objects the next steps has to consider. --- rstar/CHANGELOG.md | 1 + rstar/src/algorithm/bulk_load/bulk_load_sequential.rs | 2 +- rstar/src/algorithm/bulk_load/cluster_group_iterator.rs | 6 +++--- rstar/src/algorithm/nearest_neighbor.rs | 2 +- rstar/src/algorithm/rstar.rs | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/rstar/CHANGELOG.md b/rstar/CHANGELOG.md index 9567e55..d0cfe7b 100644 --- a/rstar/CHANGELOG.md +++ b/rstar/CHANGELOG.md @@ -3,6 +3,7 @@ ## Changed - Switched to unstable sort for envelopes and node reinsertion ([PR](https://github.com/georust/rstar/pull/160)) - Use a more tame value for `AABB::new_empty` to avoid overflow panics applying selections on empty trees ([PR](https://github.com/georust/rstar/pull/162)) +- Avoid infinite recursion due to numerical instability when computing the number of clusters ([PR](https://github.com/georust/rstar/pull/166)) # 0.12.0 diff --git a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs index c65a3a5..ca75d8a 100644 --- a/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs +++ b/rstar/src/algorithm/bulk_load/bulk_load_sequential.rs @@ -25,7 +25,7 @@ where return ParentNode::new_parent(elements); } let number_of_clusters_on_axis = - calculate_number_of_clusters_on_axis::(elements.len()); + calculate_number_of_clusters_on_axis::(elements.len()).max(2); let iterator = PartitioningTask::<_, Params> { number_of_clusters_on_axis, diff --git a/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs b/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs index dbb6da2..6200442 100644 --- a/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs +++ b/rstar/src/algorithm/bulk_load/cluster_group_iterator.rs @@ -55,9 +55,9 @@ where { let max_size = Params::MAX_SIZE as f32; // The depth of the resulting tree, assuming all leaf nodes will be filled up to MAX_SIZE - let depth = (number_of_elements as f32).log(max_size).ceil() as usize; + let depth = (number_of_elements as f32).log(max_size).ceil() as i32; // The number of elements each subtree will hold - let n_subtree = max_size.powi(depth as i32 - 1); + let n_subtree = max_size.powi(depth - 1); // How many clusters will this node contain let number_of_clusters = (number_of_elements as f32 / n_subtree).ceil(); @@ -87,7 +87,7 @@ mod test { assert_eq!(slab.len(), slab_size); } let mut total_size = 0; - let mut max_element_for_last_slab = i32::min_value(); + let mut max_element_for_last_slab = i32::MIN; for slab in &slabs { total_size += slab.len(); let current_max = slab.iter().max_by_key(|point| point[0]).unwrap(); diff --git a/rstar/src/algorithm/nearest_neighbor.rs b/rstar/src/algorithm/nearest_neighbor.rs index 5b9d88f..698b26d 100644 --- a/rstar/src/algorithm/nearest_neighbor.rs +++ b/rstar/src/algorithm/nearest_neighbor.rs @@ -331,7 +331,7 @@ mod test { let sample_points = create_random_points(100, SEED_2); for sample_point in &sample_points { let mut nearest = None; - let mut closest_dist = ::core::f64::INFINITY; + let mut closest_dist = f64::INFINITY; for point in &points { let delta = [point[0] - sample_point[0], point[1] - sample_point[1]]; let new_dist = delta[0] * delta[0] + delta[1] * delta[1]; diff --git a/rstar/src/algorithm/rstar.rs b/rstar/src/algorithm/rstar.rs index 02eb197..9127037 100644 --- a/rstar/src/algorithm/rstar.rs +++ b/rstar/src/algorithm/rstar.rs @@ -156,13 +156,13 @@ where T: RTreeObject, { let all_leaves = match node.children.first() { - Some(RTreeNode::Leaf(_)) => return usize::max_value(), + Some(RTreeNode::Leaf(_)) => return usize::MAX, Some(RTreeNode::Parent(ref data)) => data .children .first() .map(RTreeNode::is_leaf) .unwrap_or(true), - _ => return usize::max_value(), + None => return usize::MAX, }; let zero: <::Point as Point>::Scalar = Zero::zero();