Skip to content

Commit

Permalink
Defend against numerical instability when computing number of clusters
Browse files Browse the repository at this point in the history
Apparently, it can happen that n.log(k) yields j+eps even though k^j=n exactly.
After the call to ceil, this ends up with `depth` being one too large so that
the number of clusters can also end up as one in which case the bulk-loading
algorithm enters infinite recursion eventually overflowing the stack.

This change defends against this by ensuring that we always build at least two
clusters per recursion step thereby actually reducing the number of objects the
next steps has to consider.
  • Loading branch information
adamreichold committed May 8, 2024
1 parent 84d1265 commit c5354cb
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion rstar/src/algorithm/bulk_load/bulk_load_sequential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ where
return ParentNode::new_parent(elements);
}
let number_of_clusters_on_axis =
calculate_number_of_clusters_on_axis::<T, Params>(elements.len());
calculate_number_of_clusters_on_axis::<T, Params>(elements.len()).max(2);

let iterator = PartitioningTask::<_, Params> {
number_of_clusters_on_axis,
Expand Down
6 changes: 3 additions & 3 deletions rstar/src/algorithm/bulk_load/cluster_group_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ where
{
let max_size = Params::MAX_SIZE as f32;
// The depth of the resulting tree, assuming all leaf nodes will be filled up to MAX_SIZE
let depth = (number_of_elements as f32).log(max_size).ceil() as usize;
let depth = (number_of_elements as f32).log(max_size).ceil() as i32;
// The number of elements each subtree will hold
let n_subtree = max_size.powi(depth as i32 - 1);
let n_subtree = max_size.powi(depth - 1);
// How many clusters will this node contain
let number_of_clusters = (number_of_elements as f32 / n_subtree).ceil();

Expand Down Expand Up @@ -87,7 +87,7 @@ mod test {
assert_eq!(slab.len(), slab_size);
}
let mut total_size = 0;
let mut max_element_for_last_slab = i32::min_value();
let mut max_element_for_last_slab = i32::MIN;
for slab in &slabs {
total_size += slab.len();
let current_max = slab.iter().max_by_key(|point| point[0]).unwrap();
Expand Down
2 changes: 1 addition & 1 deletion rstar/src/algorithm/nearest_neighbor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ mod test {
let sample_points = create_random_points(100, SEED_2);
for sample_point in &sample_points {
let mut nearest = None;
let mut closest_dist = ::core::f64::INFINITY;
let mut closest_dist = f64::INFINITY;
for point in &points {
let delta = [point[0] - sample_point[0], point[1] - sample_point[1]];
let new_dist = delta[0] * delta[0] + delta[1] * delta[1];
Expand Down
4 changes: 2 additions & 2 deletions rstar/src/algorithm/rstar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,13 @@ where
T: RTreeObject,
{
let all_leaves = match node.children.first() {
Some(RTreeNode::Leaf(_)) => return usize::max_value(),
Some(RTreeNode::Leaf(_)) => return usize::MAX,
Some(RTreeNode::Parent(ref data)) => data
.children
.first()
.map(RTreeNode::is_leaf)
.unwrap_or(true),
_ => return usize::max_value(),
None => return usize::MAX,
};

let zero: <<T::Envelope as Envelope>::Point as Point>::Scalar = Zero::zero();
Expand Down

0 comments on commit c5354cb

Please sign in to comment.