Skip to content

Commit

Permalink
perf: apply string view GC more conservatively (#13850)
Browse files (browse the repository at this point in the history)
  • Loading branch information
orlp authored Jan 19, 2024
1 parent 1d434cc commit ff83d34
Showing 1 changed file with 17 additions and 7 deletions.
24 changes: 17 additions & 7 deletions crates/polars-arrow/src/array/binview/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -373,18 +373,28 @@ impl<T: ViewType + ?Sized> BinaryViewArrayGeneric<T> {
}

pub fn maybe_gc(self) -> Self {
if self.total_buffer_len == 0 {
const GC_MINIMUM_SAVINGS: usize = 16 * 1024; // At least 16 KiB.

if self.total_buffer_len <= GC_MINIMUM_SAVINGS {
return self;
}

// Subtract the maximum amount of inlined strings to get a lower bound
// on the number of buffer bytes needed (assuming no dedup).
let total_bytes_len = self.total_bytes_len.load(Ordering::Relaxed) as usize;
// Subtract the maximum amount of inlined strings.
let min_in_buffer = total_bytes_len.saturating_sub(self.len() * 12);
let frac = (min_in_buffer as f64) / ((self.total_buffer_len() + 1) as f64);
let buffer_req_lower_bound = total_bytes_len.saturating_sub(self.len() * 12);

let lower_bound_mem_usage_post_gc = self.len() * 16 + buffer_req_lower_bound;
let cur_mem_usage = self.len() * 16 + self.total_buffer_len();
let savings_upper_bound = cur_mem_usage.saturating_sub(lower_bound_mem_usage_post_gc);

if frac < 0.25 {
return self.gc();
if savings_upper_bound >= GC_MINIMUM_SAVINGS
&& cur_mem_usage >= 4 * lower_bound_mem_usage_post_gc
{
self.gc()
} else {
self
}
self
}
}

Expand Down

0 comments on commit ff83d34

Please sign in to comment.