Skip to content

Commit

Permalink
Track the memory usage of custom allocations so that their size can b…
Browse files Browse the repository at this point in the history
…e reported via Array::get_buffer_memory_size (#5347)
  • Loading branch information
jhorstmann authored Jan 31, 2024
1 parent 93c7a12 commit c096172
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 11 deletions.
13 changes: 13 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ pub trait Array: std::fmt::Debug + Send + Sync {

/// Returns the total number of bytes of memory pointed to by this array.
/// The buffers store bytes in the Arrow memory format, and include the data as well as the validity map.
/// Note that this does not always correspond to the exact memory usage of an array,
/// since multiple arrays can share the same buffers or slices thereof.
fn get_buffer_memory_size(&self) -> usize;

/// Returns the total number of bytes of memory occupied physically by this array.
Expand Down Expand Up @@ -934,6 +936,17 @@ mod tests {
);
}

#[test]
fn test_memory_size_primitive_sliced() {
let arr = PrimitiveArray::<Int64Type>::from_iter_values(0..128);
let slice1 = arr.slice(0, 64);
let slice2 = arr.slice(64, 64);

// both slices report the full buffer memory usage, even though the buffers are shared
assert_eq!(slice1.get_array_memory_size(), arr.get_array_memory_size());
assert_eq!(slice2.get_array_memory_size(), arr.get_array_memory_size());
}

#[test]
fn test_memory_size_primitive_nullable() {
let arr: PrimitiveArray<Int64Type> = (0..128)
Expand Down
21 changes: 18 additions & 3 deletions arrow-buffer/src/alloc/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ pub(crate) enum Deallocation {
Standard(Layout),
/// An allocation from an external source like the FFI interface
/// Deallocation will happen on `Allocation::drop`
Custom(Arc<dyn Allocation>),
/// The size of the allocation is tracked here separately only
/// for memory usage reporting via `Array::get_buffer_memory_size`
Custom(Arc<dyn Allocation>, usize),
}

impl Debug for Deallocation {
Expand All @@ -47,9 +49,22 @@ impl Debug for Deallocation {
Deallocation::Standard(layout) => {
write!(f, "Deallocation::Standard {layout:?}")
}
Deallocation::Custom(_) => {
write!(f, "Deallocation::Custom {{ capacity: unknown }}")
Deallocation::Custom(_, size) => {
write!(f, "Deallocation::Custom {{ capacity: {size} }}")
}
}
}
}

#[cfg(test)]
mod tests {
use crate::alloc::Deallocation;

#[test]
fn test_size_of_deallocation() {
assert_eq!(
std::mem::size_of::<Deallocation>(),
3 * std::mem::size_of::<usize>()
);
}
}
2 changes: 1 addition & 1 deletion arrow-buffer/src/buffer/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ impl Buffer {
len: usize,
owner: Arc<dyn Allocation>,
) -> Self {
Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner))
Buffer::build_with_arguments(ptr, len, Deallocation::Custom(owner, len))
}

/// Auxiliary method to create a new Buffer
Expand Down
2 changes: 1 addition & 1 deletion arrow-buffer/src/buffer/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ impl<T: ArrowNativeType> From<Buffer> for ScalarBuffer<T> {
is_aligned,
"Memory pointer is not aligned with the specified scalar type"
),
Deallocation::Custom(_) =>
Deallocation::Custom(_, _) =>
assert!(is_aligned, "Memory pointer from external source (e.g, FFI) is not aligned with the specified scalar type. Before importing buffer through FFI, please make sure the allocation is aligned."),
}

Expand Down
13 changes: 7 additions & 6 deletions arrow-buffer/src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,9 @@ impl Bytes {
pub fn capacity(&self) -> usize {
match self.deallocation {
Deallocation::Standard(layout) => layout.size(),
// we cannot determine this in general,
// and thus we state that this is externally-owned memory
Deallocation::Custom(_) => 0,
// we only know the size of the custom allocation
// its underlying capacity might be larger
Deallocation::Custom(_, size) => size,
}
}

Expand All @@ -116,7 +116,7 @@ impl Drop for Bytes {
_ => unsafe { std::alloc::dealloc(self.ptr.as_ptr(), *layout) },
},
// The automatic drop implementation will free the memory once the reference count reaches zero
Deallocation::Custom(_allocation) => (),
Deallocation::Custom(_allocation, _size) => (),
}
}
}
Expand Down Expand Up @@ -147,10 +147,11 @@ impl Debug for Bytes {

impl From<bytes::Bytes> for Bytes {
fn from(value: bytes::Bytes) -> Self {
let len = value.len();
Self {
len: value.len(),
len,
ptr: NonNull::new(value.as_ptr() as _).unwrap(),
deallocation: Deallocation::Custom(std::sync::Arc::new(value)),
deallocation: Deallocation::Custom(std::sync::Arc::new(value), len),
}
}
}
Expand Down

0 comments on commit c096172

Please sign in to comment.