From 5b6068e7b4627fdbbe8696bcf692d558047a0a1b Mon Sep 17 00:00:00 2001 From: lambda <1wei@live.com> Date: Tue, 5 Nov 2024 07:06:37 +0000 Subject: [PATCH 1/4] Fix arrow-array null_count error during conversion from C to Rust --- arrow-array/src/ffi.rs | 41 ++++++++++++++++++++++++++++++++++++----- arrow-data/src/ffi.rs | 12 ++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 29414eae6fea..06115dc8ec7d 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -301,10 +301,13 @@ impl ImportedArrowArray<'_> { fn consume(self) -> Result { let len = self.array.len(); let offset = self.array.offset(); - let null_count = match &self.data_type { - DataType::Null => 0, - _ => self.array.null_count(), - }; + let null_count = self + .array + .null_count_checked() + .then_some(match &self.data_type { + DataType::Null => 0, + _ => self.array.null_count(), + }); let data_layout = layout(&self.data_type); let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?; @@ -329,7 +332,7 @@ impl ImportedArrowArray<'_> { ArrayData::new_unchecked( self.data_type, len, - Some(null_count), + null_count, null_bit_buffer, offset, buffers, @@ -635,6 +638,34 @@ mod tests_to_then_from_ffi { } // case with nulls is tested in the docs, through the example on this module. + #[test] + fn test_null_count_handling() { + let int32_data = ArrayData::builder(DataType::Int32) + .len(10) + .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) + .null_bit_buffer(Some(Buffer::from([0b01011111, 0b00000001]))) + .build() + .unwrap(); + let mut ffi_array = FFI_ArrowArray::new(&int32_data); + assert_eq!(3, ffi_array.null_count()); + assert!(ffi_array.null_count_checked()); + // Simulating uninitialized state + ffi_array.set_null_count(-1); + assert!(!ffi_array.null_count_checked()); + let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap(); + assert_eq!(3, int32_data.null_count()); + + let null_data = &ArrayData::new_null(&DataType::Null, 10); + let mut ffi_array = FFI_ArrowArray::new(null_data); + assert_eq!(10, ffi_array.null_count()); + assert!(ffi_array.null_count_checked()); + // Simulating uninitialized state + ffi_array.set_null_count(-1); + assert!(!ffi_array.null_count_checked()); + let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap(); + assert_eq!(0, null_data.null_count()); + } + fn test_generic_string() -> Result<()> { // create an array natively let array = GenericStringArray::::from(vec![Some("a"), None, Some("aaa")]); diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index cd283d32662f..417b054f437a 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -271,6 +271,18 @@ impl FFI_ArrowArray { self.null_count as usize } + /// whether the null count value is valid + #[inline] + pub fn null_count_checked(&self) -> bool { + self.null_count >= 0 + } + + /// set the null count of the array + #[inline] + pub fn set_null_count(&mut self, null_count: i64) { + self.null_count = null_count; + } + /// Returns the buffer at the provided index /// /// # Panic From f09f843d133995f9c65c2b177b165034d0a6fccd Mon Sep 17 00:00:00 2001 From: Yiwei Wang <1wei@live.com> Date: Wed, 6 Nov 2024 00:02:50 +0800 Subject: [PATCH 2/4] add ffi_array.null_count_opt --- arrow-array/src/ffi.rs | 27 ++++++++++++++------------- arrow-data/src/ffi.rs | 8 ++++---- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/arrow-array/src/ffi.rs b/arrow-array/src/ffi.rs index 06115dc8ec7d..4426e0986409 100644 --- a/arrow-array/src/ffi.rs +++ b/arrow-array/src/ffi.rs @@ -301,13 +301,10 @@ impl ImportedArrowArray<'_> { fn consume(self) -> Result { let len = self.array.len(); let offset = self.array.offset(); - let null_count = self - .array - .null_count_checked() - .then_some(match &self.data_type { - DataType::Null => 0, - _ => self.array.null_count(), - }); + let null_count = match &self.data_type { + DataType::Null => Some(0), + _ => self.array.null_count_opt(), + }; let data_layout = layout(&self.data_type); let buffers = self.buffers(data_layout.can_contain_null_mask, data_layout.variadic)?; @@ -648,20 +645,24 @@ mod tests_to_then_from_ffi { .unwrap(); let mut ffi_array = FFI_ArrowArray::new(&int32_data); assert_eq!(3, ffi_array.null_count()); - assert!(ffi_array.null_count_checked()); + assert_eq!(Some(3), ffi_array.null_count_opt()); // Simulating uninitialized state - ffi_array.set_null_count(-1); - assert!(!ffi_array.null_count_checked()); + unsafe { + ffi_array.set_null_count(-1); + } + assert_eq!(None, ffi_array.null_count_opt()); let int32_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Int32) }.unwrap(); assert_eq!(3, int32_data.null_count()); let null_data = &ArrayData::new_null(&DataType::Null, 10); let mut ffi_array = FFI_ArrowArray::new(null_data); assert_eq!(10, ffi_array.null_count()); - assert!(ffi_array.null_count_checked()); + assert_eq!(Some(10), ffi_array.null_count_opt()); // Simulating uninitialized state - ffi_array.set_null_count(-1); - assert!(!ffi_array.null_count_checked()); + unsafe { + ffi_array.set_null_count(-1); + } + assert_eq!(None, ffi_array.null_count_opt()); let null_data = unsafe { from_ffi_and_data_type(ffi_array, DataType::Null) }.unwrap(); assert_eq!(0, null_data.null_count()); } diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 417b054f437a..a575361cb99e 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -271,15 +271,15 @@ impl FFI_ArrowArray { self.null_count as usize } - /// whether the null count value is valid + /// Returns the null count, checking for validity #[inline] - pub fn null_count_checked(&self) -> bool { - self.null_count >= 0 + pub fn null_count_opt(&self) -> Option { + usize::try_from(self.null_count).ok() } /// set the null count of the array #[inline] - pub fn set_null_count(&mut self, null_count: i64) { + pub unsafe fn set_null_count(&mut self, null_count: i64) { self.null_count = null_count; } From 79018920a59bcd7b3112f127692c222ceca27b01 Mon Sep 17 00:00:00 2001 From: Yiwei Wang <1wei@live.com> Date: Wed, 6 Nov 2024 00:07:36 +0800 Subject: [PATCH 3/4] add Safety to follow clippy --- arrow-data/src/ffi.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index a575361cb99e..c5aa7e42817a 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -277,7 +277,9 @@ impl FFI_ArrowArray { usize::try_from(self.null_count).ok() } - /// set the null count of the array + /// # Safety + /// + /// Set the null count of the array #[inline] pub unsafe fn set_null_count(&mut self, null_count: i64) { self.null_count = null_count; From c6ab84489b43b0a63bca59d275c602c286c6308a Mon Sep 17 00:00:00 2001 From: Yiwei Wang <1wei@live.com> Date: Wed, 6 Nov 2024 00:13:08 +0800 Subject: [PATCH 4/4] add Safety to follow clippy --- arrow-data/src/ffi.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index c5aa7e42817a..04599bc47a1c 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -277,9 +277,10 @@ impl FFI_ArrowArray { usize::try_from(self.null_count).ok() } - /// # Safety - /// /// Set the null count of the array + /// + /// # Safety + /// Null count must match that of null buffer #[inline] pub unsafe fn set_null_count(&mut self, null_count: i64) { self.null_count = null_count;