diff --git a/crates/polars-arrow/Cargo.toml b/crates/polars-arrow/Cargo.toml index 0cd85cbae4837..24a979d9b4f24 100644 --- a/crates/polars-arrow/Cargo.toml +++ b/crates/polars-arrow/Cargo.toml @@ -14,7 +14,7 @@ description = "Minimal implementation of the Arrow specification forked from arr [dependencies] atoi = { workspace = true, optional = true } -bytemuck = { workspace = true } +bytemuck = { workspace = true, features = [ "must_cast" ] } chrono = { workspace = true } # for timezone support chrono-tz = { workspace = true, optional = true } diff --git a/crates/polars-arrow/src/array/binview/view.rs b/crates/polars-arrow/src/array/binview/view.rs index 67334a53aa177..15a744f804e9e 100644 --- a/crates/polars-arrow/src/array/binview/view.rs +++ b/crates/polars-arrow/src/array/binview/view.rs @@ -11,7 +11,7 @@ use polars_utils::total_ord::{TotalEq, TotalOrd}; use crate::buffer::Buffer; use crate::datatypes::PrimitiveType; -use crate::types::NativeType; +use crate::types::{Bytes16Alignment4, NativeType}; // We use this instead of u128 because we want alignment of <= 8 bytes. /// A reference to a set of bytes. @@ -346,7 +346,9 @@ impl MinMax for View { impl NativeType for View { const PRIMITIVE: PrimitiveType = PrimitiveType::UInt128; + type Bytes = [u8; 16]; + type AlignedBytes = Bytes16Alignment4; #[inline] fn to_le_bytes(&self) -> Self::Bytes { diff --git a/crates/polars-arrow/src/buffer/immutable.rs b/crates/polars-arrow/src/buffer/immutable.rs index 1dfe805ffc577..dcb862466a8e8 100644 --- a/crates/polars-arrow/src/buffer/immutable.rs +++ b/crates/polars-arrow/src/buffer/immutable.rs @@ -79,7 +79,7 @@ impl Buffer { } /// Auxiliary method to create a new Buffer - pub(crate) fn from_storage(storage: SharedStorage) -> Self { + pub fn from_storage(storage: SharedStorage) -> Self { let ptr = storage.as_ptr(); let length = storage.len(); Buffer { diff --git a/crates/polars-arrow/src/storage.rs b/crates/polars-arrow/src/storage.rs index 7cab3235b25ab..b7f8338cd06a2 100644 --- a/crates/polars-arrow/src/storage.rs +++ b/crates/polars-arrow/src/storage.rs @@ -4,11 +4,27 @@ use std::ops::Deref; use std::ptr::NonNull; use std::sync::atomic::{AtomicU64, Ordering}; +use bytemuck::Pod; + use crate::ffi::InternalArrowArray; +use crate::types::{ + AlignedBytes, Bytes12Alignment4, Bytes16Alignment16, Bytes16Alignment4, Bytes16Alignment8, + Bytes1Alignment1, Bytes2Alignment2, Bytes32Alignment16, Bytes4Alignment4, Bytes8Alignment4, + Bytes8Alignment8, NativeSizeAlignment, +}; enum BackingStorage { Vec { capacity: usize, + + /// Size and alignment of the original vector type. + /// + /// We have the following invariants: + /// - if this is Some(...) then all alignments involved are a power of 2 + /// - align_of(Original) >= align_of(Current) + /// - size_of(Original) >= size_of(Current) + /// - size_of(Original) % size_of(Current) == 0 + original_element_size_alignment: Option, }, InternalArrowArray(InternalArrowArray), #[cfg(feature = "arrow_rs")] @@ -30,8 +46,53 @@ impl Drop for SharedStorageInner { Some(BackingStorage::InternalArrowArray(a)) => drop(a), #[cfg(feature = "arrow_rs")] Some(BackingStorage::ArrowBuffer(b)) => drop(b), - Some(BackingStorage::Vec { capacity }) => unsafe { - drop(Vec::from_raw_parts(self.ptr, self.length, capacity)) + Some(BackingStorage::Vec { + capacity, + original_element_size_alignment, + }) => { + #[inline] + unsafe fn drop_vec(ptr: *mut T, length: usize, capacity: usize) { + let ptr = ptr.cast::(); + debug_assert!(ptr.is_aligned()); + + debug_assert!(size_of::() >= size_of::()); + debug_assert_eq!(size_of::() % size_of::(), 0); + + let scale_factor = size_of::() / size_of::(); + + // If the original element had a different size_of we need to rescale the + // length and capacity here. + let length = length / scale_factor; + let capacity = capacity / scale_factor; + + // SAFETY: + // - The BackingStorage holds an invariants that make this safe + drop(unsafe { Vec::from_raw_parts(ptr, length, capacity) }); + } + + let ptr = self.ptr; + let length = self.length; + + let Some(size_alignment) = original_element_size_alignment else { + unsafe { drop_vec::(ptr, length, capacity) }; + return; + }; + + use NativeSizeAlignment as SA; + unsafe { + match size_alignment { + SA::S1A1 => drop_vec::(ptr, length, capacity), + SA::S2A2 => drop_vec::(ptr, length, capacity), + SA::S4A4 => drop_vec::(ptr, length, capacity), + SA::S8A4 => drop_vec::(ptr, length, capacity), + SA::S8A8 => drop_vec::(ptr, length, capacity), + SA::S12A4 => drop_vec::(ptr, length, capacity), + SA::S16A4 => drop_vec::(ptr, length, capacity), + SA::S16A8 => drop_vec::(ptr, length, capacity), + SA::S16A16 => drop_vec::(ptr, length, capacity), + SA::S32A16 => drop_vec::(ptr, length, capacity), + } + } }, None => {}, } @@ -72,7 +133,10 @@ impl SharedStorage { ref_count: AtomicU64::new(1), ptr, length, - backing: Some(BackingStorage::Vec { capacity }), + backing: Some(BackingStorage::Vec { + capacity, + original_element_size_alignment: None, + }), phantom: PhantomData, }; Self { @@ -161,7 +225,11 @@ impl SharedStorage { } pub fn try_into_vec(mut self) -> Result, Self> { - let Some(BackingStorage::Vec { capacity }) = self.inner().backing else { + let Some(BackingStorage::Vec { + capacity, + original_element_size_alignment: None, + }) = self.inner().backing + else { return Err(self); }; if self.is_exclusive() { @@ -186,6 +254,55 @@ impl SharedStorage { } } +impl SharedStorage { + /// Create a [`SharedStorage`] from a [`Vec`] of [`AlignedBytes`]. + /// + /// This will fail if the size and alignment requirements of `T` are stricter than `B`. + pub fn from_aligned_bytes_vec(mut v: Vec) -> Option { + if align_of::() < align_of::() { + return None; + } + + // @NOTE: This is not a fundamental limitation, but something we impose for now. This makes + // calculating the capacity a lot easier. + if size_of::() < size_of::() || size_of::() % size_of::() != 0 { + return None; + } + + let scale_factor = size_of::() / size_of::(); + + let length = v.len() * scale_factor; + let capacity = v.capacity() * scale_factor; + let ptr = v.as_mut_ptr().cast::(); + core::mem::forget(v); + + let inner = SharedStorageInner { + ref_count: AtomicU64::new(1), + ptr, + length, + backing: Some(BackingStorage::Vec { + capacity, + original_element_size_alignment: Some(B::SIZE_ALIGNMENT_PAIR), + }), + phantom: PhantomData, + }; + + Some(Self { + inner: NonNull::new(Box::into_raw(Box::new(inner))).unwrap(), + phantom: PhantomData, + }) + } +} + +impl SharedStorage { + /// Create a [`SharedStorage`][SharedStorage] from a [`Vec`] of [`AlignedBytes`]. + /// + /// This will never fail since `u8` has unit size and alignment. + pub fn bytes_from_aligned_bytes_vec(v: Vec) -> Self { + Self::from_aligned_bytes_vec(v).unwrap() + } +} + impl Deref for SharedStorage { type Target = [T]; diff --git a/crates/polars-arrow/src/types/mod.rs b/crates/polars-arrow/src/types/mod.rs index 49b4d315408ec..c6f653a32311e 100644 --- a/crates/polars-arrow/src/types/mod.rs +++ b/crates/polars-arrow/src/types/mod.rs @@ -20,6 +20,8 @@ //! Finally, this module contains traits used to compile code based on [`NativeType`] optimized //! for SIMD, at [`mod@simd`]. +mod aligned_bytes; +pub use aligned_bytes::*; mod bit_chunk; pub use bit_chunk::{BitChunk, BitChunkIter, BitChunkOnes}; mod index; diff --git a/crates/polars-arrow/src/types/native.rs b/crates/polars-arrow/src/types/native.rs index 230fdde387d1f..9cf9a6b56c466 100644 --- a/crates/polars-arrow/src/types/native.rs +++ b/crates/polars-arrow/src/types/native.rs @@ -7,6 +7,7 @@ use polars_utils::min_max::MinMax; use polars_utils::nulls::IsNull; use polars_utils::total_ord::{ToTotalOrd, TotalEq, TotalHash, TotalOrd, TotalOrdWrap}; +use super::aligned_bytes::*; use super::PrimitiveType; /// Sealed trait implemented by all physical types that can be allocated, @@ -27,6 +28,7 @@ pub trait NativeType: + TotalOrd + IsNull + MinMax + + AlignedBytesCast { /// The corresponding variant of [`PrimitiveType`]. const PRIMITIVE: PrimitiveType; @@ -42,6 +44,11 @@ pub trait NativeType: + Default + IntoIterator; + /// Type denoting its representation as aligned bytes. + /// + /// This is `[u8; N]` where `N = size_of::` and has alignment `align_of::`. + type AlignedBytes: AlignedBytes + From + Into; + /// To bytes in little endian fn to_le_bytes(&self) -> Self::Bytes; @@ -56,11 +63,13 @@ pub trait NativeType: } macro_rules! native_type { - ($type:ty, $primitive_type:expr) => { + ($type:ty, $aligned:ty, $primitive_type:expr) => { impl NativeType for $type { const PRIMITIVE: PrimitiveType = $primitive_type; type Bytes = [u8; std::mem::size_of::()]; + type AlignedBytes = $aligned; + #[inline] fn to_le_bytes(&self) -> Self::Bytes { Self::to_le_bytes(*self) @@ -84,18 +93,18 @@ macro_rules! native_type { }; } -native_type!(u8, PrimitiveType::UInt8); -native_type!(u16, PrimitiveType::UInt16); -native_type!(u32, PrimitiveType::UInt32); -native_type!(u64, PrimitiveType::UInt64); -native_type!(i8, PrimitiveType::Int8); -native_type!(i16, PrimitiveType::Int16); -native_type!(i32, PrimitiveType::Int32); -native_type!(i64, PrimitiveType::Int64); -native_type!(f32, PrimitiveType::Float32); -native_type!(f64, PrimitiveType::Float64); -native_type!(i128, PrimitiveType::Int128); -native_type!(u128, PrimitiveType::UInt128); +native_type!(u8, Bytes1Alignment1, PrimitiveType::UInt8); +native_type!(u16, Bytes2Alignment2, PrimitiveType::UInt16); +native_type!(u32, Bytes4Alignment4, PrimitiveType::UInt32); +native_type!(u64, Bytes8Alignment8, PrimitiveType::UInt64); +native_type!(i8, Bytes1Alignment1, PrimitiveType::Int8); +native_type!(i16, Bytes2Alignment2, PrimitiveType::Int16); +native_type!(i32, Bytes4Alignment4, PrimitiveType::Int32); +native_type!(i64, Bytes8Alignment8, PrimitiveType::Int64); +native_type!(f32, Bytes4Alignment4, PrimitiveType::Float32); +native_type!(f64, Bytes8Alignment8, PrimitiveType::Float64); +native_type!(i128, Bytes16Alignment16, PrimitiveType::Int128); +native_type!(u128, Bytes16Alignment16, PrimitiveType::UInt128); /// The in-memory representation of the DayMillisecond variant of arrow's "Interval" logical type. #[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Zeroable, Pod)] @@ -151,7 +160,10 @@ impl MinMax for days_ms { impl NativeType for days_ms { const PRIMITIVE: PrimitiveType = PrimitiveType::DaysMs; + type Bytes = [u8; 8]; + type AlignedBytes = Bytes8Alignment4; + #[inline] fn to_le_bytes(&self) -> Self::Bytes { let days = self.0.to_le_bytes(); @@ -289,7 +301,10 @@ impl MinMax for months_days_ns { impl NativeType for months_days_ns { const PRIMITIVE: PrimitiveType = PrimitiveType::MonthDayNano; + type Bytes = [u8; 16]; + type AlignedBytes = Bytes16Alignment8; + #[inline] fn to_le_bytes(&self) -> Self::Bytes { let months = self.months().to_le_bytes(); @@ -658,7 +673,10 @@ impl MinMax for f16 { impl NativeType for f16 { const PRIMITIVE: PrimitiveType = PrimitiveType::Float16; + type Bytes = [u8; 2]; + type AlignedBytes = Bytes2Alignment2; + #[inline] fn to_le_bytes(&self) -> Self::Bytes { self.0.to_le_bytes() @@ -758,6 +776,7 @@ impl NativeType for i256 { const PRIMITIVE: PrimitiveType = PrimitiveType::Int256; type Bytes = [u8; 32]; + type AlignedBytes = Bytes32Alignment16; #[inline] fn to_le_bytes(&self) -> Self::Bytes {