diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index 0da004bf2e..b25b506d99 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -61,7 +61,7 @@ impl DateTimePartsArray { seconds_dtype: seconds.dtype().clone(), subseconds_dtype: subsecond.dtype().clone(), }, - vec![days, seconds, subsecond].into(), + [days, seconds, subsecond].into(), StatsSet::new(), ) } diff --git a/encodings/dict/src/dict.rs b/encodings/dict/src/dict.rs index 37bb1b57aa..fc265c6907 100644 --- a/encodings/dict/src/dict.rs +++ b/encodings/dict/src/dict.rs @@ -36,7 +36,7 @@ impl DictArray { codes_dtype: codes.dtype().clone(), values_len: values.len(), }, - vec![values, codes].into(), + [values, codes].into(), StatsSet::new(), ) } diff --git a/encodings/fastlanes/src/for/mod.rs b/encodings/fastlanes/src/for/mod.rs index ca88cab460..7a037dcd95 100644 --- a/encodings/fastlanes/src/for/mod.rs +++ b/encodings/fastlanes/src/for/mod.rs @@ -37,7 +37,7 @@ impl FoRArray { reference.dtype().clone(), child.len(), FoRMetadata { reference, shift }, - vec![child].into(), + [child].into(), StatsSet::new(), ) } diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index 826809c502..23f004a787 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use fsst::{Decompressor, Symbol}; use serde::{Deserialize, Serialize}; use vortex::array::VarBinArray; @@ -73,7 +75,7 @@ impl FSSTArray { let len = codes.len(); let strings_dtype = codes.dtype().clone(); let uncompressed_lengths_dtype = uncompressed_lengths.dtype().clone(); - let children = vec![symbols, symbol_lengths, codes, uncompressed_lengths]; + let children = Arc::new([symbols, symbol_lengths, codes, uncompressed_lengths]); Self::try_from_parts( dtype, @@ -83,7 +85,7 @@ impl FSSTArray { codes_dtype: strings_dtype, uncompressed_lengths_dtype, }, - children.into(), + children, StatsSet::new(), ) } diff --git a/encodings/zigzag/src/zigzag.rs b/encodings/zigzag/src/zigzag.rs index b9864e4240..abd9045bd2 100644 --- a/encodings/zigzag/src/zigzag.rs +++ b/encodings/zigzag/src/zigzag.rs @@ -33,7 +33,7 @@ impl ZigZagArray { .with_nullability(encoded_dtype.nullability()); let len = encoded.len(); - let children = vec![encoded]; + let children = [encoded]; Self::try_from_parts(dtype, len, ZigZagMetadata, children.into(), StatsSet::new()) } diff --git a/vortex-array/src/arc_slice.rs b/vortex-array/src/arc_slice.rs deleted file mode 100644 index 375cfc1cda..0000000000 --- a/vortex-array/src/arc_slice.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::fmt; -use std::fmt::{Debug, Formatter}; -use std::sync::Arc; - -/// SharedVec provides shared access to a collection, along with the ability to create owned -/// slices of the collection with zero copying. -#[derive(Clone)] -pub struct SharedVec { - data: Arc<[T]>, - start: usize, - len: usize, -} - -impl std::ops::Deref for SharedVec { - type Target = [T]; - - fn deref(&self) -> &Self::Target { - // SAFETY: the pointer only points at memory contained within owned `data`. - unsafe { std::slice::from_raw_parts(self.data.as_ptr().add(self.start), self.len) } - } -} - -impl Debug for SharedVec { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.debug_struct("SharedVec") - .field("start", &self.start) - .field("len", &self.len) - .field("data", &self.data) - .finish() - } -} - -impl From> for SharedVec { - fn from(value: Arc<[T]>) -> Self { - Self { - len: value.len(), - start: 0, - data: value, - } - } -} - -impl From> for SharedVec { - fn from(value: Vec) -> Self { - // moves the data from the Vec into a new owned slice. - let data: Arc<[T]> = Arc::from(value); - - SharedVec::from(data) - } -} - -impl SharedVec { - /// Create a new slice of the given vec, without copying or allocation. - pub fn slice(&self, start: usize, end: usize) -> Self { - assert!(end <= self.len, "cannot slice beyond end of SharedVec"); - - Self { - data: self.data.clone(), - start: self.start + start, - len: end - start, - } - } -} - -#[cfg(test)] -mod test { - use std::sync::Arc; - - use crate::arc_slice::SharedVec; - - #[test] - fn test_simple() { - let data = vec!["alice".to_string(), "bob".to_string(), "carol".to_string()]; - let data: Arc<[String]> = data.into(); - let shared_vec: SharedVec = data.into(); - - // We get iter() for free via the Deref to slice! - assert_eq!( - shared_vec.iter().collect::>(), - vec!["alice", "bob", "carol"], - ); - } - - #[test] - fn test_slicing() { - let data = vec!["alice".to_string(), "bob".to_string(), "carol".to_string()]; - let data: Arc<[String]> = data.into(); - let shared_vec: SharedVec = data.into(); - - // Original array - assert_eq!(shared_vec.len(), 3); - - // Sliced once - let sliced_vec = shared_vec.slice(1, 3); - assert_eq!(sliced_vec.len(), 2); - assert_eq!(sliced_vec.iter().collect::>(), vec!["bob", "carol"]); - - // Sliced again - let sliced_again = sliced_vec.slice(1, 2); - assert_eq!(sliced_again.len(), 1); - assert_eq!(sliced_again.iter().collect::>(), vec!["carol"]); - } - - #[test] - fn test_deref() { - let data = vec!["alice".to_string(), "bob".to_string(), "carol".to_string()]; - let data: Arc<[String]> = data.into(); - let shared_vec: SharedVec = data.into(); - - assert_eq!(&shared_vec[0], "alice"); - assert_eq!(&shared_vec[1], "bob"); - assert_eq!(&shared_vec[2], "carol"); - } -} diff --git a/vortex-array/src/array/constant/mod.rs b/vortex-array/src/array/constant/mod.rs index 8e7c0eb00c..055b77d1e2 100644 --- a/vortex-array/src/array/constant/mod.rs +++ b/vortex-array/src/array/constant/mod.rs @@ -46,7 +46,7 @@ impl ConstantArray { scalar: scalar.clone(), length, }, - vec![].into(), + [].into(), stats, ) .unwrap_or_else(|err| { diff --git a/vortex-array/src/array/extension/mod.rs b/vortex-array/src/array/extension/mod.rs index a033bd0bf2..0a1ec8c805 100644 --- a/vortex-array/src/array/extension/mod.rs +++ b/vortex-array/src/array/extension/mod.rs @@ -26,7 +26,7 @@ impl ExtensionArray { ExtensionMetadata { storage_dtype: storage.dtype().clone(), }, - vec![storage].into(), + [storage].into(), Default::default(), ) .vortex_expect("Invalid ExtensionArray") diff --git a/vortex-array/src/array/null/mod.rs b/vortex-array/src/array/null/mod.rs index c1de2c8c83..2844b65c9b 100644 --- a/vortex-array/src/array/null/mod.rs +++ b/vortex-array/src/array/null/mod.rs @@ -24,7 +24,7 @@ impl NullArray { DType::Null, len, NullMetadata { len }, - vec![].into(), + [].into(), StatsSet::nulls(len, &DType::Null), ) .vortex_expect("NullArray::new should never fail!") diff --git a/vortex-array/src/array/sparse/mod.rs b/vortex-array/src/array/sparse/mod.rs index db13ef8751..8878a854a8 100644 --- a/vortex-array/src/array/sparse/mod.rs +++ b/vortex-array/src/array/sparse/mod.rs @@ -81,7 +81,7 @@ impl SparseArray { len, fill_value, }, - vec![indices, values].into(), + [indices, values].into(), StatsSet::new(), ) } diff --git a/vortex-array/src/data.rs b/vortex-array/src/data.rs index 78e08a87bd..b645d6735e 100644 --- a/vortex-array/src/data.rs +++ b/vortex-array/src/data.rs @@ -5,7 +5,6 @@ use vortex_dtype::DType; use vortex_error::{vortex_bail, vortex_panic, VortexResult}; use vortex_scalar::Scalar; -use crate::arc_slice::SharedVec; use crate::encoding::EncodingRef; use crate::stats::{Stat, Statistics, StatsSet}; use crate::{Array, ArrayDType, ArrayMetadata, ToArray}; @@ -17,7 +16,7 @@ pub struct ArrayData { len: usize, metadata: Arc, buffer: Option, - children: SharedVec, + children: Arc<[Array]>, stats_map: Arc>, } @@ -28,7 +27,7 @@ impl ArrayData { len: usize, metadata: Arc, buffer: Option, - children: SharedVec, + children: Arc<[Array]>, statistics: StatsSet, ) -> VortexResult { let data = Self { diff --git a/vortex-array/src/implementation.rs b/vortex-array/src/implementation.rs index 7d9cefb7ce..6c94130744 100644 --- a/vortex-array/src/implementation.rs +++ b/vortex-array/src/implementation.rs @@ -64,7 +64,7 @@ macro_rules! impl_encoding { dtype: vortex_dtype::DType, len: usize, metadata: [<$Name Metadata>], - children: $crate::arc_slice::SharedVec<$crate::Array>, + children: std::sync::Arc<[$crate::Array]>, stats: $crate::stats::StatsSet, ) -> VortexResult { Ok(Self { typed: $crate::TypedArray::try_from_parts(dtype, len, metadata, None, children, stats)? }) diff --git a/vortex-array/src/lib.rs b/vortex-array/src/lib.rs index 240c2456cf..99fd2e5854 100644 --- a/vortex-array/src/lib.rs +++ b/vortex-array/src/lib.rs @@ -35,7 +35,6 @@ use crate::variants::ArrayVariants; use crate::visitor::{AcceptArrayVisitor, ArrayVisitor}; pub mod accessor; -pub mod arc_slice; pub mod array; pub mod arrow; mod canonical; diff --git a/vortex-array/src/typed.rs b/vortex-array/src/typed.rs index 9467bc222a..c60ad2338a 100644 --- a/vortex-array/src/typed.rs +++ b/vortex-array/src/typed.rs @@ -4,7 +4,6 @@ use vortex_buffer::Buffer; use vortex_dtype::DType; use vortex_error::{vortex_bail, vortex_panic, VortexError, VortexResult}; -use crate::arc_slice::SharedVec; use crate::stats::StatsSet; use crate::{Array, ArrayData, ArrayDef, IntoArray, ToArray, TryDeserializeArrayMetadata}; @@ -20,7 +19,7 @@ impl TypedArray { len: usize, metadata: D::Metadata, buffer: Option, - children: SharedVec, + children: Arc<[Array]>, stats: StatsSet, ) -> VortexResult { let array = Array::Data(ArrayData::try_new( diff --git a/vortex-array/src/view.rs b/vortex-array/src/view.rs index 11a2263255..9fa2e7492d 100644 --- a/vortex-array/src/view.rs +++ b/vortex-array/src/view.rs @@ -9,7 +9,6 @@ use vortex_dtype::{DType, Nullability}; use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect as _, VortexResult}; use vortex_scalar::{PValue, Scalar, ScalarValue}; -use crate::arc_slice::SharedVec; use crate::encoding::EncodingRef; use crate::opaque::OpaqueEncoding; use crate::stats::{Stat, Statistics, StatsSet}; @@ -23,7 +22,7 @@ pub struct ArrayView { len: usize, flatbuffer: Buffer, flatbuffer_loc: usize, - buffers: SharedVec, + buffers: Arc<[Buffer]>, ctx: Arc, // TODO(ngates): a store a Projection. A projected ArrayView contains the full fb::Array // metadata, but only the buffers from the selected columns. Therefore we need to know @@ -74,7 +73,7 @@ impl ArrayView { len, flatbuffer, flatbuffer_loc, - buffers: SharedVec::from(buffers), + buffers: buffers.into(), ctx, };