From 2e633dcafb9ec076b056ad6140a3db43c815819b Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Fri, 7 Jun 2024 08:15:47 +0200 Subject: [PATCH] refactor(rust): start further use of polars-compute in polars-parquet (#16788) --- .../src/arrow/write/binview/basic.rs | 20 ++++--------------- .../src/arrow/write/primitive/nested.rs | 5 +++-- 2 files changed, 7 insertions(+), 18 deletions(-) diff --git a/crates/polars-parquet/src/arrow/write/binview/basic.rs b/crates/polars-parquet/src/arrow/write/binview/basic.rs index 55b51d26b5f7..2516e03e667c 100644 --- a/crates/polars-parquet/src/arrow/write/binview/basic.rs +++ b/crates/polars-parquet/src/arrow/write/binview/basic.rs @@ -1,11 +1,12 @@ use arrow::array::{Array, BinaryViewArray}; +use polars_compute::min_max::MinMaxKernel; use polars_error::PolarsResult; use crate::parquet::encoding::delta_bitpacked; use crate::parquet::schema::types::PrimitiveType; use crate::parquet::statistics::{BinaryStatistics, ParquetStatistics}; use crate::read::schema::is_nullable; -use crate::write::binary::{encode_non_null_values, ord_binary}; +use crate::write::binary::encode_non_null_values; use crate::write::utils::invalid_encoding; use crate::write::{utils, Encoding, Page, StatisticsOptions, WriteOptions}; @@ -77,7 +78,6 @@ pub fn array_to_page( .map(Page::Data) } -// TODO! speed this up pub(crate) fn build_statistics( array: &BinaryViewArray, primitive_type: PrimitiveType, @@ -89,23 +89,11 @@ pub(crate) fn build_statistics( distinct_count: None, max_value: options .max_value - .then(|| { - array - .iter() - .flatten() - .max_by(|x, y| ord_binary(x, y)) - .map(|x| x.to_vec()) - }) + .then(|| array.max_propagate_nan_kernel().map(<[u8]>::to_vec)) .flatten(), min_value: options .min_value - .then(|| { - array - .iter() - .flatten() - .min_by(|x, y| ord_binary(x, y)) - .map(|x| x.to_vec()) - }) + .then(|| array.min_propagate_nan_kernel().map(<[u8]>::to_vec)) .flatten(), } .serialize() diff --git a/crates/polars-parquet/src/arrow/write/primitive/nested.rs b/crates/polars-parquet/src/arrow/write/primitive/nested.rs index 918afa6a4dc6..13fb5a1de5f1 100644 --- a/crates/polars-parquet/src/arrow/write/primitive/nested.rs +++ b/crates/polars-parquet/src/arrow/write/primitive/nested.rs @@ -11,13 +11,14 @@ use crate::parquet::page::DataPage; use crate::parquet::schema::types::PrimitiveType; use crate::parquet::types::NativeType; -pub fn array_to_page( - array: &PrimitiveArray, +pub fn array_to_page<'a, T, R>( + array: &'a PrimitiveArray, options: WriteOptions, type_: PrimitiveType, nested: &[Nested], ) -> PolarsResult where + PrimitiveArray: polars_compute::min_max::MinMaxKernel = T>, T: ArrowNativeType, R: NativeType, T: num_traits::AsPrimitive,