Skip to content

Commit

Permalink
refactor(rust): start further use of polars-compute in polars-parquet
Browse the repository at this point in the history
  • Loading branch information
coastalwhite authored Jun 7, 2024
1 parent 1df442f commit 2e633dc
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 18 deletions.
20 changes: 4 additions & 16 deletions crates/polars-parquet/src/arrow/write/binview/basic.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use arrow::array::{Array, BinaryViewArray};
use polars_compute::min_max::MinMaxKernel;
use polars_error::PolarsResult;

use crate::parquet::encoding::delta_bitpacked;
use crate::parquet::schema::types::PrimitiveType;
use crate::parquet::statistics::{BinaryStatistics, ParquetStatistics};
use crate::read::schema::is_nullable;
use crate::write::binary::{encode_non_null_values, ord_binary};
use crate::write::binary::encode_non_null_values;
use crate::write::utils::invalid_encoding;
use crate::write::{utils, Encoding, Page, StatisticsOptions, WriteOptions};

Expand Down Expand Up @@ -77,7 +78,6 @@ pub fn array_to_page(
.map(Page::Data)
}

// TODO! speed this up
pub(crate) fn build_statistics(
array: &BinaryViewArray,
primitive_type: PrimitiveType,
Expand All @@ -89,23 +89,11 @@ pub(crate) fn build_statistics(
distinct_count: None,
max_value: options
.max_value
.then(|| {
array
.iter()
.flatten()
.max_by(|x, y| ord_binary(x, y))
.map(|x| x.to_vec())
})
.then(|| array.max_propagate_nan_kernel().map(<[u8]>::to_vec))
.flatten(),
min_value: options
.min_value
.then(|| {
array
.iter()
.flatten()
.min_by(|x, y| ord_binary(x, y))
.map(|x| x.to_vec())
})
.then(|| array.min_propagate_nan_kernel().map(<[u8]>::to_vec))
.flatten(),
}
.serialize()
Expand Down
5 changes: 3 additions & 2 deletions crates/polars-parquet/src/arrow/write/primitive/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ use crate::parquet::page::DataPage;
use crate::parquet::schema::types::PrimitiveType;
use crate::parquet::types::NativeType;

pub fn array_to_page<T, R>(
array: &PrimitiveArray<T>,
pub fn array_to_page<'a, T, R>(
array: &'a PrimitiveArray<T>,
options: WriteOptions,
type_: PrimitiveType,
nested: &[Nested],
) -> PolarsResult<DataPage>
where
PrimitiveArray<T>: polars_compute::min_max::MinMaxKernel<Scalar<'a> = T>,
T: ArrowNativeType,
R: NativeType,
T: num_traits::AsPrimitive<R>,
Expand Down

0 comments on commit 2e633dc

Please sign in to comment.