From b92e9d5106fa26dafb91f61405be21d8a6a1f74f Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 31 Jan 2024 13:30:20 +0000 Subject: [PATCH] refactor: remove unused funcs, comments, and move some code to the other file --- R/extendr-wrappers.R | 2 - src/rust/src/arrow_interop/mod.rs | 62 +++++++++++++++++++++ src/rust/src/arrow_interop/to_rust.rs | 78 +-------------------------- src/rust/src/rlib.rs | 20 ------- 4 files changed, 63 insertions(+), 99 deletions(-) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 0d61aabfe..b46f22dab 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -38,8 +38,6 @@ as_struct <- function(exprs) .Call(wrap__as_struct, exprs) struct_ <- function(exprs, eager, schema) .Call(wrap__struct_, exprs, eager, schema) -rb_list_to_df <- function(r_batches, names) .Call(wrap__rb_list_to_df, r_batches, names) - dtype_str_repr <- function(dtype) .Call(wrap__dtype_str_repr, dtype) new_arrow_stream <- function() .Call(wrap__new_arrow_stream) diff --git a/src/rust/src/arrow_interop/mod.rs b/src/rust/src/arrow_interop/mod.rs index 59fac44e5..256ef94e0 100644 --- a/src/rust/src/arrow_interop/mod.rs +++ b/src/rust/src/arrow_interop/mod.rs @@ -1 +1,63 @@ pub mod to_rust; + +use extendr_api::prelude::*; +use std::result::Result; + +#[derive(Debug)] +pub enum RArrowArrayClass { + ArrowArray, + NanoArrowArray, +} + +impl<'a> FromRobj<'a> for RArrowArrayClass { + fn from_robj(robj: &Robj) -> std::result::Result { + if robj.inherits("nanoarrow_array") { + Ok(RArrowArrayClass::NanoArrowArray) + } else if robj.inherits("Array") { + Ok(RArrowArrayClass::ArrowArray) + } else { + Err("Robj does not inherit from Array or nanoarrow_array") + } + } +} + +#[derive(Debug)] +pub struct ArrowRPackage; +#[derive(Debug)] +pub struct NanoArrowRPackage; + +impl RArrowArrayClass { + pub fn get_package(&self) -> Box { + match self { + RArrowArrayClass::ArrowArray => Box::new(ArrowRPackage), + RArrowArrayClass::NanoArrowArray => Box::new(NanoArrowRPackage), + } + } +} + +pub trait RPackage { + fn get_export_array_func(&self) -> Result; +} + +impl RPackage for ArrowRPackage { + fn get_export_array_func(&self) -> Result { + R!(r#" + function(array, exportable_array, exportable_schema) { + array$export_to_c(exportable_array, exportable_schema) + }"#) + } +} + +impl RPackage for NanoArrowRPackage { + fn get_export_array_func(&self) -> Result { + R!(r#" + function(array, exportable_array, exportable_schema) { + nanoarrow::nanoarrow_pointer_export( + nanoarrow::infer_nanoarrow_schema(array), + exportable_schema + ) + nanoarrow::nanoarrow_pointer_export(array, exportable_array) + } + "#) + } +} diff --git a/src/rust/src/arrow_interop/to_rust.rs b/src/rust/src/arrow_interop/to_rust.rs index dd8a13b31..12822e8f3 100644 --- a/src/rust/src/arrow_interop/to_rust.rs +++ b/src/rust/src/arrow_interop/to_rust.rs @@ -8,66 +8,8 @@ use polars_core::utils::arrow::ffi; use polars_core::POOL; use std::result::Result; -#[derive(Debug)] -pub enum RArrowArrayClass { - ArrowArray, - NanoArrowArray, -} - -impl<'a> FromRobj<'a> for RArrowArrayClass { - fn from_robj(robj: &Robj) -> std::result::Result { - if robj.inherits("nanoarrow_array") { - Ok(RArrowArrayClass::NanoArrowArray) - } else if robj.inherits("Array") { - Ok(RArrowArrayClass::ArrowArray) - } else { - Err("Robj does not inherit from Array or nanoarrow_array") - } - } -} - -#[derive(Debug)] -pub struct ArrowRPackage; -#[derive(Debug)] -pub struct NanoArrowRPackage; - -impl RArrowArrayClass { - pub fn get_package(&self) -> Box { - match self { - RArrowArrayClass::ArrowArray => Box::new(ArrowRPackage), - RArrowArrayClass::NanoArrowArray => Box::new(NanoArrowRPackage), - } - } -} - -pub trait RPackage { - fn get_export_array_func(&self) -> Result; -} +use super::RArrowArrayClass; -impl RPackage for ArrowRPackage { - fn get_export_array_func(&self) -> Result { - R!(r#" - function(array, exportable_array, exportable_schema) { - array$export_to_c(exportable_array, exportable_schema) - }"#) - } -} - -impl RPackage for NanoArrowRPackage { - fn get_export_array_func(&self) -> Result { - R!(r#" - function(array, exportable_array, exportable_schema) { - nanoarrow::nanoarrow_pointer_export( - nanoarrow::infer_nanoarrow_schema(array), - exportable_schema - ) - nanoarrow::nanoarrow_pointer_export(array, exportable_array) - } - "#) - } -} - -//does not support chunked array pub fn arrow_array_to_rust(arrow_array: Robj) -> Result { let mut array = Box::new(ffi::ArrowArray::empty()); let mut schema = Box::new(ffi::ArrowSchema::empty()); @@ -87,31 +29,13 @@ pub fn arrow_array_to_rust(arrow_array: Robj) -> Result { let field = ffi::import_field_from_c(schema.as_ref()).map_err(|err| err.to_string())?; ffi::import_array_from_c(*array, field.data_type).map_err(|err| err.to_string())? }; - //dbg!(&array); Ok(array) } unsafe fn wrap_make_external_ptr(t: &mut T) -> Robj { - //use extendr_api::{Integers, Rinternals}; unsafe { ::make_external_ptr(t, r!(extendr_api::NULL)) } } -pub fn rb_to_rust_df(r_rb_columns: List, names: &[String]) -> Result { - let n_col = r_rb_columns.len(); - let col_iter = r_rb_columns - .into_iter() - .zip(names.iter()) - .map(|((_, r_array), str)| { - let arr = arrow_array_to_rust(r_array)?; - let s = ::try_from((str.as_str(), arr)) - .map_err(|err| err.to_string()); - s - }); - let s_vec_res = crate::utils::collect_hinted_result(n_col, col_iter); - - Ok(pl::DataFrame::new_no_checks(s_vec_res?)) -} - pub fn to_rust_df(rb: Robj) -> Result { let rb = rb.as_list().ok_or("arrow record batches is not a List")?; let get_columns_f = R!(r"\(x) x$columns")?.as_function().unwrap(); diff --git a/src/rust/src/rlib.rs b/src/rust/src/rlib.rs index 1fb7b3f55..a9369e6ea 100644 --- a/src/rust/src/rlib.rs +++ b/src/rust/src/rlib.rs @@ -169,22 +169,6 @@ unsafe fn export_df_to_arrow_stream(robj_df: Robj, robj_str: Robj) -> RResult) -> Result { - let mut iter = r_batches.into_iter().map(|(_, robj)| { - let robj = call!(r"\(x) x$columns", robj)?; - let l = robj.as_list().ok_or_else(|| "not a list!?".to_string())?; - crate::arrow_interop::to_rust::rb_to_rust_df(l, &names) - }); - let mut df_acc = iter - .next() - .unwrap_or_else(|| Ok(pl::DataFrame::default()))?; - for df in iter { - df_acc.vstack_mut(&df?).map_err(|err| err.to_string())?; - } - Ok(RPolarsDataFrame(df_acc)) -} - #[extendr] pub fn dtype_str_repr(dtype: Robj) -> RResult { let dtype = robj_to!(RPolarsDataType, dtype)?.0; @@ -328,10 +312,6 @@ extendr_module! { fn r_date_range_lazy; fn as_struct; fn struct_; - //fn field_to_rust2; - //fn series_from_arrow; - //fn rb_to_df; - fn rb_list_to_df; fn dtype_str_repr;