From f0f010f27f2dcf129c765c9a477e98e548cf443a Mon Sep 17 00:00:00 2001
From: eitsupi
Date: Tue, 31 Oct 2023 13:35:10 +0000
Subject: [PATCH] docs: add an example of reading hive-style partitioned parquet dataset

---
 R/parquet.R | 18 ++++++++++++++++--
 man/IO_scan_parquet.Rd | 18 +++++++++++++++++-
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/R/parquet.R b/R/parquet.R
index 78d772510..f79cdb4f4 100644
--- a/R/parquet.R
+++ b/R/parquet.R
@@ -15,8 +15,22 @@
 #' @return LazyFrame
 #' @name scan_parquet
 #' @rdname IO_scan_parquet
-#' @examples
-#' # TODO write parquet example
+#' @examplesIf requireNamespace("arrow", quietly = TRUE) && arrow::arrow_with_dataset() && arrow::arrow_with_parquet()
+#' temp_dir = tempfile()
+#' # Write a hive-style partitioned parquet dataset
+#' arrow::write_dataset(
+#'   mtcars,
+#'   temp_dir,
+#'   partitioning = c("cyl", "gear"),
+#'   format = "parquet",
+#'   hive_style = TRUE
+#' )
+#' list.files(temp_dir, recursive = TRUE)
+#'
+#' # Read the dataset
+#' pl$scan_parquet(
+#'   file.path(temp_dir, "**/*.parquet")
+#' )$collect()
 pl$scan_parquet = function(
     file, # : str | Path,
     n_rows = NULL, # : int | None = None,
diff --git a/man/IO_scan_parquet.Rd b/man/IO_scan_parquet.Rd
index 6c5f10f17..7e7a69d61 100644
--- a/man/IO_scan_parquet.Rd
+++ b/man/IO_scan_parquet.Rd
@@ -30,6 +30,22 @@ LazyFrame
 Scan a parquet file
 }
 \examples{
-# TODO write parquet example
+\dontshow{if (requireNamespace("arrow", quietly = TRUE) && arrow::arrow_with_dataset() && arrow::arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+temp_dir = tempfile()
+# Write a hive-style partitioned parquet dataset
+arrow::write_dataset(
+  mtcars,
+  temp_dir,
+  partitioning = c("cyl", "gear"),
+  format = "parquet",
+  hive_style = TRUE
+)
+list.files(temp_dir, recursive = TRUE)
+
+# Read the dataset
+pl$scan_parquet(
+  file.path(temp_dir, "**/*.parquet")
+)$collect()
+\dontshow{\}) # examplesIf}
 }
 \keyword{LazyFrame_new}