From 27b1571f862434e61af79dc9b933f44164fb089e Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 26 Apr 2024 17:11:31 +0200 Subject: [PATCH] details section --- R/io_parquet.R | 4 ++-- man/IO_read_parquet.Rd | 24 +++++++++++++++++++++++- man/IO_scan_parquet.Rd | 2 +- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/R/io_parquet.R b/R/io_parquet.R index 3f525ede7..677b2e40d 100644 --- a/R/io_parquet.R +++ b/R/io_parquet.R @@ -16,7 +16,7 @@ #' can be skipped from reading. #' @param storage_options Experimental. List of options necessary to scan #' parquet files from different cloud storage providers (GCP, AWS, Azure). -#' See the @details section. +#' See the 'Details' section. #' @rdname IO_scan_parquet #' @details #' ## Connecting to cloud providers @@ -89,7 +89,7 @@ pl_scan_parquet = function( #' Read a parquet file #' @rdname IO_read_parquet #' @inherit pl_read_csv return -#' @inheritParams pl_scan_parquet +#' @inherit pl_scan_parquet params details #' @examplesIf requireNamespace("arrow", quietly = TRUE) && arrow::arrow_with_dataset() && arrow::arrow_with_parquet() #' temp_dir = tempfile() #' # Write a hive-style partitioned parquet dataset diff --git a/man/IO_read_parquet.Rd b/man/IO_read_parquet.Rd index e420363ea..991d85e46 100644 --- a/man/IO_read_parquet.Rd +++ b/man/IO_read_parquet.Rd @@ -47,7 +47,7 @@ the final DataFrame into contiguous memory chunks.} \item{storage_options}{Experimental. List of options necessary to scan parquet files from different cloud storage providers (GCP, AWS, Azure). -See the @details section.} +See the 'Details' section.} \item{use_statistics}{Use statistics in the parquet file to determine if pages can be skipped from reading.} @@ -60,6 +60,28 @@ can be skipped from reading.} \description{ Read a parquet file } +\details{ +\subsection{Connecting to cloud providers}{ + +Polars supports scanning parquet files from different cloud providers. +The cloud providers currently supported are AWS, GCP, and Azure. +The supported keys to pass to the \code{storage_options} argument can be found +here: +\itemize{ +\item \href{https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html}{aws} +\item \href{https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html}{gcp} +\item \href{https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html}{azure} +} +\subsection{Implementation details}{ +\itemize{ +\item Currently it is impossible to scan public parquet files from GCP without +a valid service account. Be sure to always include a service account in the +\code{storage_options} argument. +} +} + +} +} \examples{ \dontshow{if (requireNamespace("arrow", quietly = TRUE) && arrow::arrow_with_dataset() && arrow::arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} temp_dir = tempfile() diff --git a/man/IO_scan_parquet.Rd b/man/IO_scan_parquet.Rd index 70bca6d5d..0cfcd0ccb 100644 --- a/man/IO_scan_parquet.Rd +++ b/man/IO_scan_parquet.Rd @@ -47,7 +47,7 @@ the final DataFrame into contiguous memory chunks.} \item{storage_options}{Experimental. List of options necessary to scan parquet files from different cloud storage providers (GCP, AWS, Azure). -See the @details section.} +See the 'Details' section.} \item{use_statistics}{Use statistics in the parquet file to determine if pages can be skipped from reading.}