From c0be6aac0b20f1e656ee05e97bcdf2af98de36c4 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 6 Oct 2023 13:21:43 +0200 Subject: [PATCH 01/25] start work on write_csv() --- R/dataframe__frame.R | 75 ++++++++++++++++++++++++++++++++++ R/extendr-wrappers.R | 10 +++++ R/utils.R | 6 +++ src/rust/src/rdataframe/mod.rs | 56 +++++++++++++++++++++++++ 4 files changed, 147 insertions(+) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 2fb60866e..23c3f7374 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1599,3 +1599,78 @@ DataFrame_sample = function( ) |> unwrap("in $sample():") } + + + +#' Write to comma-separated values (CSV) file +#' +#' @param path File path to which the result should be written. +#' @param has_header Whether to include header in the CSV output. +#' @param separator Separate CSV fields with this symbol. +#' @param line_terminator String used to end each row. +#' @param quote Byte to use as quoting character. +#' @param batch_size Number of rows that will be processed per thread. +#' @param datetime_format A format string, with the specifiers defined by the +#' chrono Rust crate. If no format is specified, the default fractional-second +#' precision is inferred from the maximum timeunit found in the frame’s Datetime +#' cols (if any). +#' @param date_format A format string, with the specifiers defined by the chrono +#' Rust crate. +#' @param time_format A format string, with the specifiers defined by the chrono +#' Rust crate. +#' @param float_precision Number of decimal places to write, applied to both +#' Float32 and Float64 datatypes. +#' @param null_values A string representing null values (defaulting to the empty +#' string). +#' @param quote_style Determines the quoting strategy used. +#' * `"necessary"` (default): This puts quotes around fields only when necessary. +#' They are necessary when fields contain a quote, delimiter or record +#' terminator. 
Quotes are also necessary when writing an empty record (which +#' is indistinguishable from a record with one empty field). This is the +#' default. +#' * `"always"`: This puts quotes around every field. +#' * `"non_numeric"`: This puts quotes around all fields that are non-numeric. +#' Namely, when writing a field that does not parse as a valid float or integer, +#' then quotes will be used even if they aren't strictly necessary. + +# TODO: include "never" when bumping rust-polars to 0.34 +# * `"never"`: This never puts quotes around fields, even if that results in +# invalid CSV data (e.g.: by not quoting strings containing the separator). + +#' @return +#' This doesn't return anything but creates a CSV file. +#' @export +#' +#' @examples +DataFrame_write_csv = function( + path, + has_header = TRUE, + separator = ",", + line_terminator = "\n", + quote = '"', + batch_size = 1024, + datetime_format = NULL, + date_format = NULL, + time_format = NULL, + float_precision = NULL, + null_values = "", + quote_style = "necessary" +) { + + if (file_ext(path) != "csv") { + stop("Argument `path` must the path to a CSV file.") + } + + if (is.null(null_values)) { + stop("Argument `null_values` cannot be NULL.") + } + + .pr$DataFrame$write_csv( + self, + path, has_header, utf8ToInt(separator), line_terminator, utf8ToInt(quote), batch_size, + datetime_format, date_format, time_format, float_precision, + null_values, quote_style + ) |> + unwrap("in $write_csv():") |> + invisible() +} diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 91338ef9a..89fcc6af6 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -179,6 +179,16 @@ DataFrame$sample_n <- function(n, with_replacement, shuffle, seed) .Call(wrap__D DataFrame$sample_frac <- function(frac, with_replacement, shuffle, seed) .Call(wrap__DataFrame__sample_frac, self, frac, with_replacement, shuffle, seed) +DataFrame$write_csv <- function(path, has_header, separator, line_terminator, quote, batch_size, + 
datetime_format, date_format, time_format, float_precision, + null_values, quote_style) .Call(wrap__DataFrame__write_csv, self, path, + has_header, separator, + line_terminator, quote, + batch_size, + datetime_format, date_format, + time_format, float_precision, + null_values, quote_style) + #' @export `$.DataFrame` <- function (self, name) { func <- DataFrame[[name]]; environment(func) <- environment(); func } diff --git a/R/utils.R b/R/utils.R index 5e7f071bd..93f9982a7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -634,3 +634,9 @@ is_bool = function(x) { dtypes_are_struct = function(dtypes) { sapply(dtypes, \(dt) pl$same_outer_dt(dt, pl$Struct())) } + +# from tools::file_ext() +file_ext <- function(x) { + pos <- regexpr("\\.([[:alnum:]]+)$", x) + ifelse(pos > -1L, substring(x, pos + 1L), "") +} diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 63f54048d..cbfba14aa 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -12,6 +12,8 @@ use crate::rlib; use crate::robj_to; use crate::rpolarserr::{polars_to_rpolars_err, RResult}; +use polars::prelude::{CsvWriter, QuoteStyle, SerWriter}; + pub use lazy::dataframe::*; use crate::conversion_s_to_r::pl_series_to_list; @@ -420,7 +422,61 @@ impl DataFrame { .map_err(polars_to_rpolars_err) .map(DataFrame) } + + pub fn write_csv( + &mut self, + path: Robj, + has_header: Robj, + separator: Robj, + line_terminator: Robj, + quote: Robj, + batch_size: Robj, + datetime_format: Robj, + date_format: Robj, + time_format: Robj, + float_precision: Robj, + null_value: Robj, + quote_style: Robj, + ) -> List { + + let null = robj_to!(Option, String, null_value).unwrap_or_default().unwrap(); + let path = robj_to!(str, path).unwrap(); + let f = std::fs::File::create(path).unwrap(); + let qs = parse_quote_style(quote_style); + + let mut r = CsvWriter::new(f) + .has_header(robj_to!(bool, has_header).unwrap()) + .with_delimiter(robj_to!(u8, separator).unwrap()) + 
.with_line_terminator(robj_to!(String, line_terminator).unwrap()) + .with_quoting_char(robj_to!(u8, quote).unwrap()) + .with_batch_size(robj_to!(usize, batch_size).unwrap()) + .with_datetime_format(robj_to!(Option, String, datetime_format).unwrap()) + .with_date_format(robj_to!(Option, String, date_format).unwrap()) + .with_time_format(robj_to!(Option, String, time_format).unwrap()) + .with_float_precision(robj_to!(Option, usize, float_precision).unwrap()) + .with_null_value(null) + .with_quote_style(qs); + + let result = r + .finish(&mut self.0) + .map_err(polars_to_rpolars_err); + + r_result_list(result) + } } + +pub fn parse_quote_style(x: Robj) -> QuoteStyle { + match robj_to!(Option, String, x).unwrap_or_default().unwrap().as_str() { + "always" => QuoteStyle::Always, + "necessary" => QuoteStyle::Necessary, + "non_numeric" => QuoteStyle::NonNumeric, + // "never" was added in 0.34 + // "never" => QuoteStyle::Never, + _ => panic!("polars internal error: `quote_style` must be 'always', 'necessary' or 'non_numeric'.") + } +} + + impl DataFrame { pub fn to_list_result(&self) -> Result { //convert DataFrame to Result of to R vectors, error if DataType is not supported From 41397e4b135c4655a0cf18cdc3c6dd9de336b4d6 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 6 Oct 2023 15:00:19 +0200 Subject: [PATCH 02/25] add some checks and tests --- R/dataframe__frame.R | 9 ++++++ tests/testthat/test-csv.R | 67 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 23c3f7374..7489aec32 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1640,8 +1640,12 @@ DataFrame_sample = function( #' @return #' This doesn't return anything but creates a CSV file. 
#' @export +#' @rdname IO_write_csv #' #' @examples +#' dat = pl$DataFrame(mtcars) +#' + DataFrame_write_csv = function( path, has_header = TRUE, @@ -1665,6 +1669,11 @@ DataFrame_write_csv = function( stop("Argument `null_values` cannot be NULL.") } + if (length(quote_style) == 0 || + !quote_style %in% c("always", "necessary", "non_numeric")) { + stop("Argument `quote_style` must be one of 'always', 'necessary', or 'non_numeric'.") + } + .pr$DataFrame$write_csv( self, path, has_header, utf8ToInt(separator), line_terminator, utf8ToInt(quote), batch_size, diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 5fa7a0e76..87014ec1b 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -22,3 +22,70 @@ test_that("csv read iris", { iris ) }) + + +dat = mtcars +dat[c(1, 3, 9, 12), c(3, 4, 5)] = NA +dat_pl = pl$DataFrame(dat) +temp_noext = tempfile() +temp_out = tempfile(fileext = ".csv") + +test_that("write_csv: path works", { + expect_error( + dat_pl$write_csv(temp_noext), + "must the path to a CSV file" + ) + + dat_pl$write_csv(temp_out) + expect_identical( + pl$read_csv(temp_out)$to_data_frame(), + dat, + ignore_attr = TRUE # rownames are lost when writing / reading from CSV + ) +}) + +test_that("write_csv: null_values works", { + expect_error( + dat_pl$write_csv(temp_out, null_values = NULL), + "cannot be NULL" + ) + dat_pl$write_csv(temp_out, null_values = "hello") + tmp = pl$read_csv(temp_out)$to_data_frame() + expect_true(is.character(tmp$disp) && is.character(tmp$hp) && is.character(tmp$drat)) + expect_equal(tmp[1:2, "disp"], c("hello", "160.0")) +}) + + +test_that("write_csv: null_values works", { + dat_pl$write_csv(temp_out, separator = "|") + expect_identical( + pl$read_csv(temp_out, sep = "|")$to_data_frame(), + dat, + ignore_attr = TRUE # rownames are lost when writing / reading from CSV + ) +}) + +test_that("write_csv: quote_style and quote works", { + dat_pl2 = pl$DataFrame(iris) + + expect_error( + 
dat_pl2$write_csv(temp_out, quote_style = "foo"), + "must be one of" + ) + + dat_pl2$write_csv(temp_out, quote_style = "always", quote = "+") + expect_identical( + head(pl$read_csv(temp_out)$to_data_frame()[["+Sepal.Length+"]], n = 2), + c("+5.1+", "+4.9+") + ) + + dat_pl2$write_csv(temp_out, quote_style = "non_numeric", quote = "+") + expect_identical( + head(pl$read_csv(temp_out)$to_data_frame()[["+Sepal.Length+"]], n = 2), + c(5.1, 4.9) + ) + expect_identical( + head(pl$read_csv(temp_out)$to_data_frame()[["+Species+"]], n = 2), + c("+setosa+", "+setosa+") + ) +}) From fd27a9828b6f3c042d11858cf1a90ad421500573 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 6 Oct 2023 15:00:26 +0200 Subject: [PATCH 03/25] add some docs --- man/IO_write_csv.Rd | 74 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 man/IO_write_csv.Rd diff --git a/man/IO_write_csv.Rd b/man/IO_write_csv.Rd new file mode 100644 index 000000000..d20dee475 --- /dev/null +++ b/man/IO_write_csv.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_write_csv} +\alias{DataFrame_write_csv} +\title{Write to comma-separated values (CSV) file} +\usage{ +DataFrame_write_csv( + path, + has_header = TRUE, + separator = ",", + line_terminator = "\\n", + quote = "\\"", + batch_size = 1024, + datetime_format = NULL, + date_format = NULL, + time_format = NULL, + float_precision = NULL, + null_values = "", + quote_style = "necessary" +) +} +\arguments{ +\item{path}{File path to which the result should be written.} + +\item{has_header}{Whether to include header in the CSV output.} + +\item{separator}{Separate CSV fields with this symbol.} + +\item{line_terminator}{String used to end each row.} + +\item{quote}{Byte to use as quoting character.} + +\item{batch_size}{Number of rows that will be processed per thread.} + 
+\item{datetime_format}{A format string, with the specifiers defined by the +chrono Rust crate. If no format specified, the default fractional-second +precision is inferred from the maximum timeunit found in the frame’s Datetime +cols (if any).} + +\item{date_format}{A format string, with the specifiers defined by the chrono +Rust crate.} + +\item{time_format}{A format string, with the specifiers defined by the chrono +Rust crate.} + +\item{float_precision}{Number of decimal places to write, applied to both +Float32 and Float64 datatypes.} + +\item{null_values}{A string representing null values (defaulting to the empty +string).} + +\item{quote_style}{Determines the quoting strategy used. +\itemize{ +\item "\verb{necessary"} (default): This puts quotes around fields only when necessary. +They are necessary when fields contain a quote, delimiter or record +terminator. Quotes are also necessary when writing an empty record (which +is indistinguishable from a record with one empty field). This is the +default. +\item \code{"always"}: This puts quotes around every field. +\item \code{"non_numeric"}: This puts quotes around all fields that are non-numeric. +Namely, when writing a field that does not parse as a valid float or integer, +then quotes will be used even if they aren`t strictly necessary. +}} +} +\value{ +This doesn't return anything but creates a CSV file. 
+} +\description{ +Write to comma-separated values (CSV) file +} +\examples{ +dat = pl$DataFrame(mtcars) + +} From a05e98fedfe5d38131a330e4a1386b7c193be1bc Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 6 Oct 2023 15:10:28 +0200 Subject: [PATCH 04/25] simplify rust side --- NAMESPACE | 1 + src/rust/src/rdataframe/mod.rs | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e4235fb6f..8fe0a0b22 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -151,6 +151,7 @@ S3method(tail,LazyFrame) S3method(unique,DataFrame) S3method(unique,LazyFrame) export(.pr) +export(DataFrame_write_csv) export(as_polars_series) export(knit_print.DataFrame) export(pl) diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index cbfba14aa..6c4a5c885 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -437,7 +437,7 @@ impl DataFrame { float_precision: Robj, null_value: Robj, quote_style: Robj, - ) -> List { + ) -> RResult<()> { let null = robj_to!(Option, String, null_value).unwrap_or_default().unwrap(); let path = robj_to!(str, path).unwrap(); @@ -457,11 +457,9 @@ impl DataFrame { .with_null_value(null) .with_quote_style(qs); - let result = r + r .finish(&mut self.0) - .map_err(polars_to_rpolars_err); - - r_result_list(result) + .map_err(polars_to_rpolars_err) } } From 769ee80d53f75ec5c4dd6ede61623aac68721e91 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 6 Oct 2023 18:12:08 +0200 Subject: [PATCH 05/25] simplify rust side --- src/rust/src/rdataframe/mod.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 6c4a5c885..9eb65ecf9 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -444,7 +444,7 @@ impl DataFrame { let f = std::fs::File::create(path).unwrap(); let 
qs = parse_quote_style(quote_style); - let mut r = CsvWriter::new(f) + CsvWriter::new(f) .has_header(robj_to!(bool, has_header).unwrap()) .with_delimiter(robj_to!(u8, separator).unwrap()) .with_line_terminator(robj_to!(String, line_terminator).unwrap()) @@ -455,9 +455,7 @@ impl DataFrame { .with_time_format(robj_to!(Option, String, time_format).unwrap()) .with_float_precision(robj_to!(Option, usize, float_precision).unwrap()) .with_null_value(null) - .with_quote_style(qs); - - r + .with_quote_style(qs) .finish(&mut self.0) .map_err(polars_to_rpolars_err) } @@ -468,7 +466,7 @@ pub fn parse_quote_style(x: Robj) -> QuoteStyle { "always" => QuoteStyle::Always, "necessary" => QuoteStyle::Necessary, "non_numeric" => QuoteStyle::NonNumeric, - // "never" was added in 0.34 + // "never" is available in rust-polars devel only for now (will be added in 0.34) // "never" => QuoteStyle::Never, _ => panic!("polars internal error: `quote_style` must be 'always', 'necessary' or 'non_numeric'.") } From 296cec110d988a70fa7dd35da04cf934bfcbaa03 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 9 Oct 2023 09:52:06 +0200 Subject: [PATCH 06/25] remove null_value check on R side --- R/dataframe__frame.R | 4 ---- src/rust/src/rdataframe/mod.rs | 6 +++--- tests/testthat/test-csv.R | 5 ++--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 7489aec32..d658ea8f4 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1665,10 +1665,6 @@ DataFrame_write_csv = function( stop("Argument `path` must the path to a CSV file.") } - if (is.null(null_values)) { - stop("Argument `null_values` cannot be NULL.") - } - if (length(quote_style) == 0 || !quote_style %in% c("always", "necessary", "non_numeric")) { stop("Argument `quote_style` must be one of 'always', 'necessary', or 'non_numeric'.") diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 
9eb65ecf9..34c09ebac 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -436,13 +436,13 @@ impl DataFrame { time_format: Robj, float_precision: Robj, null_value: Robj, - quote_style: Robj, + quote_style: Robj, ) -> RResult<()> { - let null = robj_to!(Option, String, null_value).unwrap_or_default().unwrap(); + let null = robj_to!(String, null_value).unwrap(); let path = robj_to!(str, path).unwrap(); let f = std::fs::File::create(path).unwrap(); - let qs = parse_quote_style(quote_style); + let qs = parse_quote_style(quote_style); CsvWriter::new(f) .has_header(robj_to!(bool, has_header).unwrap()) diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 87014ec1b..6e2c69469 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -46,8 +46,7 @@ test_that("write_csv: path works", { test_that("write_csv: null_values works", { expect_error( - dat_pl$write_csv(temp_out, null_values = NULL), - "cannot be NULL" + dat_pl$write_csv(temp_out, null_values = NULL) ) dat_pl$write_csv(temp_out, null_values = "hello") tmp = pl$read_csv(temp_out)$to_data_frame() @@ -56,7 +55,7 @@ test_that("write_csv: null_values works", { }) -test_that("write_csv: null_values works", { +test_that("write_csv: separator works", { dat_pl$write_csv(temp_out, separator = "|") expect_identical( pl$read_csv(temp_out, sep = "|")$to_data_frame(), From 3861b30a5f583f4731bf79e4a01589d3801f1bcb Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 16 Oct 2023 14:26:01 +0000 Subject: [PATCH 07/25] auto formatting by `make all` --- R/extendr-wrappers.R | 10 +--------- src/rust/src/rdataframe/mod.rs | 12 ++++++++---- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 1d12ef79a..17e1e7bef 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -189,15 +189,7 @@ DataFrame$sample_n <- function(n, with_replacement, shuffle, seed) .Call(wrap__D DataFrame$sample_frac <- function(frac, 
with_replacement, shuffle, seed) .Call(wrap__DataFrame__sample_frac, self, frac, with_replacement, shuffle, seed) -DataFrame$write_csv <- function(path, has_header, separator, line_terminator, quote, batch_size, - datetime_format, date_format, time_format, float_precision, - null_values, quote_style) .Call(wrap__DataFrame__write_csv, self, path, - has_header, separator, - line_terminator, quote, - batch_size, - datetime_format, date_format, - time_format, float_precision, - null_values, quote_style) +DataFrame$write_csv <- function(path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_value, quote_style) .Call(wrap__DataFrame__write_csv, self, path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_value, quote_style) #' @export `$.DataFrame` <- function (self, name) { func <- DataFrame[[name]]; environment(func) <- environment(); func } diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index b79a8e982..232af2078 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -460,7 +460,6 @@ impl DataFrame { null_value: Robj, quote_style: Robj, ) -> RResult<()> { - let null = robj_to!(String, null_value).unwrap(); let path = robj_to!(str, path).unwrap(); let f = std::fs::File::create(path).unwrap(); @@ -484,17 +483,22 @@ impl DataFrame { } pub fn parse_quote_style(x: Robj) -> QuoteStyle { - match robj_to!(Option, String, x).unwrap_or_default().unwrap().as_str() { + match robj_to!(Option, String, x) + .unwrap_or_default() + .unwrap() + .as_str() + { "always" => QuoteStyle::Always, "necessary" => QuoteStyle::Necessary, "non_numeric" => QuoteStyle::NonNumeric, // "never" is available in rust-polars devel only for now (will be added in 0.34) // "never" => QuoteStyle::Never, - _ => panic!("polars internal error: `quote_style` must be 'always', 'necessary' or 'non_numeric'.") 
+ _ => panic!( + "polars internal error: `quote_style` must be 'always', 'necessary' or 'non_numeric'." + ), } } - impl DataFrame { pub fn to_list_result(&self) -> Result { //convert DataFrame to Result of to R vectors, error if DataType is not supported From 6c221fe0611752e7a6b182727ef3b1fb0745f6d1 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 16 Oct 2023 14:37:52 +0000 Subject: [PATCH 08/25] test: use snapshot tests --- tests/testthat/_snaps/csv.md | 392 +++++++++++++++++++++++++++++++++++ tests/testthat/test-csv.R | 24 +-- 2 files changed, 396 insertions(+), 20 deletions(-) create mode 100644 tests/testthat/_snaps/csv.md diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md new file mode 100644 index 000000000..4cde591fe --- /dev/null +++ b/tests/testthat/_snaps/csv.md @@ -0,0 +1,392 @@ +# write_csv: null_values works + + Code + cat(readLines(temp_out), sep = "\n") + Output + mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb + 21.0,6.0,hello,hello,hello,2.62,16.46,0.0,1.0,4.0,4.0 + 21.0,6.0,160.0,110.0,3.9,2.875,17.02,0.0,1.0,4.0,4.0 + 22.8,4.0,hello,hello,hello,2.32,18.61,1.0,1.0,4.0,1.0 + 21.4,6.0,258.0,110.0,3.08,3.215,19.44,1.0,0.0,3.0,1.0 + 18.7,8.0,360.0,175.0,3.15,3.44,17.02,0.0,0.0,3.0,2.0 + 18.1,6.0,225.0,105.0,2.76,3.46,20.22,1.0,0.0,3.0,1.0 + 14.3,8.0,360.0,245.0,3.21,3.57,15.84,0.0,0.0,3.0,4.0 + 24.4,4.0,146.7,62.0,3.69,3.19,20.0,1.0,0.0,4.0,2.0 + 22.8,4.0,hello,hello,hello,3.15,22.9,1.0,0.0,4.0,2.0 + 19.2,6.0,167.6,123.0,3.92,3.44,18.3,1.0,0.0,4.0,4.0 + 17.8,6.0,167.6,123.0,3.92,3.44,18.9,1.0,0.0,4.0,4.0 + 16.4,8.0,hello,hello,hello,4.07,17.4,0.0,0.0,3.0,3.0 + 17.3,8.0,275.8,180.0,3.07,3.73,17.6,0.0,0.0,3.0,3.0 + 15.2,8.0,275.8,180.0,3.07,3.78,18.0,0.0,0.0,3.0,3.0 + 10.4,8.0,472.0,205.0,2.93,5.25,17.98,0.0,0.0,3.0,4.0 + 10.4,8.0,460.0,215.0,3.0,5.424,17.82,0.0,0.0,3.0,4.0 + 14.7,8.0,440.0,230.0,3.23,5.345,17.42,0.0,0.0,3.0,4.0 + 32.4,4.0,78.7,66.0,4.08,2.2,19.47,1.0,1.0,4.0,1.0 + 30.4,4.0,75.7,52.0,4.93,1.615,18.52,1.0,1.0,4.0,2.0 
+ 33.9,4.0,71.1,65.0,4.22,1.835,19.9,1.0,1.0,4.0,1.0 + 21.5,4.0,120.1,97.0,3.7,2.465,20.01,1.0,0.0,3.0,1.0 + 15.5,8.0,318.0,150.0,2.76,3.52,16.87,0.0,0.0,3.0,2.0 + 15.2,8.0,304.0,150.0,3.15,3.435,17.3,0.0,0.0,3.0,2.0 + 13.3,8.0,350.0,245.0,3.73,3.84,15.41,0.0,0.0,3.0,4.0 + 19.2,8.0,400.0,175.0,3.08,3.845,17.05,0.0,0.0,3.0,2.0 + 27.3,4.0,79.0,66.0,4.08,1.935,18.9,1.0,1.0,4.0,1.0 + 26.0,4.0,120.3,91.0,4.43,2.14,16.7,0.0,1.0,5.0,2.0 + 30.4,4.0,95.1,113.0,3.77,1.513,16.9,1.0,1.0,5.0,2.0 + 15.8,8.0,351.0,264.0,4.22,3.17,14.5,0.0,1.0,5.0,4.0 + 19.7,6.0,145.0,175.0,3.62,2.77,15.5,0.0,1.0,5.0,6.0 + 15.0,8.0,301.0,335.0,3.54,3.57,14.6,0.0,1.0,5.0,8.0 + 21.4,4.0,121.0,109.0,4.11,2.78,18.6,1.0,1.0,4.0,2.0 + +# write_csv: separator works + + Code + cat(readLines(temp_out), sep = "\n") + Output + mpg|cyl|disp|hp|drat|wt|qsec|vs|am|gear|carb + 21.0|6.0||||2.62|16.46|0.0|1.0|4.0|4.0 + 21.0|6.0|160.0|110.0|3.9|2.875|17.02|0.0|1.0|4.0|4.0 + 22.8|4.0||||2.32|18.61|1.0|1.0|4.0|1.0 + 21.4|6.0|258.0|110.0|3.08|3.215|19.44|1.0|0.0|3.0|1.0 + 18.7|8.0|360.0|175.0|3.15|3.44|17.02|0.0|0.0|3.0|2.0 + 18.1|6.0|225.0|105.0|2.76|3.46|20.22|1.0|0.0|3.0|1.0 + 14.3|8.0|360.0|245.0|3.21|3.57|15.84|0.0|0.0|3.0|4.0 + 24.4|4.0|146.7|62.0|3.69|3.19|20.0|1.0|0.0|4.0|2.0 + 22.8|4.0||||3.15|22.9|1.0|0.0|4.0|2.0 + 19.2|6.0|167.6|123.0|3.92|3.44|18.3|1.0|0.0|4.0|4.0 + 17.8|6.0|167.6|123.0|3.92|3.44|18.9|1.0|0.0|4.0|4.0 + 16.4|8.0||||4.07|17.4|0.0|0.0|3.0|3.0 + 17.3|8.0|275.8|180.0|3.07|3.73|17.6|0.0|0.0|3.0|3.0 + 15.2|8.0|275.8|180.0|3.07|3.78|18.0|0.0|0.0|3.0|3.0 + 10.4|8.0|472.0|205.0|2.93|5.25|17.98|0.0|0.0|3.0|4.0 + 10.4|8.0|460.0|215.0|3.0|5.424|17.82|0.0|0.0|3.0|4.0 + 14.7|8.0|440.0|230.0|3.23|5.345|17.42|0.0|0.0|3.0|4.0 + 32.4|4.0|78.7|66.0|4.08|2.2|19.47|1.0|1.0|4.0|1.0 + 30.4|4.0|75.7|52.0|4.93|1.615|18.52|1.0|1.0|4.0|2.0 + 33.9|4.0|71.1|65.0|4.22|1.835|19.9|1.0|1.0|4.0|1.0 + 21.5|4.0|120.1|97.0|3.7|2.465|20.01|1.0|0.0|3.0|1.0 + 15.5|8.0|318.0|150.0|2.76|3.52|16.87|0.0|0.0|3.0|2.0 + 
15.2|8.0|304.0|150.0|3.15|3.435|17.3|0.0|0.0|3.0|2.0 + 13.3|8.0|350.0|245.0|3.73|3.84|15.41|0.0|0.0|3.0|4.0 + 19.2|8.0|400.0|175.0|3.08|3.845|17.05|0.0|0.0|3.0|2.0 + 27.3|4.0|79.0|66.0|4.08|1.935|18.9|1.0|1.0|4.0|1.0 + 26.0|4.0|120.3|91.0|4.43|2.14|16.7|0.0|1.0|5.0|2.0 + 30.4|4.0|95.1|113.0|3.77|1.513|16.9|1.0|1.0|5.0|2.0 + 15.8|8.0|351.0|264.0|4.22|3.17|14.5|0.0|1.0|5.0|4.0 + 19.7|6.0|145.0|175.0|3.62|2.77|15.5|0.0|1.0|5.0|6.0 + 15.0|8.0|301.0|335.0|3.54|3.57|14.6|0.0|1.0|5.0|8.0 + 21.4|4.0|121.0|109.0|4.11|2.78|18.6|1.0|1.0|4.0|2.0 + +# write_csv: quote_style and quote works + + Code + cat(readLines(temp_out), sep = "\n") + Output + +Sepal.Length+,+Sepal.Width+,+Petal.Length+,+Petal.Width+,+Species+ + +5.1+,+3.5+,+1.4+,+0.2+,+setosa+ + +4.9+,+3.0+,+1.4+,+0.2+,+setosa+ + +4.7+,+3.2+,+1.3+,+0.2+,+setosa+ + +4.6+,+3.1+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.6+,+1.4+,+0.2+,+setosa+ + +5.4+,+3.9+,+1.7+,+0.4+,+setosa+ + +4.6+,+3.4+,+1.4+,+0.3+,+setosa+ + +5.0+,+3.4+,+1.5+,+0.2+,+setosa+ + +4.4+,+2.9+,+1.4+,+0.2+,+setosa+ + +4.9+,+3.1+,+1.5+,+0.1+,+setosa+ + +5.4+,+3.7+,+1.5+,+0.2+,+setosa+ + +4.8+,+3.4+,+1.6+,+0.2+,+setosa+ + +4.8+,+3.0+,+1.4+,+0.1+,+setosa+ + +4.3+,+3.0+,+1.1+,+0.1+,+setosa+ + +5.8+,+4.0+,+1.2+,+0.2+,+setosa+ + +5.7+,+4.4+,+1.5+,+0.4+,+setosa+ + +5.4+,+3.9+,+1.3+,+0.4+,+setosa+ + +5.1+,+3.5+,+1.4+,+0.3+,+setosa+ + +5.7+,+3.8+,+1.7+,+0.3+,+setosa+ + +5.1+,+3.8+,+1.5+,+0.3+,+setosa+ + +5.4+,+3.4+,+1.7+,+0.2+,+setosa+ + +5.1+,+3.7+,+1.5+,+0.4+,+setosa+ + +4.6+,+3.6+,+1.0+,+0.2+,+setosa+ + +5.1+,+3.3+,+1.7+,+0.5+,+setosa+ + +4.8+,+3.4+,+1.9+,+0.2+,+setosa+ + +5.0+,+3.0+,+1.6+,+0.2+,+setosa+ + +5.0+,+3.4+,+1.6+,+0.4+,+setosa+ + +5.2+,+3.5+,+1.5+,+0.2+,+setosa+ + +5.2+,+3.4+,+1.4+,+0.2+,+setosa+ + +4.7+,+3.2+,+1.6+,+0.2+,+setosa+ + +4.8+,+3.1+,+1.6+,+0.2+,+setosa+ + +5.4+,+3.4+,+1.5+,+0.4+,+setosa+ + +5.2+,+4.1+,+1.5+,+0.1+,+setosa+ + +5.5+,+4.2+,+1.4+,+0.2+,+setosa+ + +4.9+,+3.1+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.2+,+1.2+,+0.2+,+setosa+ + 
+5.5+,+3.5+,+1.3+,+0.2+,+setosa+ + +4.9+,+3.6+,+1.4+,+0.1+,+setosa+ + +4.4+,+3.0+,+1.3+,+0.2+,+setosa+ + +5.1+,+3.4+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.5+,+1.3+,+0.3+,+setosa+ + +4.5+,+2.3+,+1.3+,+0.3+,+setosa+ + +4.4+,+3.2+,+1.3+,+0.2+,+setosa+ + +5.0+,+3.5+,+1.6+,+0.6+,+setosa+ + +5.1+,+3.8+,+1.9+,+0.4+,+setosa+ + +4.8+,+3.0+,+1.4+,+0.3+,+setosa+ + +5.1+,+3.8+,+1.6+,+0.2+,+setosa+ + +4.6+,+3.2+,+1.4+,+0.2+,+setosa+ + +5.3+,+3.7+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.3+,+1.4+,+0.2+,+setosa+ + +7.0+,+3.2+,+4.7+,+1.4+,+versicolor+ + +6.4+,+3.2+,+4.5+,+1.5+,+versicolor+ + +6.9+,+3.1+,+4.9+,+1.5+,+versicolor+ + +5.5+,+2.3+,+4.0+,+1.3+,+versicolor+ + +6.5+,+2.8+,+4.6+,+1.5+,+versicolor+ + +5.7+,+2.8+,+4.5+,+1.3+,+versicolor+ + +6.3+,+3.3+,+4.7+,+1.6+,+versicolor+ + +4.9+,+2.4+,+3.3+,+1.0+,+versicolor+ + +6.6+,+2.9+,+4.6+,+1.3+,+versicolor+ + +5.2+,+2.7+,+3.9+,+1.4+,+versicolor+ + +5.0+,+2.0+,+3.5+,+1.0+,+versicolor+ + +5.9+,+3.0+,+4.2+,+1.5+,+versicolor+ + +6.0+,+2.2+,+4.0+,+1.0+,+versicolor+ + +6.1+,+2.9+,+4.7+,+1.4+,+versicolor+ + +5.6+,+2.9+,+3.6+,+1.3+,+versicolor+ + +6.7+,+3.1+,+4.4+,+1.4+,+versicolor+ + +5.6+,+3.0+,+4.5+,+1.5+,+versicolor+ + +5.8+,+2.7+,+4.1+,+1.0+,+versicolor+ + +6.2+,+2.2+,+4.5+,+1.5+,+versicolor+ + +5.6+,+2.5+,+3.9+,+1.1+,+versicolor+ + +5.9+,+3.2+,+4.8+,+1.8+,+versicolor+ + +6.1+,+2.8+,+4.0+,+1.3+,+versicolor+ + +6.3+,+2.5+,+4.9+,+1.5+,+versicolor+ + +6.1+,+2.8+,+4.7+,+1.2+,+versicolor+ + +6.4+,+2.9+,+4.3+,+1.3+,+versicolor+ + +6.6+,+3.0+,+4.4+,+1.4+,+versicolor+ + +6.8+,+2.8+,+4.8+,+1.4+,+versicolor+ + +6.7+,+3.0+,+5.0+,+1.7+,+versicolor+ + +6.0+,+2.9+,+4.5+,+1.5+,+versicolor+ + +5.7+,+2.6+,+3.5+,+1.0+,+versicolor+ + +5.5+,+2.4+,+3.8+,+1.1+,+versicolor+ + +5.5+,+2.4+,+3.7+,+1.0+,+versicolor+ + +5.8+,+2.7+,+3.9+,+1.2+,+versicolor+ + +6.0+,+2.7+,+5.1+,+1.6+,+versicolor+ + +5.4+,+3.0+,+4.5+,+1.5+,+versicolor+ + +6.0+,+3.4+,+4.5+,+1.6+,+versicolor+ + +6.7+,+3.1+,+4.7+,+1.5+,+versicolor+ + +6.3+,+2.3+,+4.4+,+1.3+,+versicolor+ + 
+5.6+,+3.0+,+4.1+,+1.3+,+versicolor+ + +5.5+,+2.5+,+4.0+,+1.3+,+versicolor+ + +5.5+,+2.6+,+4.4+,+1.2+,+versicolor+ + +6.1+,+3.0+,+4.6+,+1.4+,+versicolor+ + +5.8+,+2.6+,+4.0+,+1.2+,+versicolor+ + +5.0+,+2.3+,+3.3+,+1.0+,+versicolor+ + +5.6+,+2.7+,+4.2+,+1.3+,+versicolor+ + +5.7+,+3.0+,+4.2+,+1.2+,+versicolor+ + +5.7+,+2.9+,+4.2+,+1.3+,+versicolor+ + +6.2+,+2.9+,+4.3+,+1.3+,+versicolor+ + +5.1+,+2.5+,+3.0+,+1.1+,+versicolor+ + +5.7+,+2.8+,+4.1+,+1.3+,+versicolor+ + +6.3+,+3.3+,+6.0+,+2.5+,+virginica+ + +5.8+,+2.7+,+5.1+,+1.9+,+virginica+ + +7.1+,+3.0+,+5.9+,+2.1+,+virginica+ + +6.3+,+2.9+,+5.6+,+1.8+,+virginica+ + +6.5+,+3.0+,+5.8+,+2.2+,+virginica+ + +7.6+,+3.0+,+6.6+,+2.1+,+virginica+ + +4.9+,+2.5+,+4.5+,+1.7+,+virginica+ + +7.3+,+2.9+,+6.3+,+1.8+,+virginica+ + +6.7+,+2.5+,+5.8+,+1.8+,+virginica+ + +7.2+,+3.6+,+6.1+,+2.5+,+virginica+ + +6.5+,+3.2+,+5.1+,+2.0+,+virginica+ + +6.4+,+2.7+,+5.3+,+1.9+,+virginica+ + +6.8+,+3.0+,+5.5+,+2.1+,+virginica+ + +5.7+,+2.5+,+5.0+,+2.0+,+virginica+ + +5.8+,+2.8+,+5.1+,+2.4+,+virginica+ + +6.4+,+3.2+,+5.3+,+2.3+,+virginica+ + +6.5+,+3.0+,+5.5+,+1.8+,+virginica+ + +7.7+,+3.8+,+6.7+,+2.2+,+virginica+ + +7.7+,+2.6+,+6.9+,+2.3+,+virginica+ + +6.0+,+2.2+,+5.0+,+1.5+,+virginica+ + +6.9+,+3.2+,+5.7+,+2.3+,+virginica+ + +5.6+,+2.8+,+4.9+,+2.0+,+virginica+ + +7.7+,+2.8+,+6.7+,+2.0+,+virginica+ + +6.3+,+2.7+,+4.9+,+1.8+,+virginica+ + +6.7+,+3.3+,+5.7+,+2.1+,+virginica+ + +7.2+,+3.2+,+6.0+,+1.8+,+virginica+ + +6.2+,+2.8+,+4.8+,+1.8+,+virginica+ + +6.1+,+3.0+,+4.9+,+1.8+,+virginica+ + +6.4+,+2.8+,+5.6+,+2.1+,+virginica+ + +7.2+,+3.0+,+5.8+,+1.6+,+virginica+ + +7.4+,+2.8+,+6.1+,+1.9+,+virginica+ + +7.9+,+3.8+,+6.4+,+2.0+,+virginica+ + +6.4+,+2.8+,+5.6+,+2.2+,+virginica+ + +6.3+,+2.8+,+5.1+,+1.5+,+virginica+ + +6.1+,+2.6+,+5.6+,+1.4+,+virginica+ + +7.7+,+3.0+,+6.1+,+2.3+,+virginica+ + +6.3+,+3.4+,+5.6+,+2.4+,+virginica+ + +6.4+,+3.1+,+5.5+,+1.8+,+virginica+ + +6.0+,+3.0+,+4.8+,+1.8+,+virginica+ + +6.9+,+3.1+,+5.4+,+2.1+,+virginica+ + 
+6.7+,+3.1+,+5.6+,+2.4+,+virginica+ + +6.9+,+3.1+,+5.1+,+2.3+,+virginica+ + +5.8+,+2.7+,+5.1+,+1.9+,+virginica+ + +6.8+,+3.2+,+5.9+,+2.3+,+virginica+ + +6.7+,+3.3+,+5.7+,+2.5+,+virginica+ + +6.7+,+3.0+,+5.2+,+2.3+,+virginica+ + +6.3+,+2.5+,+5.0+,+1.9+,+virginica+ + +6.5+,+3.0+,+5.2+,+2.0+,+virginica+ + +6.2+,+3.4+,+5.4+,+2.3+,+virginica+ + +5.9+,+3.0+,+5.1+,+1.8+,+virginica+ + +--- + + Code + cat(readLines(temp_out), sep = "\n") + Output + +Sepal.Length+,+Sepal.Width+,+Petal.Length+,+Petal.Width+,+Species+ + 5.1,3.5,1.4,0.2,+setosa+ + 4.9,3.0,1.4,0.2,+setosa+ + 4.7,3.2,1.3,0.2,+setosa+ + 4.6,3.1,1.5,0.2,+setosa+ + 5.0,3.6,1.4,0.2,+setosa+ + 5.4,3.9,1.7,0.4,+setosa+ + 4.6,3.4,1.4,0.3,+setosa+ + 5.0,3.4,1.5,0.2,+setosa+ + 4.4,2.9,1.4,0.2,+setosa+ + 4.9,3.1,1.5,0.1,+setosa+ + 5.4,3.7,1.5,0.2,+setosa+ + 4.8,3.4,1.6,0.2,+setosa+ + 4.8,3.0,1.4,0.1,+setosa+ + 4.3,3.0,1.1,0.1,+setosa+ + 5.8,4.0,1.2,0.2,+setosa+ + 5.7,4.4,1.5,0.4,+setosa+ + 5.4,3.9,1.3,0.4,+setosa+ + 5.1,3.5,1.4,0.3,+setosa+ + 5.7,3.8,1.7,0.3,+setosa+ + 5.1,3.8,1.5,0.3,+setosa+ + 5.4,3.4,1.7,0.2,+setosa+ + 5.1,3.7,1.5,0.4,+setosa+ + 4.6,3.6,1.0,0.2,+setosa+ + 5.1,3.3,1.7,0.5,+setosa+ + 4.8,3.4,1.9,0.2,+setosa+ + 5.0,3.0,1.6,0.2,+setosa+ + 5.0,3.4,1.6,0.4,+setosa+ + 5.2,3.5,1.5,0.2,+setosa+ + 5.2,3.4,1.4,0.2,+setosa+ + 4.7,3.2,1.6,0.2,+setosa+ + 4.8,3.1,1.6,0.2,+setosa+ + 5.4,3.4,1.5,0.4,+setosa+ + 5.2,4.1,1.5,0.1,+setosa+ + 5.5,4.2,1.4,0.2,+setosa+ + 4.9,3.1,1.5,0.2,+setosa+ + 5.0,3.2,1.2,0.2,+setosa+ + 5.5,3.5,1.3,0.2,+setosa+ + 4.9,3.6,1.4,0.1,+setosa+ + 4.4,3.0,1.3,0.2,+setosa+ + 5.1,3.4,1.5,0.2,+setosa+ + 5.0,3.5,1.3,0.3,+setosa+ + 4.5,2.3,1.3,0.3,+setosa+ + 4.4,3.2,1.3,0.2,+setosa+ + 5.0,3.5,1.6,0.6,+setosa+ + 5.1,3.8,1.9,0.4,+setosa+ + 4.8,3.0,1.4,0.3,+setosa+ + 5.1,3.8,1.6,0.2,+setosa+ + 4.6,3.2,1.4,0.2,+setosa+ + 5.3,3.7,1.5,0.2,+setosa+ + 5.0,3.3,1.4,0.2,+setosa+ + 7.0,3.2,4.7,1.4,+versicolor+ + 6.4,3.2,4.5,1.5,+versicolor+ + 6.9,3.1,4.9,1.5,+versicolor+ + 5.5,2.3,4.0,1.3,+versicolor+ + 
6.5,2.8,4.6,1.5,+versicolor+ + 5.7,2.8,4.5,1.3,+versicolor+ + 6.3,3.3,4.7,1.6,+versicolor+ + 4.9,2.4,3.3,1.0,+versicolor+ + 6.6,2.9,4.6,1.3,+versicolor+ + 5.2,2.7,3.9,1.4,+versicolor+ + 5.0,2.0,3.5,1.0,+versicolor+ + 5.9,3.0,4.2,1.5,+versicolor+ + 6.0,2.2,4.0,1.0,+versicolor+ + 6.1,2.9,4.7,1.4,+versicolor+ + 5.6,2.9,3.6,1.3,+versicolor+ + 6.7,3.1,4.4,1.4,+versicolor+ + 5.6,3.0,4.5,1.5,+versicolor+ + 5.8,2.7,4.1,1.0,+versicolor+ + 6.2,2.2,4.5,1.5,+versicolor+ + 5.6,2.5,3.9,1.1,+versicolor+ + 5.9,3.2,4.8,1.8,+versicolor+ + 6.1,2.8,4.0,1.3,+versicolor+ + 6.3,2.5,4.9,1.5,+versicolor+ + 6.1,2.8,4.7,1.2,+versicolor+ + 6.4,2.9,4.3,1.3,+versicolor+ + 6.6,3.0,4.4,1.4,+versicolor+ + 6.8,2.8,4.8,1.4,+versicolor+ + 6.7,3.0,5.0,1.7,+versicolor+ + 6.0,2.9,4.5,1.5,+versicolor+ + 5.7,2.6,3.5,1.0,+versicolor+ + 5.5,2.4,3.8,1.1,+versicolor+ + 5.5,2.4,3.7,1.0,+versicolor+ + 5.8,2.7,3.9,1.2,+versicolor+ + 6.0,2.7,5.1,1.6,+versicolor+ + 5.4,3.0,4.5,1.5,+versicolor+ + 6.0,3.4,4.5,1.6,+versicolor+ + 6.7,3.1,4.7,1.5,+versicolor+ + 6.3,2.3,4.4,1.3,+versicolor+ + 5.6,3.0,4.1,1.3,+versicolor+ + 5.5,2.5,4.0,1.3,+versicolor+ + 5.5,2.6,4.4,1.2,+versicolor+ + 6.1,3.0,4.6,1.4,+versicolor+ + 5.8,2.6,4.0,1.2,+versicolor+ + 5.0,2.3,3.3,1.0,+versicolor+ + 5.6,2.7,4.2,1.3,+versicolor+ + 5.7,3.0,4.2,1.2,+versicolor+ + 5.7,2.9,4.2,1.3,+versicolor+ + 6.2,2.9,4.3,1.3,+versicolor+ + 5.1,2.5,3.0,1.1,+versicolor+ + 5.7,2.8,4.1,1.3,+versicolor+ + 6.3,3.3,6.0,2.5,+virginica+ + 5.8,2.7,5.1,1.9,+virginica+ + 7.1,3.0,5.9,2.1,+virginica+ + 6.3,2.9,5.6,1.8,+virginica+ + 6.5,3.0,5.8,2.2,+virginica+ + 7.6,3.0,6.6,2.1,+virginica+ + 4.9,2.5,4.5,1.7,+virginica+ + 7.3,2.9,6.3,1.8,+virginica+ + 6.7,2.5,5.8,1.8,+virginica+ + 7.2,3.6,6.1,2.5,+virginica+ + 6.5,3.2,5.1,2.0,+virginica+ + 6.4,2.7,5.3,1.9,+virginica+ + 6.8,3.0,5.5,2.1,+virginica+ + 5.7,2.5,5.0,2.0,+virginica+ + 5.8,2.8,5.1,2.4,+virginica+ + 6.4,3.2,5.3,2.3,+virginica+ + 6.5,3.0,5.5,1.8,+virginica+ + 7.7,3.8,6.7,2.2,+virginica+ + 7.7,2.6,6.9,2.3,+virginica+ + 
6.0,2.2,5.0,1.5,+virginica+ + 6.9,3.2,5.7,2.3,+virginica+ + 5.6,2.8,4.9,2.0,+virginica+ + 7.7,2.8,6.7,2.0,+virginica+ + 6.3,2.7,4.9,1.8,+virginica+ + 6.7,3.3,5.7,2.1,+virginica+ + 7.2,3.2,6.0,1.8,+virginica+ + 6.2,2.8,4.8,1.8,+virginica+ + 6.1,3.0,4.9,1.8,+virginica+ + 6.4,2.8,5.6,2.1,+virginica+ + 7.2,3.0,5.8,1.6,+virginica+ + 7.4,2.8,6.1,1.9,+virginica+ + 7.9,3.8,6.4,2.0,+virginica+ + 6.4,2.8,5.6,2.2,+virginica+ + 6.3,2.8,5.1,1.5,+virginica+ + 6.1,2.6,5.6,1.4,+virginica+ + 7.7,3.0,6.1,2.3,+virginica+ + 6.3,3.4,5.6,2.4,+virginica+ + 6.4,3.1,5.5,1.8,+virginica+ + 6.0,3.0,4.8,1.8,+virginica+ + 6.9,3.1,5.4,2.1,+virginica+ + 6.7,3.1,5.6,2.4,+virginica+ + 6.9,3.1,5.1,2.3,+virginica+ + 5.8,2.7,5.1,1.9,+virginica+ + 6.8,3.2,5.9,2.3,+virginica+ + 6.7,3.3,5.7,2.5,+virginica+ + 6.7,3.0,5.2,2.3,+virginica+ + 6.3,2.5,5.0,1.9,+virginica+ + 6.5,3.0,5.2,2.0,+virginica+ + 6.2,3.4,5.4,2.3,+virginica+ + 5.9,3.0,5.1,1.8,+virginica+ + diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 6e2c69469..ac02b8cf5 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -49,19 +49,13 @@ test_that("write_csv: null_values works", { dat_pl$write_csv(temp_out, null_values = NULL) ) dat_pl$write_csv(temp_out, null_values = "hello") - tmp = pl$read_csv(temp_out)$to_data_frame() - expect_true(is.character(tmp$disp) && is.character(tmp$hp) && is.character(tmp$drat)) - expect_equal(tmp[1:2, "disp"], c("hello", "160.0")) + expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) }) test_that("write_csv: separator works", { dat_pl$write_csv(temp_out, separator = "|") - expect_identical( - pl$read_csv(temp_out, sep = "|")$to_data_frame(), - dat, - ignore_attr = TRUE # rownames are lost when writing / reading from CSV - ) + expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) }) test_that("write_csv: quote_style and quote works", { @@ -73,18 +67,8 @@ test_that("write_csv: quote_style and quote works", { ) dat_pl2$write_csv(temp_out, quote_style = "always", quote = 
"+") - expect_identical( - head(pl$read_csv(temp_out)$to_data_frame()[["+Sepal.Length+"]], n = 2), - c("+5.1+", "+4.9+") - ) + expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) dat_pl2$write_csv(temp_out, quote_style = "non_numeric", quote = "+") - expect_identical( - head(pl$read_csv(temp_out)$to_data_frame()[["+Sepal.Length+"]], n = 2), - c(5.1, 4.9) - ) - expect_identical( - head(pl$read_csv(temp_out)$to_data_frame()[["+Species+"]], n = 2), - c("+setosa+", "+setosa+") - ) + expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) }) From ab4253fc31d3481885518ed54d621cf2d6320e1a Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 16 Oct 2023 14:40:18 +0000 Subject: [PATCH 09/25] docs: fix typo --- R/dataframe__frame.R | 2 +- man/IO_write_csv.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index ac82cd991..edefe5247 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1704,7 +1704,7 @@ DataFrame_sample = function( #' @param null_values A string representing null values (defaulting to the empty #' string). #' @param quote_style Determines the quoting strategy used. -#' * "`necessary"` (default): This puts quotes around fields only when necessary. +#' * `"necessary"` (default): This puts quotes around fields only when necessary. #' They are necessary when fields contain a quote, delimiter or record #' terminator. Quotes are also necessary when writing an empty record (which #' is indistinguishable from a record with one empty field). This is the diff --git a/man/IO_write_csv.Rd b/man/IO_write_csv.Rd index d20dee475..0d2cca41d 100644 --- a/man/IO_write_csv.Rd +++ b/man/IO_write_csv.Rd @@ -51,7 +51,7 @@ string).} \item{quote_style}{Determines the quoting strategy used. \itemize{ -\item "\verb{necessary"} (default): This puts quotes around fields only when necessary. +\item \code{"necessary"} (default): This puts quotes around fields only when necessary. 
They are necessary when fields contain a quote, delimiter or record terminator. Quotes are also necessary when writing an empty record (which is indistinguishable from a record with one empty field). This is the From d4ab52c127cfd483983ad1cca6020e7ff4c08617 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 16 Oct 2023 14:46:12 +0000 Subject: [PATCH 10/25] refactor(test): use helper function --- tests/testthat/_snaps/csv.md | 8 ++++---- tests/testthat/helper.R | 4 ++++ tests/testthat/test-csv.R | 8 ++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md index 4cde591fe..1f81d03b9 100644 --- a/tests/testthat/_snaps/csv.md +++ b/tests/testthat/_snaps/csv.md @@ -1,7 +1,7 @@ # write_csv: null_values works Code - cat(readLines(temp_out), sep = "\n") + cat(readLines(path), sep = "\n") Output mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb 21.0,6.0,hello,hello,hello,2.62,16.46,0.0,1.0,4.0,4.0 @@ -40,7 +40,7 @@ # write_csv: separator works Code - cat(readLines(temp_out), sep = "\n") + cat(readLines(path), sep = "\n") Output mpg|cyl|disp|hp|drat|wt|qsec|vs|am|gear|carb 21.0|6.0||||2.62|16.46|0.0|1.0|4.0|4.0 @@ -79,7 +79,7 @@ # write_csv: quote_style and quote works Code - cat(readLines(temp_out), sep = "\n") + cat(readLines(path), sep = "\n") Output +Sepal.Length+,+Sepal.Width+,+Petal.Length+,+Petal.Width+,+Species+ +5.1+,+3.5+,+1.4+,+0.2+,+setosa+ @@ -236,7 +236,7 @@ --- Code - cat(readLines(temp_out), sep = "\n") + cat(readLines(path), sep = "\n") Output +Sepal.Length+,+Sepal.Width+,+Petal.Length+,+Petal.Width+,+Species+ 5.1,3.5,1.4,0.2,+setosa+ diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 5c4e3a460..73325813c 100644 --- a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -84,3 +84,7 @@ expect_rpolarserr = function(expr, ctxs) { expect_identical(class(res$err), "RPolarsErr") expect_identical(names(res$err$contexts()), ctxs) } + +expect_snapshot_file = function(path, ...) 
{ + expect_snapshot(readLines(path) |> cat(sep = "\n"), ...) +} diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index ac02b8cf5..802c01b9b 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -49,13 +49,13 @@ test_that("write_csv: null_values works", { dat_pl$write_csv(temp_out, null_values = NULL) ) dat_pl$write_csv(temp_out, null_values = "hello") - expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) + expect_snapshot_file(temp_out) }) test_that("write_csv: separator works", { dat_pl$write_csv(temp_out, separator = "|") - expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) + expect_snapshot_file(temp_out) }) test_that("write_csv: quote_style and quote works", { @@ -67,8 +67,8 @@ test_that("write_csv: quote_style and quote works", { ) dat_pl2$write_csv(temp_out, quote_style = "always", quote = "+") - expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) + expect_snapshot_file(temp_out) dat_pl2$write_csv(temp_out, quote_style = "non_numeric", quote = "+") - expect_snapshot(readLines(temp_out) |> cat(sep = "\n")) + expect_snapshot_file(temp_out) }) From a0b661dd1ab2a1d6ecf36a5419fe8d9b38d6a1c6 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Mon, 16 Oct 2023 14:59:20 +0000 Subject: [PATCH 11/25] test: tests for quote_style --- tests/testthat/test-csv.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 802c01b9b..09b4fd5ad 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -72,3 +72,15 @@ test_that("write_csv: quote_style and quote works", { dat_pl2$write_csv(temp_out, quote_style = "non_numeric", quote = "+") expect_snapshot_file(temp_out) }) + +patrick::with_parameters_test_that( + "write_csv: quote_style", + { + df = pl$DataFrame( + a = c(r"("foo")", "bar"), + b = 1:2, + c = letters[1:2] + )$write_csv(temp_out, quote_style = quote_style) + }, + quote_style = c("necessary", "always", "non_numeric") +) From 
ada992b0a166e3e226fd9e719d706b17d10ce16e Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 16 Oct 2023 18:46:08 +0200 Subject: [PATCH 12/25] remove check for path on the R side --- R/dataframe__frame.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index edefe5247..e82baae9b 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1742,10 +1742,6 @@ DataFrame_write_csv = function( quote_style = "necessary" ) { - if (file_ext(path) != "csv") { - stop("Argument `path` must the path to a CSV file.") - } - if (length(quote_style) == 0 || !quote_style %in% c("always", "necessary", "non_numeric")) { stop("Argument `quote_style` must be one of 'always', 'necessary', or 'non_numeric'.") From 502501786ba9becc38244f8d081fdac5b1a64a70 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 16 Oct 2023 18:52:40 +0200 Subject: [PATCH 13/25] update tests --- tests/testthat/_snaps/csv.md | 61 ++++++++++++++++-------------------- tests/testthat/test-csv.R | 8 ++--- 2 files changed, 29 insertions(+), 40 deletions(-) diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md index 1f81d03b9..33b856e36 100644 --- a/tests/testthat/_snaps/csv.md +++ b/tests/testthat/_snaps/csv.md @@ -19,23 +19,6 @@ 17.3,8.0,275.8,180.0,3.07,3.73,17.6,0.0,0.0,3.0,3.0 15.2,8.0,275.8,180.0,3.07,3.78,18.0,0.0,0.0,3.0,3.0 10.4,8.0,472.0,205.0,2.93,5.25,17.98,0.0,0.0,3.0,4.0 - 10.4,8.0,460.0,215.0,3.0,5.424,17.82,0.0,0.0,3.0,4.0 - 14.7,8.0,440.0,230.0,3.23,5.345,17.42,0.0,0.0,3.0,4.0 - 32.4,4.0,78.7,66.0,4.08,2.2,19.47,1.0,1.0,4.0,1.0 - 30.4,4.0,75.7,52.0,4.93,1.615,18.52,1.0,1.0,4.0,2.0 - 33.9,4.0,71.1,65.0,4.22,1.835,19.9,1.0,1.0,4.0,1.0 - 21.5,4.0,120.1,97.0,3.7,2.465,20.01,1.0,0.0,3.0,1.0 - 15.5,8.0,318.0,150.0,2.76,3.52,16.87,0.0,0.0,3.0,2.0 - 15.2,8.0,304.0,150.0,3.15,3.435,17.3,0.0,0.0,3.0,2.0 - 
13.3,8.0,350.0,245.0,3.73,3.84,15.41,0.0,0.0,3.0,4.0 - 19.2,8.0,400.0,175.0,3.08,3.845,17.05,0.0,0.0,3.0,2.0 - 27.3,4.0,79.0,66.0,4.08,1.935,18.9,1.0,1.0,4.0,1.0 - 26.0,4.0,120.3,91.0,4.43,2.14,16.7,0.0,1.0,5.0,2.0 - 30.4,4.0,95.1,113.0,3.77,1.513,16.9,1.0,1.0,5.0,2.0 - 15.8,8.0,351.0,264.0,4.22,3.17,14.5,0.0,1.0,5.0,4.0 - 19.7,6.0,145.0,175.0,3.62,2.77,15.5,0.0,1.0,5.0,6.0 - 15.0,8.0,301.0,335.0,3.54,3.57,14.6,0.0,1.0,5.0,8.0 - 21.4,4.0,121.0,109.0,4.11,2.78,18.6,1.0,1.0,4.0,2.0 # write_csv: separator works @@ -58,23 +41,6 @@ 17.3|8.0|275.8|180.0|3.07|3.73|17.6|0.0|0.0|3.0|3.0 15.2|8.0|275.8|180.0|3.07|3.78|18.0|0.0|0.0|3.0|3.0 10.4|8.0|472.0|205.0|2.93|5.25|17.98|0.0|0.0|3.0|4.0 - 10.4|8.0|460.0|215.0|3.0|5.424|17.82|0.0|0.0|3.0|4.0 - 14.7|8.0|440.0|230.0|3.23|5.345|17.42|0.0|0.0|3.0|4.0 - 32.4|4.0|78.7|66.0|4.08|2.2|19.47|1.0|1.0|4.0|1.0 - 30.4|4.0|75.7|52.0|4.93|1.615|18.52|1.0|1.0|4.0|2.0 - 33.9|4.0|71.1|65.0|4.22|1.835|19.9|1.0|1.0|4.0|1.0 - 21.5|4.0|120.1|97.0|3.7|2.465|20.01|1.0|0.0|3.0|1.0 - 15.5|8.0|318.0|150.0|2.76|3.52|16.87|0.0|0.0|3.0|2.0 - 15.2|8.0|304.0|150.0|3.15|3.435|17.3|0.0|0.0|3.0|2.0 - 13.3|8.0|350.0|245.0|3.73|3.84|15.41|0.0|0.0|3.0|4.0 - 19.2|8.0|400.0|175.0|3.08|3.845|17.05|0.0|0.0|3.0|2.0 - 27.3|4.0|79.0|66.0|4.08|1.935|18.9|1.0|1.0|4.0|1.0 - 26.0|4.0|120.3|91.0|4.43|2.14|16.7|0.0|1.0|5.0|2.0 - 30.4|4.0|95.1|113.0|3.77|1.513|16.9|1.0|1.0|5.0|2.0 - 15.8|8.0|351.0|264.0|4.22|3.17|14.5|0.0|1.0|5.0|4.0 - 19.7|6.0|145.0|175.0|3.62|2.77|15.5|0.0|1.0|5.0|6.0 - 15.0|8.0|301.0|335.0|3.54|3.57|14.6|0.0|1.0|5.0|8.0 - 21.4|4.0|121.0|109.0|4.11|2.78|18.6|1.0|1.0|4.0|2.0 # write_csv: quote_style and quote works @@ -390,3 +356,30 @@ 6.2,3.4,5.4,2.3,+virginica+ 5.9,3.0,5.1,1.8,+virginica+ +# write_csv: quote_style quote_style=necessary + + Code + cat(readLines(path), sep = "\n") + Output + a,b,c + """foo""",1,a + bar,2,b + +# write_csv: quote_style quote_style=always + + Code + cat(readLines(path), sep = "\n") + Output + "a","b","c" + """foo""","1","a" + 
"bar","2","b" + +# write_csv: quote_style quote_style=non_numeric + + Code + cat(readLines(path), sep = "\n") + Output + "a","b","c" + """foo""",1,"a" + "bar",2,"b" + diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 09b4fd5ad..601dc5e56 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -24,18 +24,13 @@ test_that("csv read iris", { }) -dat = mtcars +dat = head(mtcars, n = 15) dat[c(1, 3, 9, 12), c(3, 4, 5)] = NA dat_pl = pl$DataFrame(dat) temp_noext = tempfile() temp_out = tempfile(fileext = ".csv") test_that("write_csv: path works", { - expect_error( - dat_pl$write_csv(temp_noext), - "must the path to a CSV file" - ) - dat_pl$write_csv(temp_out) expect_identical( pl$read_csv(temp_out)$to_data_frame(), @@ -81,6 +76,7 @@ patrick::with_parameters_test_that( b = 1:2, c = letters[1:2] )$write_csv(temp_out, quote_style = quote_style) + expect_snapshot_file(temp_out) }, quote_style = c("necessary", "always", "non_numeric") ) From dee44026093e3959b9547fa401a8db24c481e89c Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 16 Oct 2023 18:54:28 +0200 Subject: [PATCH 14/25] bump news --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 2271b2f1c..1b499ea42 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,7 @@ - New function `pl$raw_list` and class `rpolars_raw_list` a list of R Raw's, where missing is encoded as `NULL` to aid conversion to polars binary Series. Support back and forth conversion from polars binary literal and Series to R raw (#417). +- New method `$write_csv()` for `DataFrame` (#414). 
# polars 0.8.1 From 460bf3b51156c70d955238e7da7926fbab5b95a3 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 17 Oct 2023 13:30:30 +0200 Subject: [PATCH 15/25] add some tests for date_format, time_format and datetime_format --- tests/testthat/_snaps/csv.md | 46 ++++++++++++++++++++++++++++++++++++ tests/testthat/test-csv.R | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md index 33b856e36..308f03186 100644 --- a/tests/testthat/_snaps/csv.md +++ b/tests/testthat/_snaps/csv.md @@ -383,3 +383,49 @@ """foo""",1,"a" "bar",2,"b" +# write_csv: date_format works + + Code + cat(readLines(path), sep = "\n") + Output + date + 2020 + 2021 + 2022 + 2023 + +--- + + Code + cat(readLines(path), sep = "\n") + Output + date + 01/01/2020 + 01/01/2021 + 01/01/2022 + 01/01/2023 + +# write_csv: datetime_format works + + Code + cat(readLines(path), sep = "\n") + Output + date + 00h00m - 01/01/2020 + 06h00m - 01/01/2020 + 12h00m - 01/01/2020 + 18h00m - 01/01/2020 + 00h00m - 02/01/2020 + +# write_csv: time_format works + + Code + cat(readLines(path), sep = "\n") + Output + date + 2023-10-16T22:00:00.000000 + 2023-10-16T22:15:00.000000 + 2023-10-16T22:30:00.000000 + 2023-10-16T22:45:00.000000 + 2023-10-16T23:00:00.000000 + diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 601dc5e56..3dd9e1b66 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -80,3 +80,44 @@ patrick::with_parameters_test_that( }, quote_style = c("necessary", "always", "non_numeric") ) + +test_that("write_csv: date_format works", { + dat <- pl$DataFrame( + date = pl$date_range( + as.Date("2020-01-01"), + as.Date("2023-01-02"), + interval = "1y", + eager = TRUE + ) + ) + dat$write_csv(temp_out, date_format = "%Y") + expect_snapshot_file(temp_out) + dat$write_csv(temp_out, date_format = "%d/%m/%Y") + expect_snapshot_file(temp_out) 
+}) + +test_that("write_csv: datetime_format works", { + dat <- pl$DataFrame( + date = pl$date_range( + as.Date("2020-01-01"), + as.Date("2020-01-02"), + interval = "6h", + eager = TRUE + ) + ) + dat$write_csv(temp_out, datetime_format = "%Hh%Mm - %d/%m/%Y") + expect_snapshot_file(temp_out) +}) + +test_that("write_csv: time_format works", { + dat <- pl$DataFrame( + date = pl$date_range( + strptime("00:00:00", format = "%H:%M:%S"), + strptime("01:00:00", format = "%H:%M:%S"), + interval = "15m", + eager = TRUE + ) + ) + dat$write_csv(temp_out, time_format = "%Hh%Mm%%Ss") + expect_snapshot_file(temp_out) +}) From 8a554498eb6b7d20047fc3614ab1d649ea4cbf4a Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 17 Oct 2023 13:32:45 +0200 Subject: [PATCH 16/25] test float_precision --- tests/testthat/_snaps/csv.md | 18 ++++++++++++++++++ tests/testthat/test-csv.R | 10 ++++++++++ 2 files changed, 28 insertions(+) diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md index 308f03186..fbfd5d01c 100644 --- a/tests/testthat/_snaps/csv.md +++ b/tests/testthat/_snaps/csv.md @@ -429,3 +429,21 @@ 2023-10-16T22:45:00.000000 2023-10-16T23:00:00.000000 +# write_csv: float_precision works + + Code + cat(readLines(path), sep = "\n") + Output + x + 1.2 + 5.6 + +--- + + Code + cat(readLines(path), sep = "\n") + Output + x + 1.234 + 5.600 + diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 3dd9e1b66..a07bbb863 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -121,3 +121,13 @@ test_that("write_csv: time_format works", { dat$write_csv(temp_out, time_format = "%Hh%Mm%%Ss") expect_snapshot_file(temp_out) }) + + +test_that("write_csv: float_precision works", { + dat <- pl$DataFrame(x = c(1.234, 5.6)) + dat$write_csv(temp_out, float_precision = 1) + expect_snapshot_file(temp_out) + + dat$write_csv(temp_out, float_precision = 3) + expect_snapshot_file(temp_out) +}) From 
32294710390087877f33f5ee76f02e8111d56c65 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 17 Oct 2023 13:36:04 +0200 Subject: [PATCH 17/25] add examples --- R/dataframe__frame.R | 4 ++++ man/IO_write_csv.Rd | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index e82baae9b..c1bde697f 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1726,6 +1726,10 @@ DataFrame_sample = function( #' @examples #' dat = pl$DataFrame(mtcars) #' +#' destination = tempfile(fileext = ".csv") +#' dat$select(pl$col("drat", "mpg"))$write_csv(destination) +#' +#' pl$read_csv(destination) DataFrame_write_csv = function( path, diff --git a/man/IO_write_csv.Rd b/man/IO_write_csv.Rd index 0d2cca41d..bf9ed3fc6 100644 --- a/man/IO_write_csv.Rd +++ b/man/IO_write_csv.Rd @@ -71,4 +71,8 @@ Write to comma-separated values (CSV) file \examples{ dat = pl$DataFrame(mtcars) +destination = tempfile(fileext = ".csv") +dat$select(pl$col("drat", "mpg"))$write_csv(destination) + +pl$read_csv(destination) } From 3539a04532ab5ec3089d4c96b63437f068e94cde Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 17 Oct 2023 14:08:08 +0200 Subject: [PATCH 18/25] remove time_format test for now --- tests/testthat/_snaps/csv.md | 12 ------------ tests/testthat/test-csv.R | 25 +++++++++++++------------ 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md index fbfd5d01c..aecb27a65 100644 --- a/tests/testthat/_snaps/csv.md +++ b/tests/testthat/_snaps/csv.md @@ -417,18 +417,6 @@ 18h00m - 01/01/2020 00h00m - 02/01/2020 -# write_csv: time_format works - - Code - cat(readLines(path), sep = "\n") - Output - date - 2023-10-16T22:00:00.000000 - 2023-10-16T22:15:00.000000 - 2023-10-16T22:30:00.000000 - 2023-10-16T22:45:00.000000 - 2023-10-16T23:00:00.000000 - # write_csv: float_precision 
works Code diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index a07bbb863..a37400c29 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -109,18 +109,19 @@ test_that("write_csv: datetime_format works", { expect_snapshot_file(temp_out) }) -test_that("write_csv: time_format works", { - dat <- pl$DataFrame( - date = pl$date_range( - strptime("00:00:00", format = "%H:%M:%S"), - strptime("01:00:00", format = "%H:%M:%S"), - interval = "15m", - eager = TRUE - ) - ) - dat$write_csv(temp_out, time_format = "%Hh%Mm%%Ss") - expect_snapshot_file(temp_out) -}) +# TODO: uncomment this when $dt$time is implemented +# test_that("write_csv: time_format works", { +# dat <- pl$DataFrame( +# date = pl$date_range( +# strptime("00:00:00", format = "%H:%M:%S"), +# strptime("01:00:00", format = "%H:%M:%S"), +# interval = "15m", +# eager = TRUE +# )$dt$time() +# ) +# dat$write_csv(temp_out, time_format = "%Hh%Mm%%Ss") +# expect_snapshot_file(temp_out) +# }) test_that("write_csv: float_precision works", { From 53cfc9c32b6bd9de3f4516401912f39ae00a778a Mon Sep 17 00:00:00 2001 From: sorhawell Date: Wed, 18 Oct 2023 00:34:21 +0200 Subject: [PATCH 19/25] more robj_to!(x,y)?, add QuoteStyle, Utf8Byte --- R/dataframe__frame.R | 8 +---- src/rust/src/rdataframe/mod.rs | 56 +++++++++++----------------------- src/rust/src/utils/mod.rs | 31 +++++++++++++++++++ 3 files changed, 49 insertions(+), 46 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index c1bde697f..83bcf0055 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1730,7 +1730,6 @@ DataFrame_sample = function( #' dat$select(pl$col("drat", "mpg"))$write_csv(destination) #' #' pl$read_csv(destination) - DataFrame_write_csv = function( path, has_header = TRUE, @@ -1746,14 +1745,9 @@ DataFrame_write_csv = function( quote_style = "necessary" ) { - if (length(quote_style) == 0 || - !quote_style %in% c("always", "necessary", "non_numeric")) { - stop("Argument `quote_style` 
must be one of 'always', 'necessary', or 'non_numeric'.") - } - .pr$DataFrame$write_csv( self, - path, has_header, utf8ToInt(separator), line_terminator, utf8ToInt(quote), batch_size, + path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_values, quote_style ) |> diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 232af2078..5d96d7ffe 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -1,5 +1,5 @@ use extendr_api::{extendr, prelude::*, rprintln, Rinternals}; -use polars::prelude::{self as pl, IntoLazy}; +use polars::prelude::{self as pl, IntoLazy, SerWriter}; use std::result::Result; pub mod read_csv; pub mod read_ipc; @@ -11,8 +11,6 @@ use crate::rdatatype::RPolarsDataType; use crate::robj_to; use crate::rpolarserr::*; -use polars::prelude::{CsvWriter, QuoteStyle, SerWriter}; - pub use lazy::dataframe::*; use crate::conversion_s_to_r::pl_series_to_list; @@ -446,7 +444,7 @@ impl DataFrame { } pub fn write_csv( - &mut self, + &self, path: Robj, has_header: Robj, separator: Robj, @@ -460,45 +458,25 @@ impl DataFrame { null_value: Robj, quote_style: Robj, ) -> RResult<()> { - let null = robj_to!(String, null_value).unwrap(); - let path = robj_to!(str, path).unwrap(); - let f = std::fs::File::create(path).unwrap(); - let qs = parse_quote_style(quote_style); - - CsvWriter::new(f) - .has_header(robj_to!(bool, has_header).unwrap()) - .with_delimiter(robj_to!(u8, separator).unwrap()) - .with_line_terminator(robj_to!(String, line_terminator).unwrap()) - .with_quoting_char(robj_to!(u8, quote).unwrap()) - .with_batch_size(robj_to!(usize, batch_size).unwrap()) - .with_datetime_format(robj_to!(Option, String, datetime_format).unwrap()) - .with_date_format(robj_to!(Option, String, date_format).unwrap()) - .with_time_format(robj_to!(Option, String, time_format).unwrap()) - .with_float_precision(robj_to!(Option, usize, 
float_precision).unwrap()) - .with_null_value(null) - .with_quote_style(qs) - .finish(&mut self.0) + let path = robj_to!(str, path)?; + let f = std::fs::File::create(path)?; + pl::CsvWriter::new(f) + .has_header(robj_to!(bool, has_header)?) + .with_delimiter(robj_to!(Utf8Byte, separator)?) + .with_line_terminator(robj_to!(String, line_terminator)?) + .with_quoting_char(robj_to!(Utf8Byte, quote)?) + .with_batch_size(robj_to!(usize, batch_size)?) + .with_datetime_format(robj_to!(Option, String, datetime_format)?) + .with_date_format(robj_to!(Option, String, date_format)?) + .with_time_format(robj_to!(Option, String, time_format)?) + .with_float_precision(robj_to!(Option, usize, float_precision)?) + .with_null_value(robj_to!(String, null_value)?) + .with_quote_style(robj_to!(QuoteStyle, quote_style)?) + .finish(&mut self.0.clone()) .map_err(polars_to_rpolars_err) } } -pub fn parse_quote_style(x: Robj) -> QuoteStyle { - match robj_to!(Option, String, x) - .unwrap_or_default() - .unwrap() - .as_str() - { - "always" => QuoteStyle::Always, - "necessary" => QuoteStyle::Necessary, - "non_numeric" => QuoteStyle::NonNumeric, - // "never" is available in rust-polars devel only for now (will be added in 0.34) - // "never" => QuoteStyle::Never, - _ => panic!( - "polars internal error: `quote_style` must be 'always', 'necessary' or 'non_numeric'." 
- ), - } -} - impl DataFrame { pub fn to_list_result(&self) -> Result { //convert DataFrame to Result of to R vectors, error if DataType is not supported diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index aa28bb6eb..d11715755 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -535,6 +535,29 @@ pub fn robj_to_usize(robj: extendr_api::Robj) -> RResult { robj_to_u64(robj).and_then(try_u64_into_usize) } +pub fn robj_to_utf8_byte(robj: extendr_api::Robj) -> RResult { + let mut utf8_byte_iter = robj_to_str(robj)?.as_bytes().iter(); + match (utf8_byte_iter.next(), utf8_byte_iter.next()) { + (Some(s), None) => Ok(*s), + (None, None) => rerr().plain("cannot extract single byte from empty string"), + (Some(_), Some(_)) => rerr().plain("multi byte-string not allowed"), + (None, Some(_)) => unreachable!("the iter() cannot yield Some after None(depleted)"), + } +} + +pub fn robj_to_quote_style(robj: Robj) -> RResult { + match robj_to_str(robj.clone())? { + "always" => Ok(pl::QuoteStyle::Always), + "necessary" => Ok(pl::QuoteStyle::Necessary), + "non_numeric" => Ok(pl::QuoteStyle::NonNumeric), + // "never" is available in rust-polars devel only for now (will be added in 0.34) + // "never" => Ok(QuoteStyle::Never), + _ => rerr() + .plain("a `quote_style` must be 'always', 'necessary' or 'non_numeric'.") + .bad_robj(&robj), + } +} + fn err_no_nan() -> RResult { rerr().plain("any NA value is not allowed here".to_string()) } @@ -885,6 +908,10 @@ macro_rules! robj_to_inner { $crate::utils::robj_to_u8($a) }; + (Utf8Byte, $a:ident) => { + $crate::utils::robj_to_utf8_byte($a) + }; + (char, $a:ident) => { $crate::utils::robj_to_char($a) }; @@ -985,6 +1012,10 @@ macro_rules! 
robj_to_inner { $crate::utils::robj_to_dataframe($a).map(|lf| lf.0) }; + (QuoteStyle, $a:ident) => { + $crate::utils::robj_to_quote_style($a) + }; + (RArrow_schema, $a:ident) => { $crate::utils::robj_to_rarrow_schema($a) }; From d8949bb1885040fbc637a9f25b2437311053dc42 Mon Sep 17 00:00:00 2001 From: sorhawell Date: Wed, 18 Oct 2023 00:56:44 +0200 Subject: [PATCH 20/25] unit test new robj_to! conversions and errors --- tests/testthat/test-csv.R | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index a37400c29..58fde3152 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -56,16 +56,35 @@ test_that("write_csv: separator works", { test_that("write_csv: quote_style and quote works", { dat_pl2 = pl$DataFrame(iris) - expect_error( - dat_pl2$write_csv(temp_out, quote_style = "foo"), - "must be one of" - ) + #wrong quote_style + ctx = dat_pl2$write_csv(temp_out, quote_style = "foo") |> get_err_ctx() + expect_identical(ctx$BadArgument, "quote_style") + expect_identical(ctx$Plain, "a `quote_style` must be 'always', 'necessary' or 'non_numeric'.") + # wrong quote_style type + ctx = dat_pl2$write_csv(temp_out, quote_style = 42) |> get_err_ctx() + expect_identical(ctx$TypeMismatch, "&str") + + # ok quote_style and quote dat_pl2$write_csv(temp_out, quote_style = "always", quote = "+") expect_snapshot_file(temp_out) - dat_pl2$write_csv(temp_out, quote_style = "non_numeric", quote = "+") + # ok also + ctx = dat_pl2$write_csv(temp_out, quote_style = "non_numeric", quote = "+") expect_snapshot_file(temp_out) + + # zero byte quote + ctx = dat_pl2$write_csv(temp_out, quote = "") |> get_err_ctx() + expect_identical(ctx$Plain, "cannot extract single byte from empty string") + + # multi byte quote not allowed + ctx = dat_pl2$write_csv(temp_out, quote = "£") |> get_err_ctx() + expect_identical(ctx$Plain, "multi byte-string not allowed") + + # multi string not allowed 
+ ctx = dat_pl2$write_csv(temp_out, quote = c("a","b")) |> get_err_ctx() + expect_identical(ctx$TypeMismatch, "&str") + }) patrick::with_parameters_test_that( From 1e5bcba857195522d6482a6469617c301b42da57 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 18 Oct 2023 08:32:42 +0200 Subject: [PATCH 21/25] do not export --- NAMESPACE | 1 - R/dataframe__frame.R | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b7e654aa8..14166760d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -153,7 +153,6 @@ S3method(tail,LazyFrame) S3method(unique,DataFrame) S3method(unique,LazyFrame) export(.pr) -export(DataFrame_write_csv) export(as_polars_series) export(knit_print.DataFrame) export(pl) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 83bcf0055..fe1929f0e 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1720,7 +1720,7 @@ DataFrame_sample = function( #' @return #' This doesn't return anything but creates a CSV file. 
-#' @export +#' #' @rdname IO_write_csv #' #' @examples From 5e5473acffca8c13462df811aa466216bc756e4d Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 18 Oct 2023 08:35:35 +0200 Subject: [PATCH 22/25] remove unused utils --- R/utils.R | 6 ------ 1 file changed, 6 deletions(-) diff --git a/R/utils.R b/R/utils.R index 0cf103154..46b3c1dd1 100644 --- a/R/utils.R +++ b/R/utils.R @@ -620,9 +620,3 @@ is_bool = function(x) { dtypes_are_struct = function(dtypes) { sapply(dtypes, \(dt) pl$same_outer_dt(dt, pl$Struct())) } - -# from tools::file_ext() -file_ext <- function(x) { - pos <- regexpr("\\.([[:alnum:]]+)$", x) - ifelse(pos > -1L, substring(x, pos + 1L), "") -} From 55a83580b2a43716ecf6d945e36bf6f0694615d1 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 18 Oct 2023 08:38:33 +0200 Subject: [PATCH 23/25] uncomment a test --- tests/testthat/_snaps/csv.md | 12 ++++++++++++ tests/testthat/test-csv.R | 25 ++++++++++++------------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md index aecb27a65..6bf215bb7 100644 --- a/tests/testthat/_snaps/csv.md +++ b/tests/testthat/_snaps/csv.md @@ -417,6 +417,18 @@ 18h00m - 01/01/2020 00h00m - 02/01/2020 +# write_csv: time_format works + + Code + cat(readLines(path), sep = "\n") + Output + date + 22h00m00s + 22h15m00s + 22h30m00s + 22h45m00s + 23h00m00s + # write_csv: float_precision works Code diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 58fde3152..5b3fab7c2 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -128,19 +128,18 @@ test_that("write_csv: datetime_format works", { expect_snapshot_file(temp_out) }) -# TODO: uncomment this when $dt$time is implemented -# test_that("write_csv: time_format works", { -# dat <- pl$DataFrame( -# date = pl$date_range( -# strptime("00:00:00", format = "%H:%M:%S"), 
-# strptime("01:00:00", format = "%H:%M:%S"), -# interval = "15m", -# eager = TRUE -# )$dt$time() -# ) -# dat$write_csv(temp_out, time_format = "%Hh%Mm%%Ss") -# expect_snapshot_file(temp_out) -# }) +test_that("write_csv: time_format works", { + dat <- pl$DataFrame( + date = pl$date_range( + strptime("00:00:00", format = "%H:%M:%S"), + strptime("01:00:00", format = "%H:%M:%S"), + interval = "15m", + eager = TRUE + ) + )$with_columns(pl$col("date")$dt$time()) + dat$write_csv(temp_out, time_format = "%Hh%Mm%Ss") + expect_snapshot_file(temp_out) +}) test_that("write_csv: float_precision works", { From d53f912b954ca8597f6e93f9670cabf411da39a8 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 18 Oct 2023 13:39:13 +0200 Subject: [PATCH 24/25] try to fix test --- tests/testthat/_snaps/csv.md | 9 ++++----- tests/testthat/test-csv.R | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md index 6bf215bb7..84c909389 100644 --- a/tests/testthat/_snaps/csv.md +++ b/tests/testthat/_snaps/csv.md @@ -423,11 +423,10 @@ cat(readLines(path), sep = "\n") Output date - 22h00m00s - 22h15m00s - 22h30m00s - 22h45m00s - 23h00m00s + 00h00m00s + 08h00m00s + 16h00m00s + 00h00m00s # write_csv: float_precision works diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 5b3fab7c2..3a6acb375 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -131,9 +131,9 @@ test_that("write_csv: datetime_format works", { test_that("write_csv: time_format works", { dat <- pl$DataFrame( date = pl$date_range( - strptime("00:00:00", format = "%H:%M:%S"), - strptime("01:00:00", format = "%H:%M:%S"), - interval = "15m", + as.Date("2020-10-17"), + as.Date("2020-10-18"), + "8h", eager = TRUE ) )$with_columns(pl$col("date")$dt$time()) From 2e2a4a817f6be8b57a92ead17ba70ada454e8f60 Mon Sep 17 00:00:00 2001 From: eitsupi Date: Wed, 18 Oct 2023 12:14:58 
+0000 Subject: [PATCH 25/25] refactor: auto formatting --- R/dataframe__frame.R | 4 +--- src/rust/src/lazy/dsl.rs | 7 +------ tests/testthat/test-csv.R | 13 ++++++------- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index fe1929f0e..fa3636336 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1742,9 +1742,7 @@ DataFrame_write_csv = function( time_format = NULL, float_precision = NULL, null_values = "", - quote_style = "necessary" -) { - + quote_style = "necessary") { .pr$DataFrame$write_csv( self, path, has_header, separator, line_terminator, quote, batch_size, diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 26cd804c0..683e73322 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -1314,12 +1314,7 @@ impl Expr { } pub fn dt_time(&self) -> RResult { - Ok(self - .0 - .clone() - .dt() - .time() - .into()) + Ok(self.0.clone().dt().time().into()) } pub fn dt_combine(&self, time: Robj, tu: Robj) -> RResult { diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 3a6acb375..0933e5379 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -56,7 +56,7 @@ test_that("write_csv: separator works", { test_that("write_csv: quote_style and quote works", { dat_pl2 = pl$DataFrame(iris) - #wrong quote_style + # wrong quote_style ctx = dat_pl2$write_csv(temp_out, quote_style = "foo") |> get_err_ctx() expect_identical(ctx$BadArgument, "quote_style") expect_identical(ctx$Plain, "a `quote_style` must be 'always', 'necessary' or 'non_numeric'.") @@ -82,9 +82,8 @@ test_that("write_csv: quote_style and quote works", { expect_identical(ctx$Plain, "multi byte-string not allowed") # multi string not allowed - ctx = dat_pl2$write_csv(temp_out, quote = c("a","b")) |> get_err_ctx() + ctx = dat_pl2$write_csv(temp_out, quote = c("a", "b")) |> get_err_ctx() expect_identical(ctx$TypeMismatch, "&str") - }) patrick::with_parameters_test_that( @@ 
-101,7 +100,7 @@ patrick::with_parameters_test_that( ) test_that("write_csv: date_format works", { - dat <- pl$DataFrame( + dat = pl$DataFrame( date = pl$date_range( as.Date("2020-01-01"), as.Date("2023-01-02"), @@ -116,7 +115,7 @@ test_that("write_csv: date_format works", { }) test_that("write_csv: datetime_format works", { - dat <- pl$DataFrame( + dat = pl$DataFrame( date = pl$date_range( as.Date("2020-01-01"), as.Date("2020-01-02"), @@ -129,7 +128,7 @@ test_that("write_csv: datetime_format works", { }) test_that("write_csv: time_format works", { - dat <- pl$DataFrame( + dat = pl$DataFrame( date = pl$date_range( as.Date("2020-10-17"), as.Date("2020-10-18"), @@ -143,7 +142,7 @@ test_that("write_csv: time_format works", { test_that("write_csv: float_precision works", { - dat <- pl$DataFrame(x = c(1.234, 5.6)) + dat = pl$DataFrame(x = c(1.234, 5.6)) dat$write_csv(temp_out, float_precision = 1) expect_snapshot_file(temp_out)