diff --git a/NEWS.md b/NEWS.md index 067ce302a..08393d392 100644 --- a/NEWS.md +++ b/NEWS.md @@ -44,6 +44,7 @@ - New function `pl$raw_list` and class `rpolars_raw_list` a list of R Raw's, where missing is encoded as `NULL` to aid conversion to polars binary Series. Support back and forth conversion from polars binary literal and Series to R raw (#417). +- New method `$write_csv()` for `DataFrame` (#414). - New method `$dt$time()` to extract the time from a `datetime` variable (#428). # polars 0.8.1 diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 71580113a..fa3636336 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1680,3 +1680,75 @@ DataFrame_sample = function( ) |> unwrap("in $sample():") } + + + +#' Write to comma-separated values (CSV) file +#' +#' @param path File path to which the result should be written. +#' @param has_header Whether to include header in the CSV output. +#' @param separator Separate CSV fields with this symbol. +#' @param line_terminator String used to end each row. +#' @param quote Byte to use as quoting character. +#' @param batch_size Number of rows that will be processed per thread. +#' @param datetime_format A format string, with the specifiers defined by the +#' chrono Rust crate. If no format specified, the default fractional-second +#' precision is inferred from the maximum timeunit found in the frame’s Datetime +#' cols (if any). +#' @param date_format A format string, with the specifiers defined by the chrono +#' Rust crate. +#' @param time_format A format string, with the specifiers defined by the chrono +#' Rust crate. +#' @param float_precision Number of decimal places to write, applied to both +#' Float32 and Float64 datatypes. +#' @param null_values A string representing null values (defaulting to the empty +#' string). +#' @param quote_style Determines the quoting strategy used. +#' * `"necessary"` (default): This puts quotes around fields only when necessary. 
+#' They are necessary when fields contain a quote, delimiter or record +#' terminator. Quotes are also necessary when writing an empty record (which +#' is indistinguishable from a record with one empty field). This is the +#' default. +#' * `"always"`: This puts quotes around every field. +#' * `"non_numeric"`: This puts quotes around all fields that are non-numeric. +#' Namely, when writing a field that does not parse as a valid float or integer, +#' then quotes will be used even if they aren't strictly necessary. + +# TODO: include "never" when bumping rust-polars to 0.34 +# * `"never"`: This never puts quotes around fields, even if that results in +# invalid CSV data (e.g.: by not quoting strings containing the separator). + +#' @return +#' This doesn't return anything but creates a CSV file. +#' +#' @rdname IO_write_csv +#' +#' @examples +#' dat = pl$DataFrame(mtcars) +#' +#' destination = tempfile(fileext = ".csv") +#' dat$select(pl$col("drat", "mpg"))$write_csv(destination) +#' +#' pl$read_csv(destination) +DataFrame_write_csv = function( + path, + has_header = TRUE, + separator = ",", + line_terminator = "\n", + quote = '"', + batch_size = 1024, + datetime_format = NULL, + date_format = NULL, + time_format = NULL, + float_precision = NULL, + null_values = "", + quote_style = "necessary") { + .pr$DataFrame$write_csv( + self, + path, has_header, separator, line_terminator, quote, batch_size, + datetime_format, date_format, time_format, float_precision, + null_values, quote_style + ) |> + unwrap("in $write_csv():") |> + invisible() +} diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index c39ec7c17..0604a83f3 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -189,6 +189,8 @@ DataFrame$sample_n <- function(n, with_replacement, shuffle, seed) .Call(wrap__D DataFrame$sample_frac <- function(frac, with_replacement, shuffle, seed) .Call(wrap__DataFrame__sample_frac, self, frac, with_replacement, shuffle, seed) +DataFrame$write_csv <- 
function(path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_value, quote_style) .Call(wrap__DataFrame__write_csv, self, path, has_header, separator, line_terminator, quote, batch_size, datetime_format, date_format, time_format, float_precision, null_value, quote_style) + #' @export `$.DataFrame` <- function (self, name) { func <- DataFrame[[name]]; environment(func) <- environment(); func } diff --git a/man/IO_write_csv.Rd b/man/IO_write_csv.Rd new file mode 100644 index 000000000..bf9ed3fc6 --- /dev/null +++ b/man/IO_write_csv.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataframe__frame.R +\name{DataFrame_write_csv} +\alias{DataFrame_write_csv} +\title{Write to comma-separated values (CSV) file} +\usage{ +DataFrame_write_csv( + path, + has_header = TRUE, + separator = ",", + line_terminator = "\\n", + quote = "\\"", + batch_size = 1024, + datetime_format = NULL, + date_format = NULL, + time_format = NULL, + float_precision = NULL, + null_values = "", + quote_style = "necessary" +) +} +\arguments{ +\item{path}{File path to which the result should be written.} + +\item{has_header}{Whether to include header in the CSV output.} + +\item{separator}{Separate CSV fields with this symbol.} + +\item{line_terminator}{String used to end each row.} + +\item{quote}{Byte to use as quoting character.} + +\item{batch_size}{Number of rows that will be processed per thread.} + +\item{datetime_format}{A format string, with the specifiers defined by the +chrono Rust crate. 
If no format specified, the default fractional-second +precision is inferred from the maximum timeunit found in the frame’s Datetime +cols (if any).} + +\item{date_format}{A format string, with the specifiers defined by the chrono +Rust crate.} + +\item{time_format}{A format string, with the specifiers defined by the chrono +Rust crate.} + +\item{float_precision}{Number of decimal places to write, applied to both +Float32 and Float64 datatypes.} + +\item{null_values}{A string representing null values (defaulting to the empty +string).} + +\item{quote_style}{Determines the quoting strategy used. +\itemize{ +\item \code{"necessary"} (default): This puts quotes around fields only when necessary. +They are necessary when fields contain a quote, delimiter or record +terminator. Quotes are also necessary when writing an empty record (which +is indistinguishable from a record with one empty field). This is the +default. +\item \code{"always"}: This puts quotes around every field. +\item \code{"non_numeric"}: This puts quotes around all fields that are non-numeric. +Namely, when writing a field that does not parse as a valid float or integer, +then quotes will be used even if they aren't strictly necessary. +}} +} +\value{ +This doesn't return anything but creates a CSV file. 
+} +\description{ +Write to comma-separated values (CSV) file +} +\examples{ +dat = pl$DataFrame(mtcars) + +destination = tempfile(fileext = ".csv") +dat$select(pl$col("drat", "mpg"))$write_csv(destination) + +pl$read_csv(destination) +} diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 26cd804c0..683e73322 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -1314,12 +1314,7 @@ impl Expr { } pub fn dt_time(&self) -> RResult { - Ok(self - .0 - .clone() - .dt() - .time() - .into()) + Ok(self.0.clone().dt().time().into()) } pub fn dt_combine(&self, time: Robj, tu: Robj) -> RResult { diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 87e41fce0..5d96d7ffe 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -1,5 +1,5 @@ use extendr_api::{extendr, prelude::*, rprintln, Rinternals}; -use polars::prelude::{self as pl, IntoLazy}; +use polars::prelude::{self as pl, IntoLazy, SerWriter}; use std::result::Result; pub mod read_csv; pub mod read_ipc; @@ -442,7 +442,41 @@ impl DataFrame { .map_err(polars_to_rpolars_err) .map(DataFrame) } + + pub fn write_csv( + &self, + path: Robj, + has_header: Robj, + separator: Robj, + line_terminator: Robj, + quote: Robj, + batch_size: Robj, + datetime_format: Robj, + date_format: Robj, + time_format: Robj, + float_precision: Robj, + null_value: Robj, + quote_style: Robj, + ) -> RResult<()> { + let path = robj_to!(str, path)?; + let f = std::fs::File::create(path)?; + pl::CsvWriter::new(f) + .has_header(robj_to!(bool, has_header)?) + .with_delimiter(robj_to!(Utf8Byte, separator)?) + .with_line_terminator(robj_to!(String, line_terminator)?) + .with_quoting_char(robj_to!(Utf8Byte, quote)?) + .with_batch_size(robj_to!(usize, batch_size)?) + .with_datetime_format(robj_to!(Option, String, datetime_format)?) + .with_date_format(robj_to!(Option, String, date_format)?) + .with_time_format(robj_to!(Option, String, time_format)?) 
+ .with_float_precision(robj_to!(Option, usize, float_precision)?) + .with_null_value(robj_to!(String, null_value)?) + .with_quote_style(robj_to!(QuoteStyle, quote_style)?) + .finish(&mut self.0.clone()) + .map_err(polars_to_rpolars_err) + } } + impl DataFrame { pub fn to_list_result(&self) -> Result { //convert DataFrame to Result of to R vectors, error if DataType is not supported diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index aa28bb6eb..d11715755 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -535,6 +535,29 @@ pub fn robj_to_usize(robj: extendr_api::Robj) -> RResult { robj_to_u64(robj).and_then(try_u64_into_usize) } +pub fn robj_to_utf8_byte(robj: extendr_api::Robj) -> RResult { + let mut utf8_byte_iter = robj_to_str(robj)?.as_bytes().iter(); + match (utf8_byte_iter.next(), utf8_byte_iter.next()) { + (Some(s), None) => Ok(*s), + (None, None) => rerr().plain("cannot extract single byte from empty string"), + (Some(_), Some(_)) => rerr().plain("multi byte-string not allowed"), + (None, Some(_)) => unreachable!("the iter() cannot yield Some after None(depleted)"), + } +} + +pub fn robj_to_quote_style(robj: Robj) -> RResult { + match robj_to_str(robj.clone())? { + "always" => Ok(pl::QuoteStyle::Always), + "necessary" => Ok(pl::QuoteStyle::Necessary), + "non_numeric" => Ok(pl::QuoteStyle::NonNumeric), + // "never" is available in rust-polars devel only for now (will be added in 0.34) + // "never" => Ok(QuoteStyle::Never), + _ => rerr() + .plain("a `quote_style` must be 'always', 'necessary' or 'non_numeric'.") + .bad_robj(&robj), + } +} + fn err_no_nan() -> RResult { rerr().plain("any NA value is not allowed here".to_string()) } @@ -885,6 +908,10 @@ macro_rules! robj_to_inner { $crate::utils::robj_to_u8($a) }; + (Utf8Byte, $a:ident) => { + $crate::utils::robj_to_utf8_byte($a) + }; + (char, $a:ident) => { $crate::utils::robj_to_char($a) }; @@ -985,6 +1012,10 @@ macro_rules! 
robj_to_inner { $crate::utils::robj_to_dataframe($a).map(|lf| lf.0) }; + (QuoteStyle, $a:ident) => { + $crate::utils::robj_to_quote_style($a) + }; + (RArrow_schema, $a:ident) => { $crate::utils::robj_to_rarrow_schema($a) }; diff --git a/tests/testthat/_snaps/csv.md b/tests/testthat/_snaps/csv.md new file mode 100644 index 000000000..84c909389 --- /dev/null +++ b/tests/testthat/_snaps/csv.md @@ -0,0 +1,448 @@ +# write_csv: null_values works + + Code + cat(readLines(path), sep = "\n") + Output + mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb + 21.0,6.0,hello,hello,hello,2.62,16.46,0.0,1.0,4.0,4.0 + 21.0,6.0,160.0,110.0,3.9,2.875,17.02,0.0,1.0,4.0,4.0 + 22.8,4.0,hello,hello,hello,2.32,18.61,1.0,1.0,4.0,1.0 + 21.4,6.0,258.0,110.0,3.08,3.215,19.44,1.0,0.0,3.0,1.0 + 18.7,8.0,360.0,175.0,3.15,3.44,17.02,0.0,0.0,3.0,2.0 + 18.1,6.0,225.0,105.0,2.76,3.46,20.22,1.0,0.0,3.0,1.0 + 14.3,8.0,360.0,245.0,3.21,3.57,15.84,0.0,0.0,3.0,4.0 + 24.4,4.0,146.7,62.0,3.69,3.19,20.0,1.0,0.0,4.0,2.0 + 22.8,4.0,hello,hello,hello,3.15,22.9,1.0,0.0,4.0,2.0 + 19.2,6.0,167.6,123.0,3.92,3.44,18.3,1.0,0.0,4.0,4.0 + 17.8,6.0,167.6,123.0,3.92,3.44,18.9,1.0,0.0,4.0,4.0 + 16.4,8.0,hello,hello,hello,4.07,17.4,0.0,0.0,3.0,3.0 + 17.3,8.0,275.8,180.0,3.07,3.73,17.6,0.0,0.0,3.0,3.0 + 15.2,8.0,275.8,180.0,3.07,3.78,18.0,0.0,0.0,3.0,3.0 + 10.4,8.0,472.0,205.0,2.93,5.25,17.98,0.0,0.0,3.0,4.0 + +# write_csv: separator works + + Code + cat(readLines(path), sep = "\n") + Output + mpg|cyl|disp|hp|drat|wt|qsec|vs|am|gear|carb + 21.0|6.0||||2.62|16.46|0.0|1.0|4.0|4.0 + 21.0|6.0|160.0|110.0|3.9|2.875|17.02|0.0|1.0|4.0|4.0 + 22.8|4.0||||2.32|18.61|1.0|1.0|4.0|1.0 + 21.4|6.0|258.0|110.0|3.08|3.215|19.44|1.0|0.0|3.0|1.0 + 18.7|8.0|360.0|175.0|3.15|3.44|17.02|0.0|0.0|3.0|2.0 + 18.1|6.0|225.0|105.0|2.76|3.46|20.22|1.0|0.0|3.0|1.0 + 14.3|8.0|360.0|245.0|3.21|3.57|15.84|0.0|0.0|3.0|4.0 + 24.4|4.0|146.7|62.0|3.69|3.19|20.0|1.0|0.0|4.0|2.0 + 22.8|4.0||||3.15|22.9|1.0|0.0|4.0|2.0 + 
19.2|6.0|167.6|123.0|3.92|3.44|18.3|1.0|0.0|4.0|4.0 + 17.8|6.0|167.6|123.0|3.92|3.44|18.9|1.0|0.0|4.0|4.0 + 16.4|8.0||||4.07|17.4|0.0|0.0|3.0|3.0 + 17.3|8.0|275.8|180.0|3.07|3.73|17.6|0.0|0.0|3.0|3.0 + 15.2|8.0|275.8|180.0|3.07|3.78|18.0|0.0|0.0|3.0|3.0 + 10.4|8.0|472.0|205.0|2.93|5.25|17.98|0.0|0.0|3.0|4.0 + +# write_csv: quote_style and quote works + + Code + cat(readLines(path), sep = "\n") + Output + +Sepal.Length+,+Sepal.Width+,+Petal.Length+,+Petal.Width+,+Species+ + +5.1+,+3.5+,+1.4+,+0.2+,+setosa+ + +4.9+,+3.0+,+1.4+,+0.2+,+setosa+ + +4.7+,+3.2+,+1.3+,+0.2+,+setosa+ + +4.6+,+3.1+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.6+,+1.4+,+0.2+,+setosa+ + +5.4+,+3.9+,+1.7+,+0.4+,+setosa+ + +4.6+,+3.4+,+1.4+,+0.3+,+setosa+ + +5.0+,+3.4+,+1.5+,+0.2+,+setosa+ + +4.4+,+2.9+,+1.4+,+0.2+,+setosa+ + +4.9+,+3.1+,+1.5+,+0.1+,+setosa+ + +5.4+,+3.7+,+1.5+,+0.2+,+setosa+ + +4.8+,+3.4+,+1.6+,+0.2+,+setosa+ + +4.8+,+3.0+,+1.4+,+0.1+,+setosa+ + +4.3+,+3.0+,+1.1+,+0.1+,+setosa+ + +5.8+,+4.0+,+1.2+,+0.2+,+setosa+ + +5.7+,+4.4+,+1.5+,+0.4+,+setosa+ + +5.4+,+3.9+,+1.3+,+0.4+,+setosa+ + +5.1+,+3.5+,+1.4+,+0.3+,+setosa+ + +5.7+,+3.8+,+1.7+,+0.3+,+setosa+ + +5.1+,+3.8+,+1.5+,+0.3+,+setosa+ + +5.4+,+3.4+,+1.7+,+0.2+,+setosa+ + +5.1+,+3.7+,+1.5+,+0.4+,+setosa+ + +4.6+,+3.6+,+1.0+,+0.2+,+setosa+ + +5.1+,+3.3+,+1.7+,+0.5+,+setosa+ + +4.8+,+3.4+,+1.9+,+0.2+,+setosa+ + +5.0+,+3.0+,+1.6+,+0.2+,+setosa+ + +5.0+,+3.4+,+1.6+,+0.4+,+setosa+ + +5.2+,+3.5+,+1.5+,+0.2+,+setosa+ + +5.2+,+3.4+,+1.4+,+0.2+,+setosa+ + +4.7+,+3.2+,+1.6+,+0.2+,+setosa+ + +4.8+,+3.1+,+1.6+,+0.2+,+setosa+ + +5.4+,+3.4+,+1.5+,+0.4+,+setosa+ + +5.2+,+4.1+,+1.5+,+0.1+,+setosa+ + +5.5+,+4.2+,+1.4+,+0.2+,+setosa+ + +4.9+,+3.1+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.2+,+1.2+,+0.2+,+setosa+ + +5.5+,+3.5+,+1.3+,+0.2+,+setosa+ + +4.9+,+3.6+,+1.4+,+0.1+,+setosa+ + +4.4+,+3.0+,+1.3+,+0.2+,+setosa+ + +5.1+,+3.4+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.5+,+1.3+,+0.3+,+setosa+ + +4.5+,+2.3+,+1.3+,+0.3+,+setosa+ + +4.4+,+3.2+,+1.3+,+0.2+,+setosa+ + 
+5.0+,+3.5+,+1.6+,+0.6+,+setosa+ + +5.1+,+3.8+,+1.9+,+0.4+,+setosa+ + +4.8+,+3.0+,+1.4+,+0.3+,+setosa+ + +5.1+,+3.8+,+1.6+,+0.2+,+setosa+ + +4.6+,+3.2+,+1.4+,+0.2+,+setosa+ + +5.3+,+3.7+,+1.5+,+0.2+,+setosa+ + +5.0+,+3.3+,+1.4+,+0.2+,+setosa+ + +7.0+,+3.2+,+4.7+,+1.4+,+versicolor+ + +6.4+,+3.2+,+4.5+,+1.5+,+versicolor+ + +6.9+,+3.1+,+4.9+,+1.5+,+versicolor+ + +5.5+,+2.3+,+4.0+,+1.3+,+versicolor+ + +6.5+,+2.8+,+4.6+,+1.5+,+versicolor+ + +5.7+,+2.8+,+4.5+,+1.3+,+versicolor+ + +6.3+,+3.3+,+4.7+,+1.6+,+versicolor+ + +4.9+,+2.4+,+3.3+,+1.0+,+versicolor+ + +6.6+,+2.9+,+4.6+,+1.3+,+versicolor+ + +5.2+,+2.7+,+3.9+,+1.4+,+versicolor+ + +5.0+,+2.0+,+3.5+,+1.0+,+versicolor+ + +5.9+,+3.0+,+4.2+,+1.5+,+versicolor+ + +6.0+,+2.2+,+4.0+,+1.0+,+versicolor+ + +6.1+,+2.9+,+4.7+,+1.4+,+versicolor+ + +5.6+,+2.9+,+3.6+,+1.3+,+versicolor+ + +6.7+,+3.1+,+4.4+,+1.4+,+versicolor+ + +5.6+,+3.0+,+4.5+,+1.5+,+versicolor+ + +5.8+,+2.7+,+4.1+,+1.0+,+versicolor+ + +6.2+,+2.2+,+4.5+,+1.5+,+versicolor+ + +5.6+,+2.5+,+3.9+,+1.1+,+versicolor+ + +5.9+,+3.2+,+4.8+,+1.8+,+versicolor+ + +6.1+,+2.8+,+4.0+,+1.3+,+versicolor+ + +6.3+,+2.5+,+4.9+,+1.5+,+versicolor+ + +6.1+,+2.8+,+4.7+,+1.2+,+versicolor+ + +6.4+,+2.9+,+4.3+,+1.3+,+versicolor+ + +6.6+,+3.0+,+4.4+,+1.4+,+versicolor+ + +6.8+,+2.8+,+4.8+,+1.4+,+versicolor+ + +6.7+,+3.0+,+5.0+,+1.7+,+versicolor+ + +6.0+,+2.9+,+4.5+,+1.5+,+versicolor+ + +5.7+,+2.6+,+3.5+,+1.0+,+versicolor+ + +5.5+,+2.4+,+3.8+,+1.1+,+versicolor+ + +5.5+,+2.4+,+3.7+,+1.0+,+versicolor+ + +5.8+,+2.7+,+3.9+,+1.2+,+versicolor+ + +6.0+,+2.7+,+5.1+,+1.6+,+versicolor+ + +5.4+,+3.0+,+4.5+,+1.5+,+versicolor+ + +6.0+,+3.4+,+4.5+,+1.6+,+versicolor+ + +6.7+,+3.1+,+4.7+,+1.5+,+versicolor+ + +6.3+,+2.3+,+4.4+,+1.3+,+versicolor+ + +5.6+,+3.0+,+4.1+,+1.3+,+versicolor+ + +5.5+,+2.5+,+4.0+,+1.3+,+versicolor+ + +5.5+,+2.6+,+4.4+,+1.2+,+versicolor+ + +6.1+,+3.0+,+4.6+,+1.4+,+versicolor+ + +5.8+,+2.6+,+4.0+,+1.2+,+versicolor+ + +5.0+,+2.3+,+3.3+,+1.0+,+versicolor+ + +5.6+,+2.7+,+4.2+,+1.3+,+versicolor+ + 
+5.7+,+3.0+,+4.2+,+1.2+,+versicolor+ + +5.7+,+2.9+,+4.2+,+1.3+,+versicolor+ + +6.2+,+2.9+,+4.3+,+1.3+,+versicolor+ + +5.1+,+2.5+,+3.0+,+1.1+,+versicolor+ + +5.7+,+2.8+,+4.1+,+1.3+,+versicolor+ + +6.3+,+3.3+,+6.0+,+2.5+,+virginica+ + +5.8+,+2.7+,+5.1+,+1.9+,+virginica+ + +7.1+,+3.0+,+5.9+,+2.1+,+virginica+ + +6.3+,+2.9+,+5.6+,+1.8+,+virginica+ + +6.5+,+3.0+,+5.8+,+2.2+,+virginica+ + +7.6+,+3.0+,+6.6+,+2.1+,+virginica+ + +4.9+,+2.5+,+4.5+,+1.7+,+virginica+ + +7.3+,+2.9+,+6.3+,+1.8+,+virginica+ + +6.7+,+2.5+,+5.8+,+1.8+,+virginica+ + +7.2+,+3.6+,+6.1+,+2.5+,+virginica+ + +6.5+,+3.2+,+5.1+,+2.0+,+virginica+ + +6.4+,+2.7+,+5.3+,+1.9+,+virginica+ + +6.8+,+3.0+,+5.5+,+2.1+,+virginica+ + +5.7+,+2.5+,+5.0+,+2.0+,+virginica+ + +5.8+,+2.8+,+5.1+,+2.4+,+virginica+ + +6.4+,+3.2+,+5.3+,+2.3+,+virginica+ + +6.5+,+3.0+,+5.5+,+1.8+,+virginica+ + +7.7+,+3.8+,+6.7+,+2.2+,+virginica+ + +7.7+,+2.6+,+6.9+,+2.3+,+virginica+ + +6.0+,+2.2+,+5.0+,+1.5+,+virginica+ + +6.9+,+3.2+,+5.7+,+2.3+,+virginica+ + +5.6+,+2.8+,+4.9+,+2.0+,+virginica+ + +7.7+,+2.8+,+6.7+,+2.0+,+virginica+ + +6.3+,+2.7+,+4.9+,+1.8+,+virginica+ + +6.7+,+3.3+,+5.7+,+2.1+,+virginica+ + +7.2+,+3.2+,+6.0+,+1.8+,+virginica+ + +6.2+,+2.8+,+4.8+,+1.8+,+virginica+ + +6.1+,+3.0+,+4.9+,+1.8+,+virginica+ + +6.4+,+2.8+,+5.6+,+2.1+,+virginica+ + +7.2+,+3.0+,+5.8+,+1.6+,+virginica+ + +7.4+,+2.8+,+6.1+,+1.9+,+virginica+ + +7.9+,+3.8+,+6.4+,+2.0+,+virginica+ + +6.4+,+2.8+,+5.6+,+2.2+,+virginica+ + +6.3+,+2.8+,+5.1+,+1.5+,+virginica+ + +6.1+,+2.6+,+5.6+,+1.4+,+virginica+ + +7.7+,+3.0+,+6.1+,+2.3+,+virginica+ + +6.3+,+3.4+,+5.6+,+2.4+,+virginica+ + +6.4+,+3.1+,+5.5+,+1.8+,+virginica+ + +6.0+,+3.0+,+4.8+,+1.8+,+virginica+ + +6.9+,+3.1+,+5.4+,+2.1+,+virginica+ + +6.7+,+3.1+,+5.6+,+2.4+,+virginica+ + +6.9+,+3.1+,+5.1+,+2.3+,+virginica+ + +5.8+,+2.7+,+5.1+,+1.9+,+virginica+ + +6.8+,+3.2+,+5.9+,+2.3+,+virginica+ + +6.7+,+3.3+,+5.7+,+2.5+,+virginica+ + +6.7+,+3.0+,+5.2+,+2.3+,+virginica+ + +6.3+,+2.5+,+5.0+,+1.9+,+virginica+ + 
+6.5+,+3.0+,+5.2+,+2.0+,+virginica+ + +6.2+,+3.4+,+5.4+,+2.3+,+virginica+ + +5.9+,+3.0+,+5.1+,+1.8+,+virginica+ + +--- + + Code + cat(readLines(path), sep = "\n") + Output + +Sepal.Length+,+Sepal.Width+,+Petal.Length+,+Petal.Width+,+Species+ + 5.1,3.5,1.4,0.2,+setosa+ + 4.9,3.0,1.4,0.2,+setosa+ + 4.7,3.2,1.3,0.2,+setosa+ + 4.6,3.1,1.5,0.2,+setosa+ + 5.0,3.6,1.4,0.2,+setosa+ + 5.4,3.9,1.7,0.4,+setosa+ + 4.6,3.4,1.4,0.3,+setosa+ + 5.0,3.4,1.5,0.2,+setosa+ + 4.4,2.9,1.4,0.2,+setosa+ + 4.9,3.1,1.5,0.1,+setosa+ + 5.4,3.7,1.5,0.2,+setosa+ + 4.8,3.4,1.6,0.2,+setosa+ + 4.8,3.0,1.4,0.1,+setosa+ + 4.3,3.0,1.1,0.1,+setosa+ + 5.8,4.0,1.2,0.2,+setosa+ + 5.7,4.4,1.5,0.4,+setosa+ + 5.4,3.9,1.3,0.4,+setosa+ + 5.1,3.5,1.4,0.3,+setosa+ + 5.7,3.8,1.7,0.3,+setosa+ + 5.1,3.8,1.5,0.3,+setosa+ + 5.4,3.4,1.7,0.2,+setosa+ + 5.1,3.7,1.5,0.4,+setosa+ + 4.6,3.6,1.0,0.2,+setosa+ + 5.1,3.3,1.7,0.5,+setosa+ + 4.8,3.4,1.9,0.2,+setosa+ + 5.0,3.0,1.6,0.2,+setosa+ + 5.0,3.4,1.6,0.4,+setosa+ + 5.2,3.5,1.5,0.2,+setosa+ + 5.2,3.4,1.4,0.2,+setosa+ + 4.7,3.2,1.6,0.2,+setosa+ + 4.8,3.1,1.6,0.2,+setosa+ + 5.4,3.4,1.5,0.4,+setosa+ + 5.2,4.1,1.5,0.1,+setosa+ + 5.5,4.2,1.4,0.2,+setosa+ + 4.9,3.1,1.5,0.2,+setosa+ + 5.0,3.2,1.2,0.2,+setosa+ + 5.5,3.5,1.3,0.2,+setosa+ + 4.9,3.6,1.4,0.1,+setosa+ + 4.4,3.0,1.3,0.2,+setosa+ + 5.1,3.4,1.5,0.2,+setosa+ + 5.0,3.5,1.3,0.3,+setosa+ + 4.5,2.3,1.3,0.3,+setosa+ + 4.4,3.2,1.3,0.2,+setosa+ + 5.0,3.5,1.6,0.6,+setosa+ + 5.1,3.8,1.9,0.4,+setosa+ + 4.8,3.0,1.4,0.3,+setosa+ + 5.1,3.8,1.6,0.2,+setosa+ + 4.6,3.2,1.4,0.2,+setosa+ + 5.3,3.7,1.5,0.2,+setosa+ + 5.0,3.3,1.4,0.2,+setosa+ + 7.0,3.2,4.7,1.4,+versicolor+ + 6.4,3.2,4.5,1.5,+versicolor+ + 6.9,3.1,4.9,1.5,+versicolor+ + 5.5,2.3,4.0,1.3,+versicolor+ + 6.5,2.8,4.6,1.5,+versicolor+ + 5.7,2.8,4.5,1.3,+versicolor+ + 6.3,3.3,4.7,1.6,+versicolor+ + 4.9,2.4,3.3,1.0,+versicolor+ + 6.6,2.9,4.6,1.3,+versicolor+ + 5.2,2.7,3.9,1.4,+versicolor+ + 5.0,2.0,3.5,1.0,+versicolor+ + 5.9,3.0,4.2,1.5,+versicolor+ + 6.0,2.2,4.0,1.0,+versicolor+ + 
6.1,2.9,4.7,1.4,+versicolor+ + 5.6,2.9,3.6,1.3,+versicolor+ + 6.7,3.1,4.4,1.4,+versicolor+ + 5.6,3.0,4.5,1.5,+versicolor+ + 5.8,2.7,4.1,1.0,+versicolor+ + 6.2,2.2,4.5,1.5,+versicolor+ + 5.6,2.5,3.9,1.1,+versicolor+ + 5.9,3.2,4.8,1.8,+versicolor+ + 6.1,2.8,4.0,1.3,+versicolor+ + 6.3,2.5,4.9,1.5,+versicolor+ + 6.1,2.8,4.7,1.2,+versicolor+ + 6.4,2.9,4.3,1.3,+versicolor+ + 6.6,3.0,4.4,1.4,+versicolor+ + 6.8,2.8,4.8,1.4,+versicolor+ + 6.7,3.0,5.0,1.7,+versicolor+ + 6.0,2.9,4.5,1.5,+versicolor+ + 5.7,2.6,3.5,1.0,+versicolor+ + 5.5,2.4,3.8,1.1,+versicolor+ + 5.5,2.4,3.7,1.0,+versicolor+ + 5.8,2.7,3.9,1.2,+versicolor+ + 6.0,2.7,5.1,1.6,+versicolor+ + 5.4,3.0,4.5,1.5,+versicolor+ + 6.0,3.4,4.5,1.6,+versicolor+ + 6.7,3.1,4.7,1.5,+versicolor+ + 6.3,2.3,4.4,1.3,+versicolor+ + 5.6,3.0,4.1,1.3,+versicolor+ + 5.5,2.5,4.0,1.3,+versicolor+ + 5.5,2.6,4.4,1.2,+versicolor+ + 6.1,3.0,4.6,1.4,+versicolor+ + 5.8,2.6,4.0,1.2,+versicolor+ + 5.0,2.3,3.3,1.0,+versicolor+ + 5.6,2.7,4.2,1.3,+versicolor+ + 5.7,3.0,4.2,1.2,+versicolor+ + 5.7,2.9,4.2,1.3,+versicolor+ + 6.2,2.9,4.3,1.3,+versicolor+ + 5.1,2.5,3.0,1.1,+versicolor+ + 5.7,2.8,4.1,1.3,+versicolor+ + 6.3,3.3,6.0,2.5,+virginica+ + 5.8,2.7,5.1,1.9,+virginica+ + 7.1,3.0,5.9,2.1,+virginica+ + 6.3,2.9,5.6,1.8,+virginica+ + 6.5,3.0,5.8,2.2,+virginica+ + 7.6,3.0,6.6,2.1,+virginica+ + 4.9,2.5,4.5,1.7,+virginica+ + 7.3,2.9,6.3,1.8,+virginica+ + 6.7,2.5,5.8,1.8,+virginica+ + 7.2,3.6,6.1,2.5,+virginica+ + 6.5,3.2,5.1,2.0,+virginica+ + 6.4,2.7,5.3,1.9,+virginica+ + 6.8,3.0,5.5,2.1,+virginica+ + 5.7,2.5,5.0,2.0,+virginica+ + 5.8,2.8,5.1,2.4,+virginica+ + 6.4,3.2,5.3,2.3,+virginica+ + 6.5,3.0,5.5,1.8,+virginica+ + 7.7,3.8,6.7,2.2,+virginica+ + 7.7,2.6,6.9,2.3,+virginica+ + 6.0,2.2,5.0,1.5,+virginica+ + 6.9,3.2,5.7,2.3,+virginica+ + 5.6,2.8,4.9,2.0,+virginica+ + 7.7,2.8,6.7,2.0,+virginica+ + 6.3,2.7,4.9,1.8,+virginica+ + 6.7,3.3,5.7,2.1,+virginica+ + 7.2,3.2,6.0,1.8,+virginica+ + 6.2,2.8,4.8,1.8,+virginica+ + 6.1,3.0,4.9,1.8,+virginica+ + 
6.4,2.8,5.6,2.1,+virginica+ + 7.2,3.0,5.8,1.6,+virginica+ + 7.4,2.8,6.1,1.9,+virginica+ + 7.9,3.8,6.4,2.0,+virginica+ + 6.4,2.8,5.6,2.2,+virginica+ + 6.3,2.8,5.1,1.5,+virginica+ + 6.1,2.6,5.6,1.4,+virginica+ + 7.7,3.0,6.1,2.3,+virginica+ + 6.3,3.4,5.6,2.4,+virginica+ + 6.4,3.1,5.5,1.8,+virginica+ + 6.0,3.0,4.8,1.8,+virginica+ + 6.9,3.1,5.4,2.1,+virginica+ + 6.7,3.1,5.6,2.4,+virginica+ + 6.9,3.1,5.1,2.3,+virginica+ + 5.8,2.7,5.1,1.9,+virginica+ + 6.8,3.2,5.9,2.3,+virginica+ + 6.7,3.3,5.7,2.5,+virginica+ + 6.7,3.0,5.2,2.3,+virginica+ + 6.3,2.5,5.0,1.9,+virginica+ + 6.5,3.0,5.2,2.0,+virginica+ + 6.2,3.4,5.4,2.3,+virginica+ + 5.9,3.0,5.1,1.8,+virginica+ + +# write_csv: quote_style quote_style=necessary + + Code + cat(readLines(path), sep = "\n") + Output + a,b,c + """foo""",1,a + bar,2,b + +# write_csv: quote_style quote_style=always + + Code + cat(readLines(path), sep = "\n") + Output + "a","b","c" + """foo""","1","a" + "bar","2","b" + +# write_csv: quote_style quote_style=non_numeric + + Code + cat(readLines(path), sep = "\n") + Output + "a","b","c" + """foo""",1,"a" + "bar",2,"b" + +# write_csv: date_format works + + Code + cat(readLines(path), sep = "\n") + Output + date + 2020 + 2021 + 2022 + 2023 + +--- + + Code + cat(readLines(path), sep = "\n") + Output + date + 01/01/2020 + 01/01/2021 + 01/01/2022 + 01/01/2023 + +# write_csv: datetime_format works + + Code + cat(readLines(path), sep = "\n") + Output + date + 00h00m - 01/01/2020 + 06h00m - 01/01/2020 + 12h00m - 01/01/2020 + 18h00m - 01/01/2020 + 00h00m - 02/01/2020 + +# write_csv: time_format works + + Code + cat(readLines(path), sep = "\n") + Output + date + 00h00m00s + 08h00m00s + 16h00m00s + 00h00m00s + +# write_csv: float_precision works + + Code + cat(readLines(path), sep = "\n") + Output + x + 1.2 + 5.6 + +--- + + Code + cat(readLines(path), sep = "\n") + Output + x + 1.234 + 5.600 + diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R index 5c4e3a460..73325813c 100644 --- 
a/tests/testthat/helper.R +++ b/tests/testthat/helper.R @@ -84,3 +84,7 @@ expect_rpolarserr = function(expr, ctxs) { expect_identical(class(res$err), "RPolarsErr") expect_identical(names(res$err$contexts()), ctxs) } + +expect_snapshot_file = function(path, ...) { + expect_snapshot(readLines(path) |> cat(sep = "\n"), ...) +} diff --git a/tests/testthat/test-csv.R b/tests/testthat/test-csv.R index 5fa7a0e76..0933e5379 100644 --- a/tests/testthat/test-csv.R +++ b/tests/testthat/test-csv.R @@ -22,3 +22,130 @@ test_that("csv read iris", { iris ) }) + + +dat = head(mtcars, n = 15) +dat[c(1, 3, 9, 12), c(3, 4, 5)] = NA +dat_pl = pl$DataFrame(dat) +temp_noext = tempfile() +temp_out = tempfile(fileext = ".csv") + +test_that("write_csv: path works", { + dat_pl$write_csv(temp_out) + expect_identical( + pl$read_csv(temp_out)$to_data_frame(), + dat, + ignore_attr = TRUE # rownames are lost when writing / reading from CSV + ) +}) + +test_that("write_csv: null_values works", { + expect_error( + dat_pl$write_csv(temp_out, null_values = NULL) + ) + dat_pl$write_csv(temp_out, null_values = "hello") + expect_snapshot_file(temp_out) +}) + + +test_that("write_csv: separator works", { + dat_pl$write_csv(temp_out, separator = "|") + expect_snapshot_file(temp_out) +}) + +test_that("write_csv: quote_style and quote works", { + dat_pl2 = pl$DataFrame(iris) + + # wrong quote_style + ctx = dat_pl2$write_csv(temp_out, quote_style = "foo") |> get_err_ctx() + expect_identical(ctx$BadArgument, "quote_style") + expect_identical(ctx$Plain, "a `quote_style` must be 'always', 'necessary' or 'non_numeric'.") + + # wrong quote_style type + ctx = dat_pl2$write_csv(temp_out, quote_style = 42) |> get_err_ctx() + expect_identical(ctx$TypeMismatch, "&str") + + # ok quote_style and quote + dat_pl2$write_csv(temp_out, quote_style = "always", quote = "+") + expect_snapshot_file(temp_out) + + # ok also + ctx = dat_pl2$write_csv(temp_out, quote_style = "non_numeric", quote = "+") + expect_snapshot_file(temp_out) 
+ + # zero byte quote + ctx = dat_pl2$write_csv(temp_out, quote = "") |> get_err_ctx() + expect_identical(ctx$Plain, "cannot extract single byte from empty string") + + # multi byte quote not allowed + ctx = dat_pl2$write_csv(temp_out, quote = "£") |> get_err_ctx() + expect_identical(ctx$Plain, "multi byte-string not allowed") + + # multi string not allowed + ctx = dat_pl2$write_csv(temp_out, quote = c("a", "b")) |> get_err_ctx() + expect_identical(ctx$TypeMismatch, "&str") +}) + +patrick::with_parameters_test_that( + "write_csv: quote_style", + { + df = pl$DataFrame( + a = c(r"("foo")", "bar"), + b = 1:2, + c = letters[1:2] + )$write_csv(temp_out, quote_style = quote_style) + expect_snapshot_file(temp_out) + }, + quote_style = c("necessary", "always", "non_numeric") +) + +test_that("write_csv: date_format works", { + dat = pl$DataFrame( + date = pl$date_range( + as.Date("2020-01-01"), + as.Date("2023-01-02"), + interval = "1y", + eager = TRUE + ) + ) + dat$write_csv(temp_out, date_format = "%Y") + expect_snapshot_file(temp_out) + dat$write_csv(temp_out, date_format = "%d/%m/%Y") + expect_snapshot_file(temp_out) +}) + +test_that("write_csv: datetime_format works", { + dat = pl$DataFrame( + date = pl$date_range( + as.Date("2020-01-01"), + as.Date("2020-01-02"), + interval = "6h", + eager = TRUE + ) + ) + dat$write_csv(temp_out, datetime_format = "%Hh%Mm - %d/%m/%Y") + expect_snapshot_file(temp_out) +}) + +test_that("write_csv: time_format works", { + dat = pl$DataFrame( + date = pl$date_range( + as.Date("2020-10-17"), + as.Date("2020-10-18"), + "8h", + eager = TRUE + ) + )$with_columns(pl$col("date")$dt$time()) + dat$write_csv(temp_out, time_format = "%Hh%Mm%Ss") + expect_snapshot_file(temp_out) +}) + + +test_that("write_csv: float_precision works", { + dat = pl$DataFrame(x = c(1.234, 5.6)) + dat$write_csv(temp_out, float_precision = 1) + expect_snapshot_file(temp_out) + + dat$write_csv(temp_out, float_precision = 3) + expect_snapshot_file(temp_out) +})