From 4d3962e96a6eae6c888020e443be77344d80103e Mon Sep 17 00:00:00 2001
From: Flavio Poletti
Date: Thu, 27 Jan 2022 10:34:40 +0100
Subject: [PATCH] functions to convert empty strings to NA and vice versa

---
Review notes (text in this section is not part of the commit message):
- The line structure of the patch has been restored so it can be parsed.
- empty_strings_to_na() now inspects character/factor columns only instead
  of comparing whole tables against "".
- The blob ids on the "index" line of R/utils.R are those of the original
  commit and no longer describe the post-image of that file.

 NAMESPACE                   |  1 +
 R/io.R                      |  3 +++
 R/utils.R                   | 51 +++++++++++++++++++++++++++++++++++++
 man/empty_strings_to_na.Rd  | 17 +++++++++++++
 man/na_to_empty_strings.Rd  | 14 ++++++++++
 tests/testthat/test-utils.R | 18 +++++++++++++
 6 files changed, 104 insertions(+)
 create mode 100644 man/empty_strings_to_na.Rd
 create mode 100644 man/na_to_empty_strings.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 2ec062a5..497c75ee 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ S3method(plot,tidygtfs)
 S3method(print,tidygtfs)
 S3method(summary,tidygtfs)
 export(cluster_stops)
+export(empty_strings_to_na)
 export(filter_feed_by_area)
 export(filter_feed_by_date)
 export(filter_feed_by_stops)
diff --git a/R/io.R b/R/io.R
index be1f3788..cecca559 100644
--- a/R/io.R
+++ b/R/io.R
@@ -62,6 +62,9 @@ write_gtfs <- function(gtfs_obj, zipfile, compression_level = 9, as_dir = FALSE)
   # convert sf tables
   gtfs_out = sf_as_tbl(gtfs_obj)
 
+  # convert NA to empty strings
+  gtfs_out <- na_to_empty_strings(gtfs_out)
+
   # data.tables
   gtfs_out <- gtfs_out[names(gtfs_out) != "."]
   gtfs_out <- lapply(gtfs_out, as.data.table)
diff --git a/R/utils.R b/R/utils.R
index 27e326e6..575ef49e 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -6,3 +6,54 @@ feed_contains <- function(gtfs_obj, table_name) {
   exists(table_name, where = gtfs_obj) ||
     (exists(".", where = gtfs_obj) && exists(table_name, where = gtfs_obj$.))
 }
+
+#' Convert empty strings ("") to NA values in gtfs tables
+#'
+#' @param gtfs_obj tidygtfs object
+#'
+#' @return a gtfs_obj where all empty strings in tables have been replaced with NA
+#'
+#' @export
+empty_strings_to_na <- function(gtfs_obj) {
+  tbl_names <- setdiff(names(gtfs_obj), ".")
+  for(tbl in tbl_names) {
+    df <- gtfs_obj[[tbl]]
+    if(!inherits(df, "data.frame")) {
+      next
+    }
+    # Only character/factor columns can hold ""; other column types (dates,
+    # times, geometries) are skipped instead of being compared to a string.
+    is_text <- vapply(df, function(.col) {
+      is.character(.col) || is.factor(.col)
+    }, logical(1))
+    for(j in which(is_text)) {
+      empty <- !is.na(df[[j]]) & df[[j]] == ""
+      df[[j]][empty] <- NA
+    }
+    gtfs_obj[[tbl]] <- df
+  }
+  gtfs_obj
+}
+
+#' Convert NA values to empty strings ("")
+#'
+#' @param gtfs_obj tidygtfs object
+na_to_empty_strings <- function(gtfs_obj) {
+  lapply(gtfs_obj, function(df) {
+    if(inherits(df, "data.frame")) {
+      df2 <- lapply(df, function(.col) {
+        if(is.character(.col)) {
+          .col[is.na(.col)] <- ""
+        }
+        .col
+      })
+      # lapply() returns a plain list: copy the table's attributes back
+      attributes(df2) <- attributes(df)
+      df <- df2
+    } else if(inherits(df, "list")) {
+      # nested lists are converted recursively
+      df <- na_to_empty_strings(df)
+    }
+    df
+  })
+}
diff --git a/man/empty_strings_to_na.Rd b/man/empty_strings_to_na.Rd
new file mode 100644
index 00000000..a2671e9c
--- /dev/null
+++ b/man/empty_strings_to_na.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{empty_strings_to_na}
+\alias{empty_strings_to_na}
+\title{Convert empty strings ("") to NA values in gtfs tables}
+\usage{
+empty_strings_to_na(gtfs_obj)
+}
+\arguments{
+\item{gtfs_obj}{tidygtfs object}
+}
+\value{
+a gtfs_obj where all empty strings in tables have been replaced with NA
+}
+\description{
+Convert empty strings ("") to NA values in gtfs tables
+}
diff --git a/man/na_to_empty_strings.Rd b/man/na_to_empty_strings.Rd
new file mode 100644
index 00000000..45d71716
--- /dev/null
+++ b/man/na_to_empty_strings.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/utils.R
+\name{na_to_empty_strings}
+\alias{na_to_empty_strings}
+\title{Convert NA values to empty strings ("")}
+\usage{
+na_to_empty_strings(gtfs_obj)
+}
+\arguments{
+\item{gtfs_obj}{tidygtfs object}
+}
+\description{
+Convert NA values to empty strings ("")
+}
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 3370439f..6b633ccf 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -100,3 +100,21 @@ test_that("gtfs_meta", {
   # empty test
   expect_equal(gtfs_meta, get_gtfs_meta())
 })
+test_that("empty_strings_to_na", {
+  gpath = system.file("extdata", "sample-feed-fixed.zip", package = "tidytransit")
+  g1 = read_gtfs(gpath)
+  g_na = empty_strings_to_na(g1)
+  g2 = na_to_empty_strings(g_na)
+
+  for(tbl in names(g1)) {
+    expect_equal(g1[[tbl]], g2[[tbl]])
+  }
+
+  tmppath = tempfile(fileext = ".zip")
+  write_gtfs(g_na, tmppath)
+  g3 = read_gtfs(tmppath)
+
+  for(tbl in names(g1)) {
+    expect_equal(g1[[tbl]], g3[[tbl]])
+  }
+})