Merge branch 'Release-0.1.4'

wangyuchen · Feb 28, 2017 · cd70a2c · cd70a2c
2 parents 8ca4e38 + 33c4fe5
commit cd70a2c
Show file tree

Hide file tree

Showing 11 changed files with 203 additions and 34 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: extdplyr
 Type: Package
-Title: Data Manipulation Extension Based on 'Dplyr' and 'Tidyr'
-Version: 0.1.3
+Title: Data Manipulation Extensions of 'Dplyr' and 'Tidyr'
+Version: 0.1.4
 Authors@R: person("Yuchen", "Wang", email = "[email protected]",
     role = c("aut", "cre"))
 Description: If 'dplyr' is a grammar for data manipulation, 'extdplyr' is like
@@ -17,3 +17,5 @@ Imports:
     tidyr,
     lazyeval
 RoxygenNote: 6.0.1
+Suggests: testthat,
+    data.table
diff --git a/NAMESPACE b/NAMESPACE
@@ -11,5 +11,5 @@ export(pct_routine)
 export(pct_routine_)
 export(tally_pct)
 export(tally_pct_)
-importFrom(dplyr,"%>%")
 importFrom(lazyeval,interp)
+importFrom(tidyr,"%>%")
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# extdplyr 0.1.4
+
+* Added tests for `ind_to_char_` to check compatibility for `tbl_df` and `data.table`.
+* Adapted `append_col` and `append_df` from `tidyr`.
+
+
 # extdplyr 0.1.3
 
 * Added a `NEWS.md` file to track changes to the package.

diff --git a/R/grp_routine.R b/R/grp_routine.R
@@ -19,15 +19,14 @@ grp_routine <- function(data, col, ..., ret_factor = FALSE) {
 
 #' @describeIn grp_routine SE version of grp_routine.
 #' @export
-grp_routine_ <- function(data, col, ..., .dots,
-                            ret_factor = FALSE) {
+grp_routine_ <- function(data, col, ..., .dots, ret_factor = FALSE) {
   conds <- lazyeval::all_dots(.dots, ..., all_named = TRUE)
 
-  ret <- data %>%
-    dplyr::transmute_(.dots = conds) %>%
-    ind_to_char_(col, names(conds), ret_factor = ret_factor)
-
-  dplyr::bind_cols(data, ret)
+  data %>%
+    dplyr::mutate_(.dots = conds) %>%
+    ind_to_char_(col, names(conds), ret_factor = ret_factor,
+                 remove = TRUE, mutually_exclusive = TRUE,
+                 collectively_exhaustive = TRUE)
 }
 
 
@@ -72,9 +71,8 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,
   # According to coercion rule, logical - integer - double - character,
   # Here convert to logical first for safety.
 
-
-  int_df <- dplyr::mutate_all(data[from],
-                              dplyr::funs_(quote(as.integer(as.logical(.)))))
+  int_df <- data[from]
+  int_df[] <- lapply(int_df, function(x) as.integer(as.logical(x)))
 
   rs <- rowSums(int_df)
 
@@ -97,9 +95,14 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,
 
   if (ret_factor) char_vec <- as.factor(char_vec)
 
-  ret <- dplyr::mutate_(data, .dots = named_expr(col, ~ char_vec))
+  first_col <- which(names(data) %in% from)[1]
+  ret <- append_col(data, char_vec, col, first_col - 1)
+
+  # Give back groups
+  if (dplyr::is.grouped_df(data))
+    ret <- dplyr::group_by_(ret, .dots = dplyr::groups(data))
 
-  if (remove) ret <- dplyr::select(ret, -dplyr::one_of(from))
+  if (remove) ret <- ret[setdiff(names(ret), from)]
 
   ret
 }

diff --git a/R/imports.R b/R/imports.R
@@ -1,4 +1,4 @@
-#' @importFrom dplyr %>%
+#' @importFrom tidyr %>%
 #' @export
 dplyr::`%>%`
 

diff --git a/R/utilities.R b/R/utilities.R
@@ -25,3 +25,19 @@ check_se_column <- function(col_name) {
 common_params <- function(data, col, .dots) {
   NULL
 }
+
+
+# Adapted from tidyr
+append_df <- function (x, values, after = length(x)) {
+  y <- append(x, values, after = after)
+  class(y) <- class(x)
+  attr(y, "row.names") <- attr(x, "row.names")
+  y
+}
+
+append_col <- function (x, col, name, after = length(x)) {
+  name <- enc2utf8(name)
+  append_df(x, named_expr(name, col), after = after)
+}
+
+
diff --git a/cran-comments.md b/cran-comments.md
@@ -6,27 +6,14 @@
 ## R CMD check results
 There were no ERRORs or WARNINGs.
 
-win-builder check has 1 note:
+There was 1 NOTES on win-builder:
 
 ```
-New submission
-```
-
+* checking CRAN incoming feasibility ... NOTE
+Maintainer: 'Yuchen Wang <[email protected]>'
 
-Fixed 2 notes in previous submission.
-
-```
-Possibly mis-spelled words in DESCRIPTION:
-   dplyr (3:45, 7:18, 8:35, 8:63, 10:61)
-   extdplyr (7:62, 8:44)
-   tidyr (3:55, 9:6)
+Days since last update: 4
 ```
 
-Fixed description file for title case and single quotes. 
-
-```
-Non-standard file/directory found at top level:
-  'examples'
-```
+I'm sorry for the constant update. A problem was found that could cause `ind_to_char_` to fail when data are grouped as a `grouped_df` object from `dplyr`. This has been fixed in the latest version, and several tests are added to make sure the functions work with various data structures, including `tbl_df`, `grouped_df` and `data.table`.
 
-This is fixed by adding the folder to `.Rbuildignore`.
diff --git a/examples/ind_to_char_ex.R b/examples/ind_to_char_ex.R
@@ -1,3 +1,21 @@
+# Supports converting the following atomic types to indicator
+
+df <- data.frame(integer_ind = c(2L, 0L, 0L, 0L, 0L, 0L),
+                 # non-zero integer is 1, otherwise 0.
+                 logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
+                 # TRUE is 1.
+                 double_ind = c(0, 0, 2.0, 0, 0, 0),
+                 # non-zero double is 1.
+                 char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
+                 # "T" and "TRUE" converts to 1.
+                 factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
+                                     labels = c(TRUE, FALSE)),
+                 # Factors are converted based on levels.
+                 stringsAsFactors = FALSE)
+
+ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)
+
+
 # ind_to_char as complement to use model.matrix on a factor
 df <- data.frame(x = 1:6, y = factor(c(letters[1:5], NA)))
 ind_df <- as.data.frame(model.matrix(~ x + y - 1,

diff --git a/man/ind_to_char.Rd b/man/ind_to_char.Rd
diff --git a/tests/testthat.R b/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(extdplyr)
+
+test_check("extdplyr")
diff --git a/tests/testthat/test_grp_routine.R b/tests/testthat/test_grp_routine.R
@@ -0,0 +1,115 @@
+library(extdplyr)
+context("ind_to_char_ and grp_routine_")
+
+test_that("ind_to_char_ works with regular data.frame", {
+  df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+  ind_df <- as.data.frame(model.matrix(~ x + y - 1, df))
+
+  # Using SE
+  df_ret <- ind_to_char_(ind_df, col = "new_y",
+                         from = c("ya", "yb", "yc", "yd", "ye"))
+
+  expect_equal(ncol(df_ret), 2)
+  expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+  df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                          from = c("ya", "yb", "yc", "yd", "ye"),
+                          remove = FALSE)
+  expect_equal(ncol(df_ret2), 7)
+  expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+})
+
+
+test_that("ind_to_char_ works with non-integer indicators", {
+  df <- data.frame(integer_ind = c(1L, 0L, 0L, 0L, 0L, 0L),
+                   logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
+                   double_ind = c(0, 0, 2.0, 0, 0, 0),
+                   char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
+                   factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
+                                       labels = c(TRUE, FALSE)),
+                   stringsAsFactors = FALSE)
+
+  # Using SE
+  df_ret <- ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)
+
+  expect_equal(ncol(df_ret), 6)
+  expect_equal(which(names(df_ret) == "new_y"), 1)
+  expect_equal(df_ret[['new_y']],
+               c("integer_ind", "logcal_ind", "double_ind", "char_ind",
+                 "char_ind", "factor_ind"))
+
+})
+
+
+
+library(dplyr)
+test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
+  df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+  ind_df <- as_data_frame(model.matrix(~ x + y - 1, df))
+
+  # Using SE
+  df_ret <- ind_to_char_(ind_df, col = "new_y",
+                         from = c("ya", "yb", "yc", "yd", "ye"))
+
+  expect_equal(class(ind_df), class(df_ret))
+  expect_equal(ncol(df_ret), 2)
+  expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+  df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                          from = c("ya", "yb", "yc", "yd", "ye"),
+                          remove = FALSE)
+
+  expect_equal(class(ind_df), class(df_ret2))
+  expect_equal(ncol(df_ret2), 7)
+  expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+})
+
+
+test_that("ind_to_char_ works with grouped_df, tbl_df, tbl, data.frame", {
+  df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+  ind_df <- as_data_frame(model.matrix(~ x + y - 1, df)) %>%
+    group_by(z = x > 3)
+
+  # Using SE
+  df_ret <- ind_to_char_(ind_df, col = "new_y",
+                         from = c("ya", "yb", "yc", "yd", "ye"))
+
+  expect_equal(class(ind_df), class(df_ret))
+  expect_equal(ncol(df_ret), 3)
+  expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+  df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                          from = c("ya", "yb", "yc", "yd", "ye"),
+                          remove = FALSE)
+
+  expect_equal(class(ind_df), class(df_ret2))
+  expect_equal(ncol(df_ret2), 8)
+  expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+})
+
+
+if (requireNamespace("data.table", quietly = TRUE)) {
+  test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
+    df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
+    ind_df <- data.table::data.table(model.matrix(~ x + y - 1, df))
+
+    # Using SE
+    df_ret <- ind_to_char_(ind_df, col = "new_y",
+                           from = c("ya", "yb", "yc", "yd", "ye"))
+
+    expect_equal(class(ind_df), class(df_ret))
+    expect_equal(ncol(df_ret), 2)
+    expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))
+
+    df_ret2 <- ind_to_char_(ind_df, col = "new_y",
+                            from = c("ya", "yb", "yc", "yd", "ye"),
+                            remove = FALSE)
+
+    expect_equal(class(ind_df), class(df_ret2))
+    expect_equal(ncol(df_ret2), 7)
+    expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
+  })
+}
+
+
+