Skip to content

Commit

Permalink
Merge branch 'Release-0.1.4'
Browse files Browse the repository at this point in the history
  • Loading branch information
wangyuchen committed Feb 28, 2017
2 parents 8ca4e38 + 33c4fe5 commit cd70a2c
Show file tree
Hide file tree
Showing 11 changed files with 203 additions and 34 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: extdplyr
Type: Package
Title: Data Manipulation Extension Based on 'Dplyr' and 'Tidyr'
Version: 0.1.3
Title: Data Manipulation Extensions of 'Dplyr' and 'Tidyr'
Version: 0.1.4
Authors@R: person("Yuchen", "Wang", email = "[email protected]",
role = c("aut", "cre"))
Description: If 'dplyr' is a grammar for data manipulation, 'extdplyr' is like
Expand All @@ -17,3 +17,5 @@ Imports:
tidyr,
lazyeval
RoxygenNote: 6.0.1
Suggests: testthat,
data.table
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ export(pct_routine)
export(pct_routine_)
export(tally_pct)
export(tally_pct_)
importFrom(dplyr,"%>%")
importFrom(lazyeval,interp)
importFrom(tidyr,"%>%")
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# extdplyr 0.1.4

* Added tests for `ind_to_char_` to check compatibility for `tbl_df` and `data.table`.
* Adapted `append_col` and `append_df` from `tidyr`.


# extdplyr 0.1.3

* Added a `NEWS.md` file to track changes to the package.
Expand Down
27 changes: 15 additions & 12 deletions R/grp_routine.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,14 @@ grp_routine <- function(data, col, ..., ret_factor = FALSE) {

#' @describeIn grp_routine SE version of grp_routine.
#' @export
grp_routine_ <- function(data, col, ..., .dots,
ret_factor = FALSE) {
grp_routine_ <- function(data, col, ..., .dots, ret_factor = FALSE) {
conds <- lazyeval::all_dots(.dots, ..., all_named = TRUE)

ret <- data %>%
dplyr::transmute_(.dots = conds) %>%
ind_to_char_(col, names(conds), ret_factor = ret_factor)

dplyr::bind_cols(data, ret)
data %>%
dplyr::mutate_(.dots = conds) %>%
ind_to_char_(col, names(conds), ret_factor = ret_factor,
remove = TRUE, mutually_exclusive = TRUE,
collectively_exhaustive = TRUE)
}


Expand Down Expand Up @@ -72,9 +71,8 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,
# According to coercion rule, logical - integer - double - character,
# Here convert to logical first for safety.


int_df <- dplyr::mutate_all(data[from],
dplyr::funs_(quote(as.integer(as.logical(.)))))
int_df <- data[from]
int_df[] <- lapply(int_df, function(x) as.integer(as.logical(x)))

rs <- rowSums(int_df)

Expand All @@ -97,9 +95,14 @@ ind_to_char_ <- function(data, col, from, ret_factor = FALSE, remove = TRUE,

if (ret_factor) char_vec <- as.factor(char_vec)

ret <- dplyr::mutate_(data, .dots = named_expr(col, ~ char_vec))
first_col <- which(names(data) %in% from)[1]
ret <- append_col(data, char_vec, col, first_col - 1)

# Give back groups
if (dplyr::is.grouped_df(data))
ret <- dplyr::group_by_(ret, .dots = dplyr::groups(data))

if (remove) ret <- dplyr::select(ret, -dplyr::one_of(from))
if (remove) ret <- ret[setdiff(names(ret), from)]

ret
}
Expand Down
2 changes: 1 addition & 1 deletion R/imports.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @importFrom dplyr %>%
#' @importFrom tidyr %>%
#' @export
dplyr::`%>%`

Expand Down
16 changes: 16 additions & 0 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,19 @@ check_se_column <- function(col_name) {
common_params <- function(data, col, .dots) {
NULL
}


# Adapted from tidyr
append_df <- function (x, values, after = length(x)) {
y <- append(x, values, after = after)
class(y) <- class(x)
attr(y, "row.names") <- attr(x, "row.names")
y
}

append_col <- function (x, col, name, after = length(x)) {
name <- enc2utf8(name)
append_df(x, named_expr(name, col), after = after)
}


23 changes: 5 additions & 18 deletions cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,14 @@
## R CMD check results
There were no ERRORs or WARNINGs.

win-builder check has 1 note:
There was 1 NOTES on win-builder:

```
New submission
```

* checking CRAN incoming feasibility ... NOTE
Maintainer: 'Yuchen Wang <[email protected]>'
Fixed 2 notes in previous submission.

```
Possibly mis-spelled words in DESCRIPTION:
dplyr (3:45, 7:18, 8:35, 8:63, 10:61)
extdplyr (7:62, 8:44)
tidyr (3:55, 9:6)
Days since last update: 4
```

Fixed description file for title case and single quotes.

```
Non-standard file/directory found at top level:
'examples'
```
I'm sorry for the constant update. A problem was found that could cause `ind_to_char_` to fail when data are grouped as a `grouped_df` object from `dplyr`. This has been fixed in the latest version, and several tests are added to make sure the functions work with various data structures, including `tbl_df`, `grouped_df` and `data.table`.

This is fixed by adding the folder to `.Rbuildignore`.
18 changes: 18 additions & 0 deletions examples/ind_to_char_ex.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
# Supports converting the following atomic types to indicator

df <- data.frame(integer_ind = c(2L, 0L, 0L, 0L, 0L, 0L),
# non-zero integer is 1, otherwise 0.
logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
# TRUE is 1.
double_ind = c(0, 0, 2.0, 0, 0, 0),
# non-zero double is 1.
char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
# "T" and "TRUE" converts to 1.
factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
labels = c(TRUE, FALSE)),
# Factors are converted based on levels.
stringsAsFactors = FALSE)

ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)


# ind_to_char as complement to use model.matrix on a factor
df <- data.frame(x = 1:6, y = factor(c(letters[1:5], NA)))
ind_df <- as.data.frame(model.matrix(~ x + y - 1,
Expand Down
18 changes: 18 additions & 0 deletions man/ind_to_char.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions tests/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
library(testthat)
library(extdplyr)

test_check("extdplyr")
115 changes: 115 additions & 0 deletions tests/testthat/test_grp_routine.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
library(extdplyr)
context("ind_to_char_ and grp_routine_")

test_that("ind_to_char_ works with regular data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- as.data.frame(model.matrix(~ x + y - 1, df))

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(ncol(df_ret), 2)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)
expect_equal(ncol(df_ret2), 7)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})


test_that("ind_to_char_ works with non-integer indicators", {
df <- data.frame(integer_ind = c(1L, 0L, 0L, 0L, 0L, 0L),
logcal_ind = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
double_ind = c(0, 0, 2.0, 0, 0, 0),
char_ind = c("FALSE", "FALSE", "F", "TRUE", "T", "FALSE"),
factor_ind = factor(c(1, 1, 1, 1, 1, 0), levels = c(0, 1),
labels = c(TRUE, FALSE)),
stringsAsFactors = FALSE)

# Using SE
df_ret <- ind_to_char_(df, col = "new_y", from = names(df), remove = FALSE)

expect_equal(ncol(df_ret), 6)
expect_equal(which(names(df_ret) == "new_y"), 1)
expect_equal(df_ret[['new_y']],
c("integer_ind", "logcal_ind", "double_ind", "char_ind",
"char_ind", "factor_ind"))

})



library(dplyr)
test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- as_data_frame(model.matrix(~ x + y - 1, df))

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(class(ind_df), class(df_ret))
expect_equal(ncol(df_ret), 2)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)

expect_equal(class(ind_df), class(df_ret2))
expect_equal(ncol(df_ret2), 7)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})


test_that("ind_to_char_ works with grouped_df, tbl_df, tbl, data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- as_data_frame(model.matrix(~ x + y - 1, df)) %>%
group_by(z = x > 3)

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(class(ind_df), class(df_ret))
expect_equal(ncol(df_ret), 3)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)

expect_equal(class(ind_df), class(df_ret2))
expect_equal(ncol(df_ret2), 8)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})


if (requireNamespace("data.table", quietly = TRUE)) {
test_that("ind_to_char_ works with tbl_df, tbl, data.frame", {
df <- data.frame(x = 1:5, y = factor(c(letters[1:5])))
ind_df <- data.table::data.table(model.matrix(~ x + y - 1, df))

# Using SE
df_ret <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"))

expect_equal(class(ind_df), class(df_ret))
expect_equal(ncol(df_ret), 2)
expect_equal(df_ret[['new_y']], c("ya", "yb", "yc", "yd", "ye"))

df_ret2 <- ind_to_char_(ind_df, col = "new_y",
from = c("ya", "yb", "yc", "yd", "ye"),
remove = FALSE)

expect_equal(class(ind_df), class(df_ret2))
expect_equal(ncol(df_ret2), 7)
expect_equal(df_ret2[["new_y"]], c("ya", "yb", "yc", "yd", "ye"))
})
}



0 comments on commit cd70a2c

Please sign in to comment.