From 0711e4004d48db749fa39dd407c87bb06726b1c0 Mon Sep 17 00:00:00 2001 From: Nitish Jha <151559388+Nj221102@users.noreply.github.com> Date: Thu, 11 Apr 2024 22:02:00 +0530 Subject: [PATCH] Added skip_absent arguement to colnamesInt() (#6068) * Added skip_absent arguement to colnamesInt() * Update NEWS.md * Update NEWS.md * Update utils.c * Update utils.c * Update utils.c * Update utils.c * Update utils.c * added test * Update src/nafill.c Co-authored-by: Michael Chirico * Update src/utils.c Co-authored-by: Michael Chirico * Update src/utils.c Co-authored-by: Michael Chirico * Update src/utils.c Co-authored-by: Michael Chirico * Implemented suggestions * small fix * Update utils.c * minor issues * restore comment for now * Update nafill.Rraw * adjusted any colno. > ncol to 0L * Added test and changed refrence to deep copy * annotate test purpose * More careful about when duplicate() is needed * refine comment * whitespace * Add a new test against duplicates for numeric input * update last test number --------- Co-authored-by: nitish jha Co-authored-by: Michael Chirico --- R/wrappers.R | 2 +- inst/tests/nafill.Rraw | 16 +++++++++++++++- src/data.table.h | 2 +- src/nafill.c | 2 +- src/utils.c | 27 +++++++++++++++++++-------- 5 files changed, 37 insertions(+), 12 deletions(-) diff --git a/R/wrappers.R b/R/wrappers.R index dcf8ba08e..a018b91ae 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -8,7 +8,7 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) -colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) +colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(CcolnamesInt, x, cols, check_dups, skip_absent) testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L]) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index b72c0b506..cf65f61bf 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -149,8 +149,22 @@ test(4.20, colnamesInt(dt, integer()), integer()) test(4.21, colnamesInt(dt, NULL), seq_along(dt)) test(4.22, colnamesInt("asd", 1), error="must be data.table compatible") test(4.23, colnamesInt(dt, 1, check_dups="a"), error="check_dups") +test(4.24, colnamesInt(dt, c("a", "e"), skip_absent=TRUE), c(1L,0L)) +test(4.25, colnamesInt(dt, c(1L, 4L), skip_absent=TRUE), c(1L,0L)) +test(4.26, colnamesInt(dt, c(1, 4), skip_absent=TRUE), c(1L,0L)) +test(4.27, colnamesInt(dt, c("a", NA), skip_absent=TRUE), c(1L,0L)) +test(4.28, colnamesInt(dt, c(1L, 0L), skip_absent=TRUE), error="received non-existing column*.*0") +test(4.29, colnamesInt(dt, c(1, -5), skip_absent=TRUE), error="received non-existing column*.*-5") +test(4.30, colnamesInt(dt, c(1, 4), skip_absent=NULL), error="skip_absent must be TRUE or FALSE") +test(4.31, colnamesInt(dt, c(1L, 1000L), skip_absent=TRUE), c(1L,0L)) +cols=c(1L,100L) +test(4.32, colnamesInt(dt, cols, skip_absent=TRUE), c(1L, 0L)) +test(4.33, cols, c(1L, 100L)) # ensure input was not overwritten with output 0 +cols=c(1,100) +test(4.34, colnamesInt(dt, cols, skip_absent=TRUE), c(1L, 0L)) +test(4.35, cols, c(1, 100)) # ensure input was not overwritten with output 0 names(dt) <- NULL -test(4.24, colnamesInt(dt, "a"), error="has no names") +test(4.36, colnamesInt(dt, "a"), error="has no names") # verbose dt = data.table(a=c(1L, 2L, NA_integer_), b=c(1, 2, NA_real_)) diff --git a/src/data.table.h b/src/data.table.h index 21b7e30e0..297167d46 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -238,7 +238,7 @@ bool isRealReallyInt(SEXP x); SEXP isRealReallyIntR(SEXP x); SEXP isReallyReal(SEXP x); bool allNA(SEXP x, bool errorForBadType); -SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups); +SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent); bool INHERITS(SEXP x, SEXP char_); SEXP copyAsPlain(SEXP x); void copySharedColumns(SEXP x); diff --git a/src/nafill.c b/src/nafill.c index 5fe81933d..8d50f32ea 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -114,7 +114,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list SET_VECTOR_ELT(obj, 0, obj1); } - SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj) + SEXP ricols = PROTECT(colnamesInt(obj, cols, /* check_dups= */ ScalarLogical(TRUE), /* skip_absent= */ ScalarLogical(FALSE))); protecti++; // nafill cols=NULL which turns into seq_along(obj) x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++; int *icols = INTEGER(ricols); for (int i=0; inx) || (icols[i]<1)) + for (int i=0; inx) || (icols[i]<1)) error(_("argument specifying columns received non-existing column(s): cols[%d]=%d"), i+1, icols[i]); // handles NAs also + else if(bskip_absent && icols[i]>nx) + icols[i] = 0L; } } else if (isString(cols)) { SEXP xnames = PROTECT(getAttrib(x, R_NamesSymbol)); protecti++; @@ -133,9 +142,11 @@ SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups) { error(_("'x' argument data.table has no names")); ricols = PROTECT(chmatch(cols, xnames, 0)); protecti++; int *icols = INTEGER(ricols); - for (int i=0; i