Skip to content

Commit

Permalink
Added skip_absent arguement to colnamesInt() (#6068)
Browse files Browse the repository at this point in the history
* Added skip_absent arguement to colnamesInt()

* Update NEWS.md

* Update NEWS.md

* Update utils.c

* Update utils.c

* Update utils.c

* Update utils.c

* Update utils.c

* added test

* Update src/nafill.c

Co-authored-by: Michael Chirico <[email protected]>

* Update src/utils.c

Co-authored-by: Michael Chirico <[email protected]>

* Update src/utils.c

Co-authored-by: Michael Chirico <[email protected]>

* Update src/utils.c

Co-authored-by: Michael Chirico <[email protected]>

* Implemented suggestions

* small fix

* Update utils.c

* minor issues

* restore comment for now

* Update nafill.Rraw

* adjusted any colno. > ncol to 0L

* Added test and changed refrence to deep copy

* annotate test purpose

* More careful about when duplicate() is needed

* refine comment

* whitespace

* Add a new test against duplicates for numeric input

* update last test number

---------

Co-authored-by: nitish jha <[email protected]>
Co-authored-by: Michael Chirico <[email protected]>
  • Loading branch information
3 people authored Apr 11, 2024
1 parent 3171397 commit 0711e40
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 12 deletions.
2 changes: 1 addition & 1 deletion R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)
fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na)
fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L])

colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups)
colnamesInt = function(x, cols, check_dups=FALSE, skip_absent=FALSE) .Call(CcolnamesInt, x, cols, check_dups, skip_absent)

testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L])

Expand Down
16 changes: 15 additions & 1 deletion inst/tests/nafill.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,22 @@ test(4.20, colnamesInt(dt, integer()), integer())
test(4.21, colnamesInt(dt, NULL), seq_along(dt))
test(4.22, colnamesInt("asd", 1), error="must be data.table compatible")
test(4.23, colnamesInt(dt, 1, check_dups="a"), error="check_dups")
test(4.24, colnamesInt(dt, c("a", "e"), skip_absent=TRUE), c(1L,0L))
test(4.25, colnamesInt(dt, c(1L, 4L), skip_absent=TRUE), c(1L,0L))
test(4.26, colnamesInt(dt, c(1, 4), skip_absent=TRUE), c(1L,0L))
test(4.27, colnamesInt(dt, c("a", NA), skip_absent=TRUE), c(1L,0L))
test(4.28, colnamesInt(dt, c(1L, 0L), skip_absent=TRUE), error="received non-existing column*.*0")
test(4.29, colnamesInt(dt, c(1, -5), skip_absent=TRUE), error="received non-existing column*.*-5")
test(4.30, colnamesInt(dt, c(1, 4), skip_absent=NULL), error="skip_absent must be TRUE or FALSE")
test(4.31, colnamesInt(dt, c(1L, 1000L), skip_absent=TRUE), c(1L,0L))
cols=c(1L,100L)
test(4.32, colnamesInt(dt, cols, skip_absent=TRUE), c(1L, 0L))
test(4.33, cols, c(1L, 100L)) # ensure input was not overwritten with output 0
cols=c(1,100)
test(4.34, colnamesInt(dt, cols, skip_absent=TRUE), c(1L, 0L))
test(4.35, cols, c(1, 100)) # ensure input was not overwritten with output 0
names(dt) <- NULL
test(4.24, colnamesInt(dt, "a"), error="has no names")
test(4.36, colnamesInt(dt, "a"), error="has no names")

# verbose
dt = data.table(a=c(1L, 2L, NA_integer_), b=c(1, 2, NA_real_))
Expand Down
2 changes: 1 addition & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ bool isRealReallyInt(SEXP x);
SEXP isRealReallyIntR(SEXP x);
SEXP isReallyReal(SEXP x);
bool allNA(SEXP x, bool errorForBadType);
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups);
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent);
bool INHERITS(SEXP x, SEXP char_);
SEXP copyAsPlain(SEXP x);
void copySharedColumns(SEXP x);
Expand Down
2 changes: 1 addition & 1 deletion src/nafill.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list
SET_VECTOR_ELT(obj, 0, obj1);
}
SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
SEXP ricols = PROTECT(colnamesInt(obj, cols, /* check_dups= */ ScalarLogical(TRUE), /* skip_absent= */ ScalarLogical(FALSE))); protecti++; // nafill cols=NULL which turns into seq_along(obj)
x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++;
int *icols = INTEGER(ricols);
for (int i=0; i<length(ricols); i++) {
Expand Down
27 changes: 19 additions & 8 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,20 @@ SEXP allNAR(SEXP x) {
* adds validation for:
* correct range [1,ncol], and if type real checks whole integer
* existing columns for character
* optionally check for no duplicates
* optionally (check_dups) check for no duplicates
* optionally (skip_absent) skip (return 0) for numbers outside the range or not naming extant columns
*/
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups) {
SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP skip_absent) {
if (!isNewList(x))
error(_("'x' argument must be data.table compatible"));
if (!IS_TRUE_OR_FALSE(check_dups))
error(_("%s must be TRUE or FALSE"), "check_dups");
if (!IS_TRUE_OR_FALSE(skip_absent))
error(_("%s must be TRUE or FALSE"), "skip_absent");
int protecti = 0;
R_len_t nx = length(x);
R_len_t nc = length(cols);
bool bskip_absent = LOGICAL(skip_absent)[0];
SEXP ricols = R_NilValue;
if (isNull(cols)) { // seq_along(x)
ricols = PROTECT(allocVector(INTSXP, nx)); protecti++;
Expand All @@ -116,26 +120,33 @@ SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups) {
ricols = PROTECT(allocVector(INTSXP, 0)); protecti++;
} else if (isInteger(cols) || isReal(cols)) {
if (isInteger(cols)) {
ricols = cols;
if (bskip_absent) { // we might overwrite values with 0, so make a copy
ricols = PROTECT(duplicate(cols)); protecti++;
} else
ricols = cols;
} else if (isReal(cols)) {
if (!isRealReallyInt(cols))
error(_("argument specifying columns is type 'double' and one or more items in it are not whole integers"));
ricols = PROTECT(coerceVector(cols, INTSXP)); protecti++;
}
int *icols = INTEGER(ricols);
for (int i=0; i<nc; i++) {
if ((icols[i]>nx) || (icols[i]<1))
for (int i=0; i<nc; ++i) {
if ((!bskip_absent && icols[i]>nx) || (icols[i]<1))
error(_("argument specifying columns received non-existing column(s): cols[%d]=%d"), i+1, icols[i]); // handles NAs also
else if(bskip_absent && icols[i]>nx)
icols[i] = 0L;
}
} else if (isString(cols)) {
SEXP xnames = PROTECT(getAttrib(x, R_NamesSymbol)); protecti++;
if (isNull(xnames))
error(_("'x' argument data.table has no names"));
ricols = PROTECT(chmatch(cols, xnames, 0)); protecti++;
int *icols = INTEGER(ricols);
for (int i=0; i<nc; i++) {
if (icols[i]==0)
error(_("argument specifying columns received non-existing column(s): cols[%d]='%s'"), i+1, CHAR(STRING_ELT(cols, i))); // handles NAs also
if (!bskip_absent) {
for (int i=0; i<nc; ++i) {
if (icols[i]==0)
error(_("argument specifying columns received non-existing column(s): cols[%d]='%s'"), i+1, CHAR(STRING_ELT(cols, i))); // handles NAs also
}
}
} else {
error(_("argument specifying columns must be character or numeric"));
Expand Down

0 comments on commit 0711e40

Please sign in to comment.