From a7882332c764a4df7add2abeaee0529e84396628 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Mon, 18 Mar 2024 00:52:43 +0530 Subject: [PATCH 01/16] Added the source of the error in every error message --- R/data.table.R | 16 ++++++++-------- R/duplicated.R | 2 +- R/frank.R | 2 +- R/setkey.R | 2 +- R/setops.R | 4 ++-- R/wrappers.R | 2 +- inst/tests/tests.Rraw | 21 +++++++++++++++++++++ src/data.table.h | 2 +- src/nafill.c | 2 +- src/utils.c | 18 +++++++++--------- 10 files changed, 46 insertions(+), 25 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index c80e89f88..881a07aff 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -481,8 +481,8 @@ replace_dot_alias = function(e) { allow.cartesian = TRUE } # TODO: collect all '==' ops first to speeden up Cnestedid - rightcols = colnamesInt(x, names(on), check_dups=FALSE) - leftcols = colnamesInt(i, unname(on), check_dups=FALSE) + rightcols = colnamesInt(x, names(on), check_dups=FALSE, source = "data.table.R(ln:484)" ) + leftcols = colnamesInt(i, unname(on), check_dups=FALSE, source = "data.table.R(ln:484)") } else { ## missing on rightcols = chmatch(key(x), names_x) # NAs here (i.e. invalid data.table) checked in bmerge() @@ -2352,7 +2352,7 @@ na.omit.data.table = function (object, cols = seq_along(object), invert = FALSE, if (!cedta()) return(NextMethod()) # nocov if ( !missing(invert) && is.na(as.logical(invert)) ) stopf("Argument 'invert' must be logical TRUE/FALSE") - cols = colnamesInt(object, cols, check_dups=FALSE) + cols = colnamesInt(object, cols, check_dups=FALSE, "data.table.R(ln:2355)") ix = .Call(Cdt_na, object, cols) # forgot about invert with no NA case, #2660 if (invert) { @@ -2498,7 +2498,7 @@ copy = function(x) { .shallow = function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) { wasnull = is.null(cols) - cols = colnamesInt(x, cols, check_dups=FALSE) + cols = colnamesInt(x, cols, check_dups=FALSE, "data.table.R(ln:2501)") ans = .Call(Cshallowwrapper, x, cols) # copies VECSXP only if(retain.key){ @@ -2685,11 +2685,11 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL) # before/af stopf("Provide either before= or after= but not both") if (length(before)>1L || length(after)>1L) stopf("before=/after= accept a single column name or number, not more than one") - neworder = colnamesInt(x, neworder, check_dups=FALSE) # dups are now checked inside Csetcolorder below + neworder = colnamesInt(x, neworder, check_dups=FALSE, "data.table.R(ln:2688)") # dups are now checked inside Csetcolorder below if (length(before)) - neworder = c(setdiff(seq_len(colnamesInt(x, before) - 1L), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, before, source = "data.table.R(ln:2690)") - 1L), neworder), neworder) if (length(after)) - neworder = c(setdiff(seq_len(colnamesInt(x, after)), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, after, source = "data.table.R(ln:2692)")), neworder), neworder) if (length(neworder) != length(x)) { # pad by the missing elements (checks inside Csetcolorder catch other mistakes) neworder = c(neworder, setdiff(seq_along(x), neworder)) @@ -2970,7 +2970,7 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { } else if (!length(cols)) { stopf("x is a list, 'cols' cannot be 0-length.") } - cols = colnamesInt(x, cols, check_dups=FALSE) + cols = colnamesInt(x, cols, check_dups=FALSE, "data.table.R(ln:2973)") ids = .Call(Crleid, x, cols) if (!is.null(prefix)) ids = paste0(prefix, ids) ids diff --git a/R/duplicated.R b/R/duplicated.R index 27b903812..f26e78a27 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -74,7 +74,7 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon ## unique.data.table has been refactored to simply call duplicated.data.table ## making the refactor unnecessary, but let's leave it here just in case .duplicated.helper = function(x, by) { - cols = colnamesInt(x, by, check_dups=FALSE) + cols = colnamesInt(x, by, check_dups=FALSE, "duplicated.R(ln:77)") use.keyprefix = if (is.null(by)) FALSE else { haskey(x) && length(by) <= length(key(x)) && diff --git a/R/frank.R b/R/frank.R index 419f5ea41..d43f7e3cf 100644 --- a/R/frank.R +++ b/R/frank.R @@ -18,7 +18,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a cols = 1L x = as_list(x) } else { - cols = colnamesInt(x, cols, check_dups=TRUE) + cols = colnamesInt(x, cols, check_dups=TRUE, "frank.R(ln:21)") if (!length(cols)) stopf("x is a list, 'cols' can not be 0-length") } diff --git a/R/setkey.R b/R/setkey.R index 84488a803..468ea8d75 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -168,7 +168,7 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las by = NULL } else { if (!length(x)) return(integer(0L)) # e.g. forderv(data.table(NULL)) and forderv(list()) return integer(0L)) - by = colnamesInt(x, by, check_dups=FALSE) + by = colnamesInt(x, by, check_dups=FALSE, "setKey.R(ln:171)") if (length(order) == 1L) order = rep(order, length(by)) } order = as.integer(order) # length and contents of order being +1/-1 is checked at C level diff --git a/R/setops.R b/R/setops.R index 1034b0f0f..8dde9e72a 100644 --- a/R/setops.R +++ b/R/setops.R @@ -3,9 +3,9 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE) if (!is.data.table(x) || !is.data.table(y)) stopf("x and y must both be data.tables") # !ncol redundant since all 0-column data.tables have 0 rows if (!nrow(x)) return(x) - by.x = colnamesInt(x, by.x, check_dups=TRUE) + by.x = colnamesInt(x, by.x, check_dups=TRUE, "setops.R(ln:6)") if (!nrow(y)) return(unique(x, by=by.x)) - by.y = colnamesInt(y, by.y, check_dups=TRUE) + by.y = colnamesInt(y, by.y, check_dups=TRUE, "setops.R(ln:8)") if (length(by.x) != length(by.y)) stopf("length(by.x) != length(by.y)") # factor in x should've factor/character in y, and viceversa for (a in seq_along(by.x)) { diff --git a/R/wrappers.R b/R/wrappers.R index dcf8ba08e..12705d001 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -8,7 +8,7 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) -colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) +colnamesInt = function(x, cols, check_dups=FALSE, source = "Unknown") .Call(CcolnamesInt, x, cols, check_dups, source) testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L]) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bca2c13ab..9a1fbf1ea 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18350,3 +18350,24 @@ if (test_bit64) { apple = data.table(id = c("a", "b", "b"), time = c(1L, 1L, 2L), y = i64v[1:3]) test(2248, dcast(apple, id ~ time, value.var = "y"), data.table(id = c('a', 'b'), `1` = i64v[1:2], `2` = i64v[4:3], key='id')) } + +# Tests for the updated error message containing source of the problem in colnamesInt() +test(2249.1, colnamesInt(1, NULL, NULL, "tests"), + error = "In tests 'x' argument must be data.table compatible" ) +test(2249.2, colnamesInt(list(), NULL, 2, "tests"), + error = "In tests 'check_dups' must be TRUE or FALSE" ) +test(2249.3, colnamesInt(list(), c(1.5, 2.5), TRUE, "tests"), + error = "In tests argument specifying columns is type 'double' and one or more items in it are not whole integers" ) +test(2249.4, colnamesInt(list(), c(0, 1), FALSE, "tests"), + error = "In tests argument specifying columns received non-existing column(s): cols[1]=0" ) + +x <- data.table(a = 1:3, b = 4:6) +names(x) <- NULL +test(2249.5, colnamesInt(x, c("a", "b"), TRUE, "tests"), + error = "In tests 'x' argument data.table has no names" ) +test(2249.6, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "c"), TRUE, "tests"), + error = "In tests argument specifying columns received non-existing column(s): cols[2]='c'" ) +test(2249.7, colnamesInt(list(), TRUE, TRUE, "tests"), + error = "In tests argument specifying columns must be character or numeric" ) +test(2249.8, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "a"), TRUE, "tests" ), + error = "In tests argument specifying columns received duplicate column(s)" ) \ No newline at end of file diff --git a/src/data.table.h b/src/data.table.h index 21b7e30e0..03af40219 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -238,7 +238,7 @@ bool isRealReallyInt(SEXP x); SEXP isRealReallyIntR(SEXP x); SEXP isReallyReal(SEXP x); bool allNA(SEXP x, bool errorForBadType); -SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups); +SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP source); bool INHERITS(SEXP x, SEXP char_); SEXP copyAsPlain(SEXP x); void copySharedColumns(SEXP x); diff --git a/src/nafill.c b/src/nafill.c index 03aa6d091..acccf3b9e 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -114,7 +114,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list SET_VECTOR_ELT(obj, 0, obj1); } - SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj) + SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE),mkString("nafill.R(ln:117)"))); protecti++; // nafill cols=NULL which turns into seq_along(obj) x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++; int *icols = INTEGER(ricols); for (int i=0; inx) || (icols[i]<1)) - error(_("argument specifying columns received non-existing column(s): cols[%d]=%d"), i+1, icols[i]); // handles NAs also + error(_("In %s argument specifying columns received non-existing column(s): cols[%d]=%d"),CHAR(STRING_ELT(source, 0)), i+1, icols[i]); // handles NAs also } } else if (isString(cols)) { SEXP xnames = PROTECT(getAttrib(x, R_NamesSymbol)); protecti++; if (isNull(xnames)) - error(_("'x' argument data.table has no names")); + error(_("In %s 'x' argument data.table has no names"), CHAR(STRING_ELT(source, 0))); ricols = PROTECT(chmatch(cols, xnames, 0)); protecti++; int *icols = INTEGER(ricols); for (int i=0; i Date: Mon, 18 Mar 2024 00:59:28 +0530 Subject: [PATCH 02/16] loading colnamesInt in tests.Rraw --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9a1fbf1ea..4920a45d6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -72,6 +72,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { which.last = data.table:::which.last `-.IDate` = data.table:::`-.IDate` haszlib = data.table:::haszlib + colnamesInt = data.table:::colnamesInt # Also, for functions that are masked by other packages, we need to map the data.table one. Or else, # the other package's function would be picked up. As above, we only need to do this because we desire From 4cc8181ae4b7e82e8dd0e750b8b07cede281c29e Mon Sep 17 00:00:00 2001 From: Nitish Jha <151559388+Nj221102@users.noreply.github.com> Date: Mon, 18 Mar 2024 11:58:17 +0530 Subject: [PATCH 03/16] Update R/data.table.R Co-authored-by: Michael Chirico --- R/data.table.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 881a07aff..cc925484c 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -481,8 +481,8 @@ replace_dot_alias = function(e) { allow.cartesian = TRUE } # TODO: collect all '==' ops first to speeden up Cnestedid - rightcols = colnamesInt(x, names(on), check_dups=FALSE, source = "data.table.R(ln:484)" ) - leftcols = colnamesInt(i, unname(on), check_dups=FALSE, source = "data.table.R(ln:484)") + rightcols = colnamesInt(x, names(on), check_dups=FALSE, source = "matching columns from 'on' names to columns in 'x'") + leftcols = colnamesInt(i, unname(on), check_dups=FALSE, source = "matching column names from 'on' values to columns in 'i'") } else { ## missing on rightcols = chmatch(key(x), names_x) # NAs here (i.e. invalid data.table) checked in bmerge() From ce7f5c75f2d213c9ab45a6518b9e5ed5835c873e Mon Sep 17 00:00:00 2001 From: nitish jha Date: Mon, 18 Mar 2024 14:49:23 +0530 Subject: [PATCH 04/16] changed source message --- R/data.table.R | 12 ++++++------ R/duplicated.R | 2 +- R/frank.R | 2 +- R/setkey.R | 2 +- R/setops.R | 4 ++-- src/nafill.c | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index cc925484c..f38ca0862 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2352,7 +2352,7 @@ na.omit.data.table = function (object, cols = seq_along(object), invert = FALSE, if (!cedta()) return(NextMethod()) # nocov if ( !missing(invert) && is.na(as.logical(invert)) ) stopf("Argument 'invert' must be logical TRUE/FALSE") - cols = colnamesInt(object, cols, check_dups=FALSE, "data.table.R(ln:2355)") + cols = colnamesInt(object, cols, check_dups=FALSE, source = "matching column names from 'cols' to columns in 'object'") ix = .Call(Cdt_na, object, cols) # forgot about invert with no NA case, #2660 if (invert) { @@ -2498,7 +2498,7 @@ copy = function(x) { .shallow = function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) { wasnull = is.null(cols) - cols = colnamesInt(x, cols, check_dups=FALSE, "data.table.R(ln:2501)") + cols = colnamesInt(x, cols, check_dups=FALSE, source = "matching column names from 'cols' to columns in 'x'") ans = .Call(Cshallowwrapper, x, cols) # copies VECSXP only if(retain.key){ @@ -2685,11 +2685,11 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL) # before/af stopf("Provide either before= or after= but not both") if (length(before)>1L || length(after)>1L) stopf("before=/after= accept a single column name or number, not more than one") - neworder = colnamesInt(x, neworder, check_dups=FALSE, "data.table.R(ln:2688)") # dups are now checked inside Csetcolorder below + neworder = colnamesInt(x, neworder, check_dups=FALSE, source = "matching column names from 'neworder' to columns in 'x'") # dups are now checked inside Csetcolorder below if (length(before)) - neworder = c(setdiff(seq_len(colnamesInt(x, before, source = "data.table.R(ln:2690)") - 1L), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, before, source = "matching column names from 'before' to columns in 'x'") - 1L), neworder), neworder) if (length(after)) - neworder = c(setdiff(seq_len(colnamesInt(x, after, source = "data.table.R(ln:2692)")), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, after, source = "matching column names from 'after' to columns in 'x'")), neworder), neworder) if (length(neworder) != length(x)) { # pad by the missing elements (checks inside Csetcolorder catch other mistakes) neworder = c(neworder, setdiff(seq_along(x), neworder)) @@ -2970,7 +2970,7 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { } else if (!length(cols)) { stopf("x is a list, 'cols' cannot be 0-length.") } - cols = colnamesInt(x, cols, check_dups=FALSE, "data.table.R(ln:2973)") + cols = colnamesInt(x, cols, check_dups=FALSE, source = "matching column names from 'cols' to columns in 'x'") ids = .Call(Crleid, x, cols) if (!is.null(prefix)) ids = paste0(prefix, ids) ids diff --git a/R/duplicated.R b/R/duplicated.R index f26e78a27..0e8d94dbb 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -74,7 +74,7 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon ## unique.data.table has been refactored to simply call duplicated.data.table ## making the refactor unnecessary, but let's leave it here just in case .duplicated.helper = function(x, by) { - cols = colnamesInt(x, by, check_dups=FALSE, "duplicated.R(ln:77)") + cols = colnamesInt(x, by, check_dups=FALSE, source = "matching column names from 'by' to columns in 'x'") use.keyprefix = if (is.null(by)) FALSE else { haskey(x) && length(by) <= length(key(x)) && diff --git a/R/frank.R b/R/frank.R index d43f7e3cf..3b5d5ca53 100644 --- a/R/frank.R +++ b/R/frank.R @@ -18,7 +18,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a cols = 1L x = as_list(x) } else { - cols = colnamesInt(x, cols, check_dups=TRUE, "frank.R(ln:21)") + cols = colnamesInt(x, cols, check_dups=TRUE, source = "matching column names from 'cols' to columns in 'x'") if (!length(cols)) stopf("x is a list, 'cols' can not be 0-length") } diff --git a/R/setkey.R b/R/setkey.R index 468ea8d75..f23f69b74 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -168,7 +168,7 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las by = NULL } else { if (!length(x)) return(integer(0L)) # e.g. forderv(data.table(NULL)) and forderv(list()) return integer(0L)) - by = colnamesInt(x, by, check_dups=FALSE, "setKey.R(ln:171)") + by = colnamesInt(x, by, check_dups=FALSE, source = "matching column names from 'by' to columns in 'x'") if (length(order) == 1L) order = rep(order, length(by)) } order = as.integer(order) # length and contents of order being +1/-1 is checked at C level diff --git a/R/setops.R b/R/setops.R index 8dde9e72a..76a30beea 100644 --- a/R/setops.R +++ b/R/setops.R @@ -3,9 +3,9 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE) if (!is.data.table(x) || !is.data.table(y)) stopf("x and y must both be data.tables") # !ncol redundant since all 0-column data.tables have 0 rows if (!nrow(x)) return(x) - by.x = colnamesInt(x, by.x, check_dups=TRUE, "setops.R(ln:6)") + by.x = colnamesInt(x, by.x, check_dups=TRUE, source = "matching column names from 'by.x' to columns in 'x'") if (!nrow(y)) return(unique(x, by=by.x)) - by.y = colnamesInt(y, by.y, check_dups=TRUE, "setops.R(ln:8)") + by.y = colnamesInt(y, by.y, check_dups=TRUE, source = "matching column names from 'by.y' to columns in 'y'") if (length(by.x) != length(by.y)) stopf("length(by.x) != length(by.y)") # factor in x should've factor/character in y, and viceversa for (a in seq_along(by.x)) { diff --git a/src/nafill.c b/src/nafill.c index acccf3b9e..651b80a33 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -114,7 +114,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list SET_VECTOR_ELT(obj, 0, obj1); } - SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE),mkString("nafill.R(ln:117)"))); protecti++; // nafill cols=NULL which turns into seq_along(obj) + SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE),mkString("matching column names from 'cols' to columns in 'obj'"))); protecti++; // nafill cols=NULL which turns into seq_along(obj) x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++; int *icols = INTEGER(ricols); for (int i=0; i Date: Tue, 19 Mar 2024 16:02:56 +0530 Subject: [PATCH 05/16] added new enty --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index cb020d1e1..a4822361d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -50,6 +50,8 @@ 7. Updated a test relying on `>` working for comparing language objects to a string, which will be deprecated by R, [#5977](https://github.com/Rdatatable/data.table/issues/5977); no user-facing effect. Thanks to R-core for continuously improving the language. +8. Added an optional source argument to the colnamesInt function, providing hints about the source of errors. It improves the clarity of error messages by including the source wherever the colnamesInt function is called. Additionally, tests have been added to ensure the accuracy of the updated error messages. [#5039](https://github.com/Rdatatable/data.table/issues/5039), Thanks @MichaelChirico for the suggestion and @Nj221102 for the fix. + # data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024) ## BREAKING CHANGE From 589c265efbcfe726a4be7c901a9425cb01544f44 Mon Sep 17 00:00:00 2001 From: Nitish Jha <151559388+Nj221102@users.noreply.github.com> Date: Tue, 19 Mar 2024 18:38:40 +0530 Subject: [PATCH 06/16] Update R/wrappers.R Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- R/wrappers.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/wrappers.R b/R/wrappers.R index 12705d001..96961e706 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -8,7 +8,7 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) -colnamesInt = function(x, cols, check_dups=FALSE, source = "Unknown") .Call(CcolnamesInt, x, cols, check_dups, source) +colnamesInt = function(x, cols, check_dups=FALSE, source = "") .Call(CcolnamesInt, x, cols, check_dups, source) testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L]) From 2af7e78d7a937fcf977116234baa861559173f46 Mon Sep 17 00:00:00 2001 From: Nitish Jha <151559388+Nj221102@users.noreply.github.com> Date: Tue, 19 Mar 2024 18:39:41 +0530 Subject: [PATCH 07/16] Update src/utils.c Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.c b/src/utils.c index 25de4aeb2..2a39f6fa2 100644 --- a/src/utils.c +++ b/src/utils.c @@ -103,7 +103,7 @@ SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP source) { if (!isNewList(x)) error(_("In %s 'x' argument must be data.table compatible"),CHAR(STRING_ELT(source, 0))); if (!IS_TRUE_OR_FALSE(check_dups)) - error(_("In %s 'check_dups' must be TRUE or FALSE"), CHAR(STRING_ELT(source, 0))); + error(_("%s must be TRUE or FALSE"), "check_dups"); int protecti = 0; R_len_t nx = length(x); R_len_t nc = length(cols); From 8eeb5c86e68a1bc36f515ec16b55ef4e9b2219e5 Mon Sep 17 00:00:00 2001 From: Nitish Jha <151559388+Nj221102@users.noreply.github.com> Date: Tue, 19 Mar 2024 18:40:15 +0530 Subject: [PATCH 08/16] Update NEWS.md Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a4822361d..9db87112b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -50,7 +50,8 @@ 7. Updated a test relying on `>` working for comparing language objects to a string, which will be deprecated by R, [#5977](https://github.com/Rdatatable/data.table/issues/5977); no user-facing effect. Thanks to R-core for continuously improving the language. -8. Added an optional source argument to the colnamesInt function, providing hints about the source of errors. It improves the clarity of error messages by including the source wherever the colnamesInt function is called. Additionally, tests have been added to ensure the accuracy of the updated error messages. [#5039](https://github.com/Rdatatable/data.table/issues/5039), Thanks @MichaelChirico for the suggestion and @Nj221102 for the fix. +8. Added a source argument to `colnamesInt`, to provide additional debug information about calling code, [#5039](https://github.com/Rdatatable/data.table/issues/5039). +`colnamesInt` is the internal workhorse for converting column names to the corresponding column number in several places. Thanks to @MichaelChirico for the suggestion and @Nj221102 for the fix. # data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024) From 2833dd0f2d3ddb1be16393594e3a8dc107dcc360 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Tue, 19 Mar 2024 19:41:54 +0530 Subject: [PATCH 09/16] shifting tests from tests.Rraw to nafill.Rraw --- inst/tests/nafill.Rraw | 20 ++++++++++++++++++++ inst/tests/tests.Rraw | 22 ---------------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index b72c0b506..1a59783ea 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -332,3 +332,23 @@ options(datatable.verbose=1) test(99.4, coerceAs(1, 2L), error="verbose option must be length 1 non-NA logical or integer") options(datatable.verbose=FALSE) +# Tests for the updated error message containing source of the problem in colnamesInt() +test(2249.1, colnamesInt(1, NULL, NULL, "tests"), + error = "In tests 'x' argument must be data.table compatible" ) +test(2249.2, colnamesInt(list(), NULL, 2, "tests"), + error = "check_dups must be TRUE or FALSE" ) +test(2249.3, colnamesInt(list(), c(1.5, 2.5), TRUE, "tests"), + error = "In tests argument specifying columns is type 'double' and one or more items in it are not whole integers" ) +test(2249.4, colnamesInt(list(), c(0, 1), FALSE, "tests"), + error = "In tests argument specifying columns received non-existing column(s): cols[1]=0" ) + +x <- data.table(a = 1:3, b = 4:6) +names(x) <- NULL +test(2249.5, colnamesInt(x, c("a", "b"), TRUE, "tests"), + error = "In tests 'x' argument data.table has no names" ) +test(2249.6, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "c"), TRUE, "tests"), + error = "In tests argument specifying columns received non-existing column(s): cols[2]='c'" ) +test(2249.7, colnamesInt(list(), TRUE, TRUE, "tests"), + error = "In tests argument specifying columns must be character or numeric" ) +test(2249.8, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "a"), TRUE, "tests" ), + error = "In tests argument specifying columns received duplicate column(s)" ) \ No newline at end of file diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4920a45d6..bca2c13ab 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -72,7 +72,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { which.last = data.table:::which.last `-.IDate` = data.table:::`-.IDate` haszlib = data.table:::haszlib - colnamesInt = data.table:::colnamesInt # Also, for functions that are masked by other packages, we need to map the data.table one. Or else, # the other package's function would be picked up. As above, we only need to do this because we desire @@ -18351,24 +18350,3 @@ if (test_bit64) { apple = data.table(id = c("a", "b", "b"), time = c(1L, 1L, 2L), y = i64v[1:3]) test(2248, dcast(apple, id ~ time, value.var = "y"), data.table(id = c('a', 'b'), `1` = i64v[1:2], `2` = i64v[4:3], key='id')) } - -# Tests for the updated error message containing source of the problem in colnamesInt() -test(2249.1, colnamesInt(1, NULL, NULL, "tests"), - error = "In tests 'x' argument must be data.table compatible" ) -test(2249.2, colnamesInt(list(), NULL, 2, "tests"), - error = "In tests 'check_dups' must be TRUE or FALSE" ) -test(2249.3, colnamesInt(list(), c(1.5, 2.5), TRUE, "tests"), - error = "In tests argument specifying columns is type 'double' and one or more items in it are not whole integers" ) -test(2249.4, colnamesInt(list(), c(0, 1), FALSE, "tests"), - error = "In tests argument specifying columns received non-existing column(s): cols[1]=0" ) - -x <- data.table(a = 1:3, b = 4:6) -names(x) <- NULL -test(2249.5, colnamesInt(x, c("a", "b"), TRUE, "tests"), - error = "In tests 'x' argument data.table has no names" ) -test(2249.6, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "c"), TRUE, "tests"), - error = "In tests argument specifying columns received non-existing column(s): cols[2]='c'" ) -test(2249.7, colnamesInt(list(), TRUE, TRUE, "tests"), - error = "In tests argument specifying columns must be character or numeric" ) -test(2249.8, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "a"), TRUE, "tests" ), - error = "In tests argument specifying columns received duplicate column(s)" ) \ No newline at end of file From 2f71063b1936826e4de4d164787c0cfb35d324e8 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Tue, 19 Mar 2024 22:34:22 +0530 Subject: [PATCH 10/16] format change --- R/data.table.R | 17 +++++++++-------- R/duplicated.R | 2 +- R/frank.R | 2 +- R/setkey.R | 2 +- R/setops.R | 4 ++-- inst/tests/nafill.Rraw | 30 +++++++++++++++--------------- src/nafill.c | 2 +- src/utils.c | 14 +++++++------- 8 files changed, 37 insertions(+), 36 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index f38ca0862..022229c97 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -51,6 +51,7 @@ null.data.table = function() { data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFactors=FALSE) { + colnamesInt(1, NULL, NULL, "While running tests") # NOTE: It may be faster in some circumstances for users to create a data.table by creating a list l # first, and then setattr(l,"class",c("data.table","data.frame")) and forgo checking. x = list(...) # list() doesn't copy named inputs as from R >= 3.1.0 (a very welcome change) @@ -481,8 +482,8 @@ replace_dot_alias = function(e) { allow.cartesian = TRUE } # TODO: collect all '==' ops first to speeden up Cnestedid - rightcols = colnamesInt(x, names(on), check_dups=FALSE, source = "matching columns from 'on' names to columns in 'x'") - leftcols = colnamesInt(i, unname(on), check_dups=FALSE, source = "matching column names from 'on' values to columns in 'i'") + rightcols = colnamesInt(x, names(on), check_dups=FALSE, source = "While matching columns from 'on' names to columns in 'x'") + leftcols = colnamesInt(i, unname(on), check_dups=FALSE, source = "While matching column names from 'on' values to columns in 'i'") } else { ## missing on rightcols = chmatch(key(x), names_x) # NAs here (i.e. invalid data.table) checked in bmerge() @@ -2352,7 +2353,7 @@ na.omit.data.table = function (object, cols = seq_along(object), invert = FALSE, if (!cedta()) return(NextMethod()) # nocov if ( !missing(invert) && is.na(as.logical(invert)) ) stopf("Argument 'invert' must be logical TRUE/FALSE") - cols = colnamesInt(object, cols, check_dups=FALSE, source = "matching column names from 'cols' to columns in 'object'") + cols = colnamesInt(object, cols, check_dups=FALSE, source = "While matching column names from 'cols' to columns in 'object'") ix = .Call(Cdt_na, object, cols) # forgot about invert with no NA case, #2660 if (invert) { @@ -2498,7 +2499,7 @@ copy = function(x) { .shallow = function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) { wasnull = is.null(cols) - cols = colnamesInt(x, cols, check_dups=FALSE, source = "matching column names from 'cols' to columns in 'x'") + cols = colnamesInt(x, cols, check_dups=FALSE, source = "While matching column names from 'cols' to columns in 'x'") ans = .Call(Cshallowwrapper, x, cols) # copies VECSXP only if(retain.key){ @@ -2685,11 +2686,11 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL) # before/af stopf("Provide either before= or after= but not both") if (length(before)>1L || length(after)>1L) stopf("before=/after= accept a single column name or number, not more than one") - neworder = colnamesInt(x, neworder, check_dups=FALSE, source = "matching column names from 'neworder' to columns in 'x'") # dups are now checked inside Csetcolorder below + neworder = colnamesInt(x, neworder, check_dups=FALSE, source = "While matching column names from 'neworder' to columns in 'x'") # dups are now checked inside Csetcolorder below if (length(before)) - neworder = c(setdiff(seq_len(colnamesInt(x, before, source = "matching column names from 'before' to columns in 'x'") - 1L), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, before, source = "While matching column names from 'before' to columns in 'x'") - 1L), neworder), neworder) if (length(after)) - neworder = c(setdiff(seq_len(colnamesInt(x, after, source = "matching column names from 'after' to columns in 'x'")), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, after, source = "While matching column names from 'after' to columns in 'x'")), neworder), neworder) if (length(neworder) != length(x)) { # pad by the missing elements (checks inside Csetcolorder catch other mistakes) neworder = c(neworder, setdiff(seq_along(x), neworder)) @@ -2970,7 +2971,7 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { } else if (!length(cols)) { stopf("x is a list, 'cols' cannot be 0-length.") } - cols = colnamesInt(x, cols, check_dups=FALSE, source = "matching column names from 'cols' to columns in 'x'") + cols = colnamesInt(x, cols, check_dups=FALSE, source = "While matching column names from 'cols' to columns in 'x'") ids = .Call(Crleid, x, cols) if (!is.null(prefix)) ids = paste0(prefix, ids) ids diff --git a/R/duplicated.R b/R/duplicated.R index 0e8d94dbb..cbbe3a355 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -74,7 +74,7 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon ## unique.data.table has been refactored to simply call duplicated.data.table ## making the refactor unnecessary, but let's leave it here just in case .duplicated.helper = function(x, by) { - cols = colnamesInt(x, by, check_dups=FALSE, source = "matching column names from 'by' to columns in 'x'") + cols = colnamesInt(x, by, check_dups=FALSE, source = "While matching column names from 'by' to columns in 'x'") use.keyprefix = if (is.null(by)) FALSE else { haskey(x) && length(by) <= length(key(x)) && diff --git a/R/frank.R b/R/frank.R index 3b5d5ca53..81ada1914 100644 --- a/R/frank.R +++ b/R/frank.R @@ -18,7 +18,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a cols = 1L x = as_list(x) } else { - cols = colnamesInt(x, cols, check_dups=TRUE, source = "matching column names from 'cols' to columns in 'x'") + cols = colnamesInt(x, cols, check_dups=TRUE, source = "While matching column names from 'cols' to columns in 'x'") if (!length(cols)) stopf("x is a list, 'cols' can not be 0-length") } diff --git a/R/setkey.R b/R/setkey.R index f23f69b74..af37c1a3e 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -168,7 +168,7 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las by = NULL } else { if (!length(x)) return(integer(0L)) # e.g. forderv(data.table(NULL)) and forderv(list()) return integer(0L)) - by = colnamesInt(x, by, check_dups=FALSE, source = "matching column names from 'by' to columns in 'x'") + by = colnamesInt(x, by, check_dups=FALSE, source = "While matching column names from 'by' to columns in 'x'") if (length(order) == 1L) order = rep(order, length(by)) } order = as.integer(order) # length and contents of order being +1/-1 is checked at C level diff --git a/R/setops.R b/R/setops.R index 76a30beea..280f317ba 100644 --- a/R/setops.R +++ b/R/setops.R @@ -3,9 +3,9 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE) if (!is.data.table(x) || !is.data.table(y)) stopf("x and y must both be data.tables") # !ncol redundant since all 0-column data.tables have 0 rows if (!nrow(x)) return(x) - by.x = colnamesInt(x, by.x, check_dups=TRUE, source = "matching column names from 'by.x' to columns in 'x'") + by.x = colnamesInt(x, by.x, check_dups=TRUE, source = "While matching column names from 'by.x' to columns in 'x'") if (!nrow(y)) return(unique(x, by=by.x)) - by.y = colnamesInt(y, by.y, check_dups=TRUE, source = "matching column names from 'by.y' to columns in 'y'") + by.y = colnamesInt(y, by.y, check_dups=TRUE, source = "While matching column names from 'by.y' to columns in 'y'") if (length(by.x) != length(by.y)) stopf("length(by.x) != length(by.y)") # factor in x should've factor/character in y, and viceversa for (a in seq_along(by.x)) { diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 1a59783ea..83017b1d8 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -333,22 +333,22 @@ test(99.4, coerceAs(1, 2L), error="verbose option must be length 1 non-NA logica options(datatable.verbose=FALSE) # Tests for the updated error message containing source of the problem in colnamesInt() -test(2249.1, colnamesInt(1, NULL, NULL, "tests"), - error = "In tests 'x' argument must be data.table compatible" ) -test(2249.2, colnamesInt(list(), NULL, 2, "tests"), +test(100.1, colnamesInt(1, NULL, NULL, "While running tests"), + error = "While running tests\n'x' argument must be data.table compatible" ) +test(100.2, colnamesInt(list(), NULL, 2, "While running tests"), error = "check_dups must be TRUE or FALSE" ) -test(2249.3, colnamesInt(list(), c(1.5, 2.5), TRUE, "tests"), - error = "In tests argument specifying columns is type 'double' and one or more items in it are not whole integers" ) -test(2249.4, colnamesInt(list(), c(0, 1), FALSE, "tests"), - error = "In tests argument specifying columns received non-existing column(s): cols[1]=0" ) +test(100.3, colnamesInt(list(), c(1.5, 2.5), TRUE, "While running tests"), + error = "While running tests\nargument specifying columns is type 'double' and one or more items in it are not whole integers" ) +test(100.4, colnamesInt(list(), c(0, 1), FALSE, "While running tests"), + error = "While running tests\nargument specifying columns received non-existing column(s): cols[1]=0" ) x <- data.table(a = 1:3, b = 4:6) names(x) <- NULL -test(2249.5, colnamesInt(x, c("a", "b"), TRUE, "tests"), - error = "In tests 'x' argument data.table has no names" ) -test(2249.6, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "c"), TRUE, "tests"), - error = "In tests argument specifying columns received non-existing column(s): cols[2]='c'" ) -test(2249.7, colnamesInt(list(), TRUE, TRUE, "tests"), - error = "In tests argument specifying columns must be character or numeric" ) -test(2249.8, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "a"), TRUE, "tests" ), - error = "In tests argument specifying columns received duplicate column(s)" ) \ No newline at end of file +test(100.5, colnamesInt(x, c("a", "b"), TRUE, "While running tests"), + error = "While running tests\n'x' argument data.table has no names" ) +test(100.6, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "c"), TRUE, "While running tests"), + error = "While running tests\nargument specifying columns received non-existing column(s): cols[2]='c'" ) +test(100.7, colnamesInt(list(), TRUE, TRUE, "While running tests"), + error = "While running tests\nargument specifying columns must be character or numeric" ) +test(100.8, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "a"), TRUE, "While running tests" ), + error = "While running tests\nargument specifying columns received duplicate column(s)" ) \ No newline at end of file diff --git a/src/nafill.c b/src/nafill.c index 651b80a33..f8141c3aa 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -114,7 +114,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list SET_VECTOR_ELT(obj, 0, obj1); } - SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE),mkString("matching column names from 'cols' to columns in 'obj'"))); protecti++; // nafill cols=NULL which turns into seq_along(obj) + SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE),mkString("While matching column names from 'cols' to columns in 'obj'"))); protecti++; // nafill cols=NULL which turns into seq_along(obj) x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++; int *icols = INTEGER(ricols); for (int i=0; inx) || (icols[i]<1)) - error(_("In %s argument specifying columns received non-existing column(s): cols[%d]=%d"),CHAR(STRING_ELT(source, 0)), i+1, icols[i]); // handles NAs also + error(_("%s\nargument specifying columns received non-existing column(s): cols[%d]=%d"),CHAR(STRING_ELT(source, 0)), i+1, icols[i]); // handles NAs also } } else if (isString(cols)) { SEXP xnames = PROTECT(getAttrib(x, R_NamesSymbol)); protecti++; if (isNull(xnames)) - error(_("In %s 'x' argument data.table has no names"), CHAR(STRING_ELT(source, 0))); + error(_("%s\n'x' argument data.table has no names"), CHAR(STRING_ELT(source, 0))); ricols = PROTECT(chmatch(cols, xnames, 0)); protecti++; int *icols = INTEGER(ricols); for (int i=0; i Date: Tue, 19 Mar 2024 22:38:30 +0530 Subject: [PATCH 11/16] fixing --- R/data.table.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/data.table.R b/R/data.table.R index 022229c97..14f4ac548 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -51,7 +51,6 @@ null.data.table = function() { data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFactors=FALSE) { - colnamesInt(1, NULL, NULL, "While running tests") # NOTE: It may be faster in some circumstances for users to create a data.table by creating a list l # first, and then setattr(l,"class",c("data.table","data.frame")) and forgo checking. x = list(...) # list() doesn't copy named inputs as from R >= 3.1.0 (a very welcome change) From b45a193aeb088e1da5f5bfc5e1f7af07862fa821 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Thu, 28 Mar 2024 11:49:36 +0530 Subject: [PATCH 12/16] reverting changes --- R/data.table.R | 16 ++++----- R/duplicated.R | 2 +- R/frank.R | 2 +- R/setkey.R | 2 +- R/setops.R | 4 +-- R/wrappers.R | 2 +- inst/tests/tests.Rraw | 75 ------------------------------------------- src/data.table.h | 2 +- src/nafill.c | 2 +- src/utils.c | 16 ++++----- 10 files changed, 24 insertions(+), 99 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 9c2e02b78..24eff62d5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -485,8 +485,8 @@ replace_dot_alias = function(e) { allow.cartesian = TRUE } # TODO: collect all '==' ops first to speeden up Cnestedid - rightcols = colnamesInt(x, names(on), check_dups=FALSE, source = "While matching columns from 'on' names to columns in 'x'") - leftcols = colnamesInt(i, unname(on), check_dups=FALSE, source = "While matching column names from 'on' values to columns in 'i'") + rightcols = colnamesInt(x, names(on), check_dups=FALSE) + leftcols = colnamesInt(i, unname(on), check_dups=FALSE) } else { ## missing on rightcols = chmatch(key(x), names_x) # NAs here (i.e. invalid data.table) checked in bmerge() @@ -2356,7 +2356,7 @@ na.omit.data.table = function (object, cols = seq_along(object), invert = FALSE, if (!cedta()) return(NextMethod()) # nocov if ( !missing(invert) && is.na(as.logical(invert)) ) stopf("Argument 'invert' must be logical TRUE/FALSE") - cols = colnamesInt(object, cols, check_dups=FALSE, source = "While matching column names from 'cols' to columns in 'object'") + cols = colnamesInt(object, cols, check_dups=FALSE) ix = .Call(Cdt_na, object, cols) # forgot about invert with no NA case, #2660 if (invert) { @@ -2502,7 +2502,7 @@ copy = function(x) { .shallow = function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) { wasnull = is.null(cols) - cols = colnamesInt(x, cols, check_dups=FALSE, source = "While matching column names from 'cols' to columns in 'x'") + cols = colnamesInt(x, cols, check_dups=FALSE) ans = .Call(Cshallowwrapper, x, cols) # copies VECSXP only if(retain.key){ @@ -2689,11 +2689,11 @@ setcolorder = function(x, neworder=key(x), before=NULL, after=NULL) # before/af stopf("Provide either before= or after= but not both") if (length(before)>1L || length(after)>1L) stopf("before=/after= accept a single column name or number, not more than one") - neworder = colnamesInt(x, neworder, check_dups=FALSE, source = "While matching column names from 'neworder' to columns in 'x'") # dups are now checked inside Csetcolorder below + neworder = colnamesInt(x, neworder, check_dups=FALSE) # dups are now checked inside Csetcolorder below if (length(before)) - neworder = c(setdiff(seq_len(colnamesInt(x, before, source = "While matching column names from 'before' to columns in 'x'") - 1L), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, before) - 1L), neworder), neworder) if (length(after)) - neworder = c(setdiff(seq_len(colnamesInt(x, after, source = "While matching column names from 'after' to columns in 'x'")), neworder), neworder) + neworder = c(setdiff(seq_len(colnamesInt(x, after)), neworder), neworder) if (length(neworder) != length(x)) { # pad by the missing elements (checks inside Csetcolorder catch other mistakes) neworder = c(neworder, setdiff(seq_along(x), neworder)) @@ -2974,7 +2974,7 @@ rleidv = function(x, cols=seq_along(x), prefix=NULL) { } else if (!length(cols)) { stopf("x is a list, 'cols' cannot be 0-length.") } - cols = colnamesInt(x, cols, check_dups=FALSE, source = "While matching column names from 'cols' to columns in 'x'") + cols = colnamesInt(x, cols, check_dups=FALSE) ids = .Call(Crleid, x, cols) if (!is.null(prefix)) ids = paste0(prefix, ids) ids diff --git a/R/duplicated.R b/R/duplicated.R index cbbe3a355..27b903812 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -74,7 +74,7 @@ unique.data.table = function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alon ## unique.data.table has been refactored to simply call duplicated.data.table ## making the refactor unnecessary, but let's leave it here just in case .duplicated.helper = function(x, by) { - cols = colnamesInt(x, by, check_dups=FALSE, source = "While matching column names from 'by' to columns in 'x'") + cols = colnamesInt(x, by, check_dups=FALSE) use.keyprefix = if (is.null(by)) FALSE else { haskey(x) && length(by) <= length(key(x)) && diff --git a/R/frank.R b/R/frank.R index 81ada1914..419f5ea41 100644 --- a/R/frank.R +++ b/R/frank.R @@ -18,7 +18,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a cols = 1L x = as_list(x) } else { - cols = colnamesInt(x, cols, check_dups=TRUE, source = "While matching column names from 'cols' to columns in 'x'") + cols = colnamesInt(x, cols, check_dups=TRUE) if (!length(cols)) stopf("x is a list, 'cols' can not be 0-length") } diff --git a/R/setkey.R b/R/setkey.R index af37c1a3e..84488a803 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -168,7 +168,7 @@ forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.las by = NULL } else { if (!length(x)) return(integer(0L)) # e.g. forderv(data.table(NULL)) and forderv(list()) return integer(0L)) - by = colnamesInt(x, by, check_dups=FALSE, source = "While matching column names from 'by' to columns in 'x'") + by = colnamesInt(x, by, check_dups=FALSE) if (length(order) == 1L) order = rep(order, length(by)) } order = as.integer(order) # length and contents of order being +1/-1 is checked at C level diff --git a/R/setops.R b/R/setops.R index 280f317ba..1034b0f0f 100644 --- a/R/setops.R +++ b/R/setops.R @@ -3,9 +3,9 @@ setdiff_ = function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE) if (!is.data.table(x) || !is.data.table(y)) stopf("x and y must both be data.tables") # !ncol redundant since all 0-column data.tables have 0 rows if (!nrow(x)) return(x) - by.x = colnamesInt(x, by.x, check_dups=TRUE, source = "While matching column names from 'by.x' to columns in 'x'") + by.x = colnamesInt(x, by.x, check_dups=TRUE) if (!nrow(y)) return(unique(x, by=by.x)) - by.y = colnamesInt(y, by.y, check_dups=TRUE, source = "While matching column names from 'by.y' to columns in 'y'") + by.y = colnamesInt(y, by.y, check_dups=TRUE) if (length(by.x) != length(by.y)) stopf("length(by.x) != length(by.y)") # factor in x should've factor/character in y, and viceversa for (a in seq_along(by.x)) { diff --git a/R/wrappers.R b/R/wrappers.R index 96961e706..dcf8ba08e 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -8,7 +8,7 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) -colnamesInt = function(x, cols, check_dups=FALSE, source = "") .Call(CcolnamesInt, x, cols, check_dups, source) +colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) testMsg = function(status=0L, nx=2L, nk=2L) .Call(CtestMsgR, as.integer(status)[1L], as.integer(nx)[1L], as.integer(nk)[1L]) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 24b4f4c60..1f7ee59ff 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18350,78 +18350,3 @@ if (test_bit64) { apple = data.table(id = c("a", "b", "b"), time = c(1L, 1L, 2L), y = i64v[1:3]) test(2248, dcast(apple, id ~ time, value.var = "y"), data.table(id = c('a', 'b'), `1` = i64v[1:2], `2` = i64v[4:3], key='id')) } - -# Unit tests for DT[, .SD] retaining secondary indices, #1709 -DT = data.table(x=1:5, y=6:10) -setindex(DT, x) -test(2249.1, indices(DT), 'x') -test(2249.2, indices(DT[, .SD]), 'x') -setindex(DT, y) -test(2249.3, indices(DT), c('x', 'y')) -test(2249.4, indices(DT[, .SD]), c('x', 'y')) - -# make names(.SD) work - issue #795 -dt = data.table(a = 1:4, b = 5:8) -test(2250.01, dt[, names(.SD) := lapply(.SD, '*', 2), .SDcols = 1L], data.table(a = 1:4 * 2, b = 5:8)) -test(2250.02, dt[, names(.SD) := lapply(.SD, '*', 2), .SDcols = 2L], data.table(a = 1:4 * 2, b = 5:8 * 2)) -test(2250.03, dt[, names(.SD) := lapply(.SD, as.integer)], data.table(a = as.integer(1:4 * 2), b = as.integer(5:8 * 2))) -test(2250.04, dt[1L, names(.SD) := lapply(.SD, '+', 2L)], data.table(a = as.integer(c(4, 2:4 * 2)), b = as.integer(c(12, 6:8 * 2)))) -test(2250.05, dt[, setdiff(names(.SD), 'a') := NULL], data.table(a = as.integer(c(4, 2:4 * 2)))) -test(2250.06, dt[, c(names(.SD)) := NULL], null.data.table()) - -dt = data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c')) -test(2250.07, dt[, names(.SD) := lapply(.SD, max), by = grp], data.table(a = c(2L, 2L, 3L, 4L), b = c(6L, 6L, 7L, 8L), grp = c('a', 'a', 'b', 'c'))) - -dt = data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c')) -keep = c('a', 'b') -test(2250.08, dt[, names(.SD) := NULL, .SDcols = !keep], data.table(a = 1:4, b = 5:8)) - -dt = data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c')) -test(2250.09, dt[, paste(names(.SD), 'max', sep = '_') := lapply(.SD, max), by = grp] , data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c'), a_max = c(2L, 2L, 3L, 4L), b_max = c(6L, 6L, 7L, 8L))) - -dt = data.table(a = 1:3, b = 5:7, grp = c('a', 'a', 'b')) -test(2250.10, dt[1:2, paste(names(.SD), 'max', sep = '_') := lapply(.SD, max), by = grp], data.table(a = 1:3, b = 5:7, grp = c('a', 'a', 'b'), a_max = c(2L, 2L, NA_integer_), b_max = c(6L, 6L, NA_integer_))) -test(2250.11, dt[, names(.SD(2)) := lapply(.SD, .I)], error = 'could not find function ".SD"') - -dt = data.table(a = 1:3, b = 5:7, grp = c('a', 'a', 'b')) -test(2250.12, dt[, names(.SD) := lapply(.SD, \(x) x + b), .SDcols = "a"], data.table(a = 1:3 + 5:7, b = 5:7, grp = c('a', 'a', 'b'))) - - -dt = data.table(a = 1L, b = 2L, c = 3L, d = 4L, e = 5L, f = 6L) -test(2250.13, dt[, names(.SD)[1:5] := sum(.SD)], data.table(a = 21L, b = 21L, c = 21L, d = 21L, e = 21L, f = 6L)) - -# fread(...,fill) can also be used to specify a guess on the maximum number of columns #2691 #1812 #4130 #3436 #2727 -dt_str = paste(rep(c("1,2\n", "1,2,3\n"), each=100), collapse="") -ans = data.table(1L, 2L, rep(c(NA, 3L), each=100L)) -test(2251.01, fread(text = dt_str, fill=FALSE), ans[1:100, -3L], warning=".*Consider fill=TRUE.*") -test(2251.02, fread(text = dt_str, fill=TRUE), ans[1:100, -3L], warning=".*Consider fill=3.*") -test(2251.03, fread(text = dt_str, fill=2L), ans[1:100, -3L], warning=".*Consider fill=3.*") -test(2251.04, fread(text = dt_str, fill=3L), ans) -test(2251.05, fread(text = dt_str, fill=5L, verbose=TRUE), ans, output="Provided number of fill columns: 5 but only found 3\n Dropping 2 overallocated columns") # user guess slightly too big -test(2251.06, fread(text = dt_str, fill=1000L), ans) # user guess much too big -lines = c( - "12223, University", - "12227, bridge, Sky", - "12828, Sunset", - "13801, Ground", - "14853, Tranceamerica", - "14854, San Francisco", - "15595, shibuya, Shrine", - "16126, fog, San Francisco", - "16520, California, ocean, summer, golden gate, beach, San Francisco", - "") -text = paste(lines, collapse="\n") -test(2251.07, dim(fread(text)), c(6L, 3L), warning=c("fill=TRUE", "Discarded")) -test(2251.08, dim(fread(text, fill=TRUE)), c(9L, 9L)) -text = paste(lines[c(1:5, 9L, 6:8, 10L)], collapse="\n") -test(2251.09, dim(fread(text)), c(3L, 3L), warning=c("fill=TRUE", "fill=7")) -test(2251.10, dim(fread(text, fill=TRUE)), c(9L, 9L)) -test(2251.11, dim(fread(text, fill=7)), c(9L, 9L)) -test(2251.12, dim(fread(text, fill=9)), c(9L, 9L)) -test(2251.13, dim(fread(text, fill=20)), c(9L, 20L)) # clean up currently only kicks in if sep!=' ' - -.datatable.aware = FALSE -dt = data.table(a = 1L) -test(2252.1, dt[, b:=2L], error = "\\[ was called on a data.table.*not data.table-aware.*':='") -test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'") -rm(.datatable.aware) diff --git a/src/data.table.h b/src/data.table.h index 03af40219..21b7e30e0 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -238,7 +238,7 @@ bool isRealReallyInt(SEXP x); SEXP isRealReallyIntR(SEXP x); SEXP isReallyReal(SEXP x); bool allNA(SEXP x, bool errorForBadType); -SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups, SEXP source); +SEXP colnamesInt(SEXP x, SEXP cols, SEXP check_dups); bool INHERITS(SEXP x, SEXP char_); SEXP copyAsPlain(SEXP x); void copySharedColumns(SEXP x); diff --git a/src/nafill.c b/src/nafill.c index f8141c3aa..03aa6d091 100644 --- a/src/nafill.c +++ b/src/nafill.c @@ -114,7 +114,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S obj = PROTECT(allocVector(VECSXP, 1)); protecti++; // wrap into list SET_VECTOR_ELT(obj, 0, obj1); } - SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE),mkString("While matching column names from 'cols' to columns in 'obj'"))); protecti++; // nafill cols=NULL which turns into seq_along(obj) + SEXP ricols = PROTECT(colnamesInt(obj, cols, ScalarLogical(TRUE))); protecti++; // nafill cols=NULL which turns into seq_along(obj) x = PROTECT(allocVector(VECSXP, length(ricols))); protecti++; int *icols = INTEGER(ricols); for (int i=0; inx) || (icols[i]<1)) - error(_("%s\nargument specifying columns received non-existing column(s): cols[%d]=%d"),CHAR(STRING_ELT(source, 0)), i+1, icols[i]); // handles NAs also + error(_("argument specifying columns received non-existing column(s): cols[%d]=%d"), i+1, icols[i]); // handles NAs also } } else if (isString(cols)) { SEXP xnames = PROTECT(getAttrib(x, R_NamesSymbol)); protecti++; if (isNull(xnames)) - error(_("%s\n'x' argument data.table has no names"), CHAR(STRING_ELT(source, 0))); + error(_("'x' argument data.table has no names")); ricols = PROTECT(chmatch(cols, xnames, 0)); protecti++; int *icols = INTEGER(ricols); for (int i=0; i Date: Thu, 28 Mar 2024 11:54:16 +0530 Subject: [PATCH 13/16] fixing mistakes --- inst/tests/tests.Rraw | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1f7ee59ff..92fb33fb4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18350,3 +18350,78 @@ if (test_bit64) { apple = data.table(id = c("a", "b", "b"), time = c(1L, 1L, 2L), y = i64v[1:3]) test(2248, dcast(apple, id ~ time, value.var = "y"), data.table(id = c('a', 'b'), `1` = i64v[1:2], `2` = i64v[4:3], key='id')) } + +# Unit tests for DT[, .SD] retaining secondary indices, #1709 +DT = data.table(x=1:5, y=6:10) +setindex(DT, x) +test(2249.1, indices(DT), 'x') +test(2249.2, indices(DT[, .SD]), 'x') +setindex(DT, y) +test(2249.3, indices(DT), c('x', 'y')) +test(2249.4, indices(DT[, .SD]), c('x', 'y')) + +# make names(.SD) work - issue #795 +dt = data.table(a = 1:4, b = 5:8) +test(2250.01, dt[, names(.SD) := lapply(.SD, '*', 2), .SDcols = 1L], data.table(a = 1:4 * 2, b = 5:8)) +test(2250.02, dt[, names(.SD) := lapply(.SD, '*', 2), .SDcols = 2L], data.table(a = 1:4 * 2, b = 5:8 * 2)) +test(2250.03, dt[, names(.SD) := lapply(.SD, as.integer)], data.table(a = as.integer(1:4 * 2), b = as.integer(5:8 * 2))) +test(2250.04, dt[1L, names(.SD) := lapply(.SD, '+', 2L)], data.table(a = as.integer(c(4, 2:4 * 2)), b = as.integer(c(12, 6:8 * 2)))) +test(2250.05, dt[, setdiff(names(.SD), 'a') := NULL], data.table(a = as.integer(c(4, 2:4 * 2)))) +test(2250.06, dt[, c(names(.SD)) := NULL], null.data.table()) + +dt = data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c')) +test(2250.07, dt[, names(.SD) := lapply(.SD, max), by = grp], data.table(a = c(2L, 2L, 3L, 4L), b = c(6L, 6L, 7L, 8L), grp = c('a', 'a', 'b', 'c'))) + +dt = data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c')) +keep = c('a', 'b') +test(2250.08, dt[, names(.SD) := NULL, .SDcols = !keep], data.table(a = 1:4, b = 5:8)) + +dt = data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c')) +test(2250.09, dt[, paste(names(.SD), 'max', sep = '_') := lapply(.SD, max), by = grp] , data.table(a = 1:4, b = 5:8, grp = c('a', 'a', 'b', 'c'), a_max = c(2L, 2L, 3L, 4L), b_max = c(6L, 6L, 7L, 8L))) + +dt = data.table(a = 1:3, b = 5:7, grp = c('a', 'a', 'b')) +test(2250.10, dt[1:2, paste(names(.SD), 'max', sep = '_') := lapply(.SD, max), by = grp], data.table(a = 1:3, b = 5:7, grp = c('a', 'a', 'b'), a_max = c(2L, 2L, NA_integer_), b_max = c(6L, 6L, NA_integer_))) +test(2250.11, dt[, names(.SD(2)) := lapply(.SD, .I)], error = 'could not find function ".SD"') + +dt = data.table(a = 1:3, b = 5:7, grp = c('a', 'a', 'b')) +test(2250.12, dt[, names(.SD) := lapply(.SD, \(x) x + b), .SDcols = "a"], data.table(a = 1:3 + 5:7, b = 5:7, grp = c('a', 'a', 'b'))) + + +dt = data.table(a = 1L, b = 2L, c = 3L, d = 4L, e = 5L, f = 6L) +test(2250.13, dt[, names(.SD)[1:5] := sum(.SD)], data.table(a = 21L, b = 21L, c = 21L, d = 21L, e = 21L, f = 6L)) + +# fread(...,fill) can also be used to specify a guess on the maximum number of columns #2691 #1812 #4130 #3436 #2727 +dt_str = paste(rep(c("1,2\n", "1,2,3\n"), each=100), collapse="") +ans = data.table(1L, 2L, rep(c(NA, 3L), each=100L)) +test(2251.01, fread(text = dt_str, fill=FALSE), ans[1:100, -3L], warning=".*Consider fill=TRUE.*") +test(2251.02, fread(text = dt_str, fill=TRUE), ans[1:100, -3L], warning=".*Consider fill=3.*") +test(2251.03, fread(text = dt_str, fill=2L), ans[1:100, -3L], warning=".*Consider fill=3.*") +test(2251.04, fread(text = dt_str, fill=3L), ans) +test(2251.05, fread(text = dt_str, fill=5L, verbose=TRUE), ans, output="Provided number of fill columns: 5 but only found 3\n Dropping 2 overallocated columns") # user guess slightly too big +test(2251.06, fread(text = dt_str, fill=1000L), ans) # user guess much too big +lines = c( + "12223, University", + "12227, bridge, Sky", + "12828, Sunset", + "13801, Ground", + "14853, Tranceamerica", + "14854, San Francisco", + "15595, shibuya, Shrine", + "16126, fog, San Francisco", + "16520, California, ocean, summer, golden gate, beach, San Francisco", + "") +text = paste(lines, collapse="\n") +test(2251.07, dim(fread(text)), c(6L, 3L), warning=c("fill=TRUE", "Discarded")) +test(2251.08, dim(fread(text, fill=TRUE)), c(9L, 9L)) +text = paste(lines[c(1:5, 9L, 6:8, 10L)], collapse="\n") +test(2251.09, dim(fread(text)), c(3L, 3L), warning=c("fill=TRUE", "fill=7")) +test(2251.10, dim(fread(text, fill=TRUE)), c(9L, 9L)) +test(2251.11, dim(fread(text, fill=7)), c(9L, 9L)) +test(2251.12, dim(fread(text, fill=9)), c(9L, 9L)) +test(2251.13, dim(fread(text, fill=20)), c(9L, 20L)) # clean up currently only kicks in if sep!=' ' + +.datatable.aware = FALSE +dt = data.table(a = 1L) +test(2252.1, dt[, b:=2L], error = "\\[ was called on a data.table.*not data.table-aware.*':='") +test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'") +rm(.datatable.aware) \ No newline at end of file From a41262841152c03f88812549036c5c4e6ea208c8 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Thu, 28 Mar 2024 11:56:02 +0530 Subject: [PATCH 14/16] reverting more changes --- inst/tests/nafill.Rraw | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 83017b1d8..13e56416a 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -331,24 +331,3 @@ test(99.3, data.table(a=1,b=2)[1,1, verbose=NA], error="verbose must be length 1 options(datatable.verbose=1) test(99.4, coerceAs(1, 2L), error="verbose option must be length 1 non-NA logical or integer") options(datatable.verbose=FALSE) - -# Tests for the updated error message containing source of the problem in colnamesInt() -test(100.1, colnamesInt(1, NULL, NULL, "While running tests"), - error = "While running tests\n'x' argument must be data.table compatible" ) -test(100.2, colnamesInt(list(), NULL, 2, "While running tests"), - error = "check_dups must be TRUE or FALSE" ) -test(100.3, colnamesInt(list(), c(1.5, 2.5), TRUE, "While running tests"), - error = "While running tests\nargument specifying columns is type 'double' and one or more items in it are not whole integers" ) -test(100.4, colnamesInt(list(), c(0, 1), FALSE, "While running tests"), - error = "While running tests\nargument specifying columns received non-existing column(s): cols[1]=0" ) - -x <- data.table(a = 1:3, b = 4:6) -names(x) <- NULL -test(100.5, colnamesInt(x, c("a", "b"), TRUE, "While running tests"), - error = "While running tests\n'x' argument data.table has no names" ) -test(100.6, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "c"), TRUE, "While running tests"), - error = "While running tests\nargument specifying columns received non-existing column(s): cols[2]='c'" ) -test(100.7, colnamesInt(list(), TRUE, TRUE, "While running tests"), - error = "While running tests\nargument specifying columns must be character or numeric" ) -test(100.8, colnamesInt(data.table(a = 1:3, b = 4:6), c("a", "a"), TRUE, "While running tests" ), - error = "While running tests\nargument specifying columns received duplicate column(s)" ) \ No newline at end of file From b44a8f9aaac32e0ef91a710ebbe2ac04ef8c6da0 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Thu, 28 Mar 2024 11:58:29 +0530 Subject: [PATCH 15/16] final revert --- NEWS.md | 2 -- inst/tests/nafill.Rraw | 1 + inst/tests/tests.Rraw | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 429ad2e7d..b49f300c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -54,8 +54,6 @@ 7. Updated a test relying on `>` working for comparing language objects to a string, which will be deprecated by R, [#5977](https://github.com/Rdatatable/data.table/issues/5977); no user-facing effect. Thanks to R-core for continuously improving the language. -8. Added a source argument to `colnamesInt`, to provide additional debug information about calling code, [#5039](https://github.com/Rdatatable/data.table/issues/5039). -`colnamesInt` is the internal workhorse for converting column names to the corresponding column number in several places. Thanks to @MichaelChirico for the suggestion and @Nj221102 for the fix. # data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 13e56416a..b72c0b506 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -331,3 +331,4 @@ test(99.3, data.table(a=1,b=2)[1,1, verbose=NA], error="verbose must be length 1 options(datatable.verbose=1) test(99.4, coerceAs(1, 2L), error="verbose option must be length 1 non-NA logical or integer") options(datatable.verbose=FALSE) + diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 92fb33fb4..24b4f4c60 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18424,4 +18424,4 @@ test(2251.13, dim(fread(text, fill=20)), c(9L, 20L)) # clean up currently only k dt = data.table(a = 1L) test(2252.1, dt[, b:=2L], error = "\\[ was called on a data.table.*not data.table-aware.*':='") test(2252.2, dt[, let(b=2L)], error = "\\[ was called on a data.table.*not data.table-aware.*'let'") -rm(.datatable.aware) \ No newline at end of file +rm(.datatable.aware) From c20c7a0d798858d6783bc26473a4205408d05a29 Mon Sep 17 00:00:00 2001 From: nitish jha Date: Thu, 28 Mar 2024 12:00:02 +0530 Subject: [PATCH 16/16] removing extra space --- NEWS.md | 1 - 1 file changed, 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b49f300c0..7110f10e0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -54,7 +54,6 @@ 7. Updated a test relying on `>` working for comparing language objects to a string, which will be deprecated by R, [#5977](https://github.com/Rdatatable/data.table/issues/5977); no user-facing effect. Thanks to R-core for continuously improving the language. - # data.table [v1.15.0](https://github.com/Rdatatable/data.table/milestone/29) (30 Jan 2024) ## BREAKING CHANGE