Skip to content

Commit

Permalink
remove use of rbindlist(..., use.names=FALSE, fill=TRUE) in merge (#5857
Browse files Browse the repository at this point in the history
)

* add regression fix

* add tests from #5309

* added comment about NA rectangle

* emphasize subtle part about attributes too

---------

Co-authored-by: Michael Chirico <[email protected]>
  • Loading branch information
ben-schwen and MichaelChirico authored Dec 27, 2023
1 parent b88112a commit 62b1044
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 2 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@
# v1.14.4 0.4826 0.5586 0.6586 0.6329 0.7348 1.318 100
```

30. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur for testing dev and filing a bug report which was fixed before release.
30. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur, @dcaseykc, @fox34, @adrian-quintario and @berg-michael for testing dev and filing a bug report which was fixed before release.

```R
DT1
Expand Down
14 changes: 13 additions & 1 deletion R/merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,20 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
if (all.y && nrow(y)) { # If y does not have any rows, no need to proceed
# Perhaps not very commonly used, so not a huge deal that the join is redone here.
missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian]
# TO DO: replace by following once #5446 is merged
# if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE)
if (length(missingyidx)) {
dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE)
yy = y[missingyidx]
othercolsx = setdiff(nm_x, by)
if (length(othercolsx)) {
# create NA rectangle with correct types and attributes of x to cbind to y
tmp = rep.int(NA_integer_, length(missingyidx))
# TO DO: use set() here instead..
yy = cbind(yy, x[tmp, othercolsx, with = FALSE])
}
# empty data.tables (nrow =0, ncol>0) doesn't skip names anymore in new rbindlist
# takes care of #24 without having to save names. This is how it should be, IMHO.
dt = rbind(dt, yy, use.names=FALSE)
}
}
# X[Y] syntax puts JIS i columns at the end, merge likes them alongside i.
Expand Down
13 changes: 13 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -1919,6 +1919,11 @@ test(631.5, DT3, data.table(a=c(2), total=c(5), key="a"))
# .. nrow(y)=1, i subset y with 1 and match with x
test(631.6, merge(DT1,DT4,all.y=TRUE), data.table(a=c(3),total.x=c(1),total.y=c(1),key="a"))
test(631.7, DT4, data.table(a=c(3), total=c(1), key="a"))
# merge columns with different attributes #5309
x = data.table(a=1L, b=as.IDate(16801))
y = data.table(a=2L, b=NA)
test(631.8, merge(x,y,by="a",all=TRUE), data.table(a=c(1L,2L), b.x=as.IDate(c(16801,NA)), b.y=NA, key="a"))
test(631.9, merge(y,x,by="a",all=TRUE), data.table(a=c(1L,2L), b.x=NA, b.y=as.IDate(c(16801,NA)), key="a"))

test(632, merge(DT1,DT2,all=TRUE), data.table(a=c(1,2,3,4,5),total.x=c(2,NA,1,3,1),total.y=c(NA,5,1,NA,2),key="a"))
test(632.1, merge(DT1,DT2,all=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all=TRUE)),a))
Expand Down Expand Up @@ -14335,6 +14340,14 @@ test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=T
# rbindlist segfault with fill=TRUE and usenames=FALSE #5444
test(2003.6, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3)))
test(2003.7, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3))))
# rbind with different attributes #5309
x=data.table(a=as.Date(NA))
y=data.table(a=as.Date('2021-10-05'), b=as.POSIXct("2021-10-06 13:58:00 UTC"))
ans=data.table(a=as.Date(c(NA_character_, '2021-10-05')), b=as.POSIXct(c(NA_character_, "2021-10-06 13:58:00 UTC")))
test(2003.81, rbind(x, y, fill=TRUE, use.names=TRUE), ans)
test(2003.82, rbind(y, x, fill=TRUE, use.names=TRUE), ans[2:1,])
test(2003.83, rbind(x, y, fill=TRUE, use.names=FALSE), ans)
test(2003.84, rbind(y, x, fill=TRUE, use.names=FALSE), ans[2:1,])

# chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111
x1 = "fa\xE7ile"
Expand Down

0 comments on commit 62b1044

Please sign in to comment.