Skip to content

Commit

Permalink
fix Rcpp::CharacterVector copies
Browse files Browse the repository at this point in the history
  • Loading branch information
mschubert committed Oct 2, 2022
1 parent 5f6163d commit a67c4c8
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 12 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: narray
Title: Subset- And Name-Aware Array Utility Functions
Version: 0.5.0
Version: 0.5.1
Author: Michael Schubert <[email protected]>
Maintainer: Michael Schubert <[email protected]>
Description: Stacking arrays according to dimension names, subset-aware
Expand Down
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# git head
# narray 0.5.1

* Fixed a bug where `stack` made unnecessary vector copies

# narray 0.5.0

Expand Down Expand Up @@ -27,16 +29,19 @@
* new `lambda` syntax (#14)

# narray 0.3.2

* add `collect` function as opposite to `mask`
* deprecate `summarize` in favor of `map`
* adjust tests for `testthat>=2`

# narray 0.2.2

* fix bug where `split` with `NA` in `subsets` caused wrong splitting; these
are now dropped with a warning (#5)
* fix bug where vectors are not bound along the right dimensions (#7)
* add `rep` functions for arrays (and `rrep` and `crep` aliases for rows and
columns, respectively)

# narray 0.1.1

* Initial release on CRAN
21 changes: 14 additions & 7 deletions src/stack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ using namespace Rcpp;
using namespace std;

template<int RTYPE> Vector<RTYPE> cpp_stack_impl(List array_list, int along, Vector<RTYPE> fill, bool ovr) {
auto dimnames = vector<CharacterVector>(along); // dim: names along
auto dimnames = vector<vector<String>>(along); // dim: names along
auto axmap = vector<unordered_map<string, int>>(along); // dim: element name->index
auto ax_unnamed = vector<int>(along); // counter for unnamed dimension elements
auto a2r = vector<vector<vector<int>>>(array_list.size()); // array > dim > element
auto a2r = vector<vector<vector<int>>>(array_list.size()); // index array>dim>element

// create lookup tables for all present dimension names
for (int ai=0; ai<Rf_xlength(array_list); ai++) { // array index
Expand Down Expand Up @@ -46,12 +46,16 @@ template<int RTYPE> Vector<RTYPE> cpp_stack_impl(List array_list, int along, Vec
} else {
auto dni = as<vector<string>>(dn[d]);
for (int e=0; e<da[d]; e++) { // element in dimension
if (axmap[d].count(dni[e]) == 0) {
axmap[d].emplace(dni[e], axmap[d].size() + ax_unnamed[d]);
auto it = axmap[d].find(dni[e]);
if (it == axmap[d].end()) {
int val = axmap[d].size() + ax_unnamed[d];
axmap[d].emplace(dni[e], val);
dimnames[d].push_back(dni[e]);
a2r[ai][d].push_back(val);
} else {
a2r[ai][d].push_back(it->second);
}
// Rprintf("array %i dim %i: %s -> %i\n", ai, d, dni[e].c_str(), axmap[d][dni[e]]);
a2r[ai][d].push_back(axmap[d][dni[e]]);
}
}
}
Expand All @@ -73,10 +77,13 @@ template<int RTYPE> Vector<RTYPE> cpp_stack_impl(List array_list, int along, Vec
auto rdnames = List(dimnames.size());
for (int i=0; i<dimnames.size(); i++) {
rdim[i] = dimnames[i].size();
if (all(is_na(dimnames[i])))
auto rdni = CharacterVector(dimnames[i].size());
for (int j=0; j<rdni.size(); j++)
rdni[j] = dimnames[i][j];
if (all(is_na(rdni)))
rdnames[i] = R_NilValue;
else
rdnames[i] = dimnames[i];
rdnames[i] = rdni;
}
auto n = accumulate(rdim.begin(), rdim.end(), 1, multiplies<int>());
auto result = Vector<RTYPE>(n, fill[0]);
Expand Down
15 changes: 12 additions & 3 deletions tests/testthat/test-stack.r
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,22 @@ test_that("keep_empty arg when stacking zero-length vectors", {
test_that("performance", {
skip_on_cran()

size = 500 # 500x500, 500 arrays
# stack 500 arrays, 500x500 with overwriting
size = 500
syms = c(letters, LETTERS, 0:9)
idx = do.call(paste0, expand.grid(syms, syms))

ars = replicate(size, simplify=FALSE,
matrix(runif(size*size), nrow=size, ncol=size,
dimnames=list(sample(idx, size), sample(idx, size))))
tt = system.time(stack(ars, along=2, allow_overwrite=TRUE))
expect_lt(tt["user.self"], 6)
expect_lt(tt["user.self"], 6) # 1.5 sec locally

# stack 10 arrays, 50k rows and 1 column
size = 5e4
idx = do.call(paste0, expand.grid(syms, syms, syms))[1:size]
ars2 = replicate(10, simplify=FALSE,
matrix(runif(size), nrow=size, ncol=1,
dimnames=list(sample(idx, size), sample(idx,1))))
tt = system.time(stack(ars2, along=2))
expect_lt(tt["user.self"], 2) # 0.1 sec locally
})

0 comments on commit a67c4c8

Please sign in to comment.