Merge pull request #68 from tdhock/result=fun

press comparison
tdhock · Nov 19, 2024 · 045d7f2 · 045d7f2
2 parents a2d5d24 + 55264ef
commit 045d7f2
Show file tree

Hide file tree

Showing 8 changed files with 184 additions and 83 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: atime
 Type: Package
 Title: Asymptotic Timing
-Version: 2024.11.8
+Version: 2024.11.19
 Authors@R: c(
     person("Toby", "Hocking",
      email="[email protected]",

diff --git a/NEWS b/NEWS
@@ -1,3 +1,13 @@
+Changes in version 2024.11.19
+
+- comment predict(kilobytes) in sparse vignette, for CRAN.
+
+Changes in version 2024.11.15
+
+- only run tests that involve Matrix if it is available.
+- row -> column names in error message for custom units.
+- New feature: atime(result=fun) means that fun will be applied to the output of each expression, and that result will be saved. Useful when you want to specify a custom unit, with a function that returns a 1-row data frame, that can be applied to each expression.
+
 Changes in version 2024.11.8
 
 - only run tests that involve plotting if ggplot2 is available, to fix CRAN noSuggests issue, https://github.com/tdhock/atime/issues/67

diff --git a/R/atime.R b/R/atime.R
@@ -96,6 +96,15 @@ default_N <- function(){
 atime <- function(N=default_N(), setup, expr.list=NULL, times=10, seconds.limit=0.01, verbose=FALSE, result=FALSE, N.env.parent=NULL, ...){
   kilobytes <- mem_alloc <- . <- sizes <- NULL
   ## above for CRAN NOTE.
+  result.fun <- identity
+  result.keep <- if(is.function(result)){
+    result.fun <- result
+    TRUE
+  }else if(isTRUE(result)){
+    TRUE
+  }else{
+    FALSE
+  }
   if(is.null(N.env.parent)){
     N.env.parent <- parent.frame()
   }
@@ -133,10 +142,10 @@ atime <- function(N=default_N(), setup, expr.list=NULL, times=10, seconds.limit=
       N.env$result.list <- list()
       for(expr.name in not.done.yet){
         expr <- elist[[expr.name]]
-        m.list[expr.name] <- list(if(result){
+        m.list[expr.name] <- list(if(result.keep){
           substitute(
-            result.list[NAME] <- list(EXPR),
-            list(NAME=expr.name, EXPR=expr))
+            result.list[NAME] <- list(FUN(EXPR)),
+            list(NAME=expr.name, FUN=result.fun, EXPR=expr))
         }else{
           expr
         })
@@ -150,7 +159,7 @@ atime <- function(N=default_N(), setup, expr.list=NULL, times=10, seconds.limit=
         names.list <- lapply(N.env$result.list, names)
         for(result.i in seq_along(names.list)){
           if(!identical(names.list[[1]], names.list[[result.i]])){
-            stop(sprintf("results are all 1 row data frames, but some have different names (%s, %s); please fix by making row names of results identical", names(names.list)[[1]], names(names.list)[[result.i]]))
+            stop(sprintf("results are all 1 row data frames, but some have different names (%s, %s); please fix by making column names of results identical", names(names.list)[[1]], names(names.list)[[result.i]]))
           }
         }
         result.rows <- do.call(rbind, N.env$result.list)
@@ -159,7 +168,7 @@ atime <- function(N=default_N(), setup, expr.list=NULL, times=10, seconds.limit=
       }else{
         result.rows <- NULL
       }
-      if(result){
+      if(result.keep){
         N.df$result <- N.env$result.list
       }
       N.stats <- data.table(

diff --git a/man/atime.Rd b/man/atime.Rd
@@ -16,10 +16,14 @@ result=FALSE, N.env.parent=NULL, ...)}
   \item{seconds.limit}{if the median timing of any expression exceeds
     this many seconds, then no timings for larger N are computed.}
   \item{verbose}{logical, print messages after every data size?}
-  \item{result}{logical, save each result? If \code{TRUE}, and result is
-  a data frame with one row, then the numeric column names will be
-  saved as more units to analyze (in addition to kilobytes and
-  seconds).}
+  \item{result}{
+    logical: save the result of evaluating each expression?
+    Or a function to compute a result, given the value obtained after
+    evaluating each expression.
+    If each result is a data frame with one row, then the numeric column
+    names will be saved as more units to analyze (in addition to kilobytes
+    and seconds).
+  }
   \item{N.env.parent}{environment to use as parent of environment
   created for each data size N, or NULL to use default parent env.}
   \item{\dots}{named expressions to time.}
@@ -30,6 +34,9 @@ result=FALSE, N.env.parent=NULL, ...)}
   convenience, expressions may be specified either via code (\dots) or
   data (\code{expr.list} arg).}
 
+\seealso{\code{\link{atime_grid}} for avoiding repetition when measuring
+  asymptotic properties of several similar expressions.}
+
 \value{list of class atime with elements \code{unit.col.vec} (character
   vector of column names to analyze), \code{seconds.limit} (numeric
   input param), \code{measurements} (data table of results).}
@@ -38,10 +45,10 @@ result=FALSE, N.env.parent=NULL, ...)}
 
 \examples{
 
-## Example 1: polynomial and exponential time string functions.
+## Polynomial and exponential time string functions.
 atime_result_string <- atime::atime(
   seconds.limit=0.001,
-  N=unique(as.integer(10^seq(0,3.5,l=100))),
+  N=unique(as.integer(10^seq(0,3,l=100))),
   setup={
     subject <- paste(rep("a", N), collapse="")
     pattern <- paste(rep(c("a?", "a"), each=N), collapse="")
@@ -53,26 +60,4 @@ atime_result_string <- atime::atime(
   linear.replacement=gsub("a",linear_size_replacement,subject))
 plot(atime_result_string)
 
-## Example 2: combine using rbind inside or outside for loop.
-atime_result_rbind <- atime::atime(
-  seconds.limit=0.001,
-  setup={
-    DF <- data.frame(i=1:100)
-  },
-  inside={
-    big.frame <- data.frame()
-    for(table.i in 1:N){
-      big.frame <- rbind(big.frame, DF)
-    }
-  },
-  outside={
-    big.frame.list <- list()
-    for(table.i in 1:N){
-      big.frame.list[[table.i]] <- DF
-    }
-    big.frame <- do.call(rbind, big.frame.list)
-  }
-)
-plot(atime_result_rbind)
-
 }
diff --git a/man/atime_grid.Rd b/man/atime_grid.Rd
@@ -59,7 +59,8 @@ list(m=quote(mean(data)), s=quote(sum(data)))
   expr.param.sep="\n",
   regexpr=regexpr(pattern, subject, perl=PERL)))
 atime.list <- atime::atime(
-  N=unique(as.integer(10^seq(0,3.5,l=20))),
+  seconds.limit=0.001,
+  N=unique(as.integer(10^seq(0,2,l=30))),
   setup={
     subject <- paste(rep("a", N), collapse="")
     pattern <- paste(rep(c("a?", "a"), each=N), collapse="")
@@ -77,6 +78,7 @@ sapply(sub.param.list$FUN,function(name)substitute(fun("a","",subject), list(fun
   replace=FUN("a","",subject,perl=PERL),
   symbol.params="FUN"))
 sub.atime.list <- atime::atime(
+  seconds.limit=0.001,
   setup={
     subject <- paste(rep("a",N),collapse="")
   }, 
@@ -88,8 +90,9 @@ sub.atime.edited <- sub.atime.list
 library(data.table)
 sub.atime.edited$measurements <- data.table(sub.atime.list$measurements)[
 , expr.name := paste0("PERL=",PERL)]
-library(ggplot2)
-plot(sub.atime.edited)+
-  facet_grid(unit ~ FUN, labeller=label_both)
+if(require(ggplot2)){
+  plot(sub.atime.edited)+
+    facet_grid(unit ~ FUN, labeller=label_both)
+}
 
 }
diff --git a/man/references_best.Rd b/man/references_best.Rd
@@ -25,7 +25,7 @@ references_best(L, fun.list=NULL)
 
 \examples{
 
-## Example 1: polynomial and exponential time string functions.
+## Polynomial and exponential time string functions.
 atime_result_string <- atime::atime(
   seconds.limit=0.001,
   N=unique(as.integer(10^seq(0,4,l=100))),
@@ -49,31 +49,4 @@ plot(refs_best_string)
 (pred_string <- predict(refs_best_string))
 plot(pred_string)
 
-## Example 2: combine using rbind inside or outside for loop.
-atime_result_rbind <- atime::atime(
-  seconds.limit=0.001,
-  setup={
-    DF <- data.frame(i=1:100)
-  },
-  inside={
-    big.frame <- data.frame()
-    for(table.i in 1:N){
-      big.frame <- rbind(big.frame, DF)
-    }
-  },
-  outside={
-    big.frame.list <- list()
-    for(table.i in 1:N){
-      big.frame.list[[table.i]] <- DF
-    }
-    big.frame <- do.call(rbind, big.frame.list)
-  }
-)
-(refs_best_rbind <- atime::references_best(atime_result_rbind))
-plot(refs_best_rbind)
-refs_best_rbind$plot.references <- refs_best_rbind$ref[c("N","N^2"),on="fun.name"]
-plot(refs_best_rbind)
-(pred_rbind <- predict(refs_best_rbind))
-plot(pred_rbind)
-
 }
diff --git a/tests/testthat/test-CRAN.R b/tests/testthat/test-CRAN.R
@@ -398,7 +398,7 @@ if(requireNamespace("ggplot2"))test_that("references for non-NA unit, with NA un
   expect_identical(sort(names(rtab)), c("linear","quadratic"))
 })
 
-test_that("references for non-NA unit, with NA unit",{
+test_that("error for result data frames with different column names",{
   expect_error({
     atime::atime(
       missing=data.frame(my_unit=NA),
@@ -407,7 +407,7 @@ test_that("references for non-NA unit, with NA unit",{
       quadratic=data.frame(my_unit=N^2),
       seconds.limit=0.001,
       result=TRUE)
-  }, "results are all 1 row data frames, but some have different names (missing, constant); please fix by making row names of results identical", fixed=TRUE)
+  }, "results are all 1 row data frames, but some have different names (missing, constant); please fix by making column names of results identical", fixed=TRUE)
 })
 
 test_that("error for new unit name conflicting with existing", {
@@ -438,8 +438,7 @@ test_that("atime_test outputs historical versions", {
   expect_identical(names(atest), c("setup", "expr", "Slow", "Fast"))
 })
 
-test_that("atime_grid parameters attribute", {
-  library(Matrix)
+if(require(Matrix))test_that("atime_grid parameters attribute", {
   param.list <- list(
     non_zeros=c("N","N^2/10"),
     fun=c("matrix","Matrix")
@@ -486,3 +485,23 @@ test_that("atime_grid parameters attribute", {
   expect_in(expected.names, names(mult.pred$measurements))
   expect_in(expected.names, names(mult.pred$prediction))
 })
+
+if(require(Matrix))test_that("result=fun works", {
+  pred.len <- 100
+  sqrt.len <- sqrt(pred.len)
+  vec.mat.result <- atime::atime(
+    N=2^seq(1,ceiling(log2(pred.len))),
+    vector=numeric(N),
+    matrix=matrix(0, N, N),
+    Matrix=Matrix(0, N, N),
+    result=function(x)data.frame(length=length(x)))
+  expect_is(vec.mat.result$measurements$length, "integer")
+  vec.mat.refs <- atime::references_best(vec.mat.result)
+  vec.mat.pred <- predict(vec.mat.refs, length=pred.len)
+  ## precise estimation of length:
+  expect_equal(vec.mat.pred$prediction, data.table(
+    unit="length",
+    expr.name=c("vector","matrix","Matrix"),
+    unit.value=pred.len,
+    N=c(pred.len,sqrt.len,sqrt.len)))
+})