Merge pull request #2 from biometryhub/feature/unit-test

Feature/unit test
biometryhub · Mar 5, 2024 · 79e43b5 · 79e43b5
2 parents e05fd67 + c93d77a
commit 79e43b5
Show file tree

Hide file tree

Showing 10 changed files with 136 additions and 28 deletions.
diff --git a/R/One_Sample.R b/R/One_Sample.R
@@ -29,35 +29,35 @@ OneSample <- function(data, set_size, method = c("JPS", "RSS"), confidence = 0.9
     # If model is 0, it's design based inference, if model = 1, it is model based inference using super population model
     # pop_size: nrow(data)*set_size <= pop_size, > 0, only relevant if replace = FALSE
 
-    if(set_size < 1 | is.na(set_size) | is.null(set_size) | !is.numeric(set_size)) {
+    if (set_size < 1 || is.na(set_size) || is.null(set_size) || !is.numeric(set_size)) {
         stop("set_size must be a positive numeric value")
     }
 
-    if(!isTRUE(replace) & !isFALSE(replace)) {
+    if (!isTRUE(replace) && !isFALSE(replace)) {
         stop("replace must be TRUE or FALSE")
     }
 
     method <- match.arg(toupper(method), c("JPS", "RSS"))
 
-    if(confidence > 1 | confidence < 0 | !is.numeric(confidence)) {
+    if (confidence > 1 || confidence < 0 || !is.numeric(confidence)) {
         stop("confidence must take a numeric value between 0 and 1, indicating the confidence level")
     }
 
-    if(!model %in% c(1, 0)) {
+    if (!model %in% c(1, 0)) {
         stop("model must be 0 for design based inference or 1 for super-population model")
     }
 
     alpha <- 1 - confidence
 
     if (!replace) {
-        if(missing(pop_size) | is.null(pop_size) | !is.numeric(pop_size)) {
+        if (missing(pop_size) || is.null(pop_size) || !is.numeric(pop_size)) {
             stop("A numeric population size pop_size must be provided when sampling without replacement")
         }
-        else if(pop_size <= nrow(data)*set_size | pop_size <= 0) {
+        else if(pop_size <= nrow(data)*set_size || pop_size <= 0) {
             stop("pop_size must be positive and less than data x set_size")
         }
     }
-    if (model == 1 & missing(pop_size)) {
+    if (model == 1 && missing(pop_size)) {
         stop("The population size pop_size must be provided for super-population model")
     }
 
@@ -74,11 +74,11 @@ OneSample <- function(data, set_size, method = c("JPS", "RSS"), confidence = 0.9
     ### Ranked set sample ###########################################
     #################################################################
 
-    else if(method == "RSS") {
+    else if (method == "RSS") {
         RV <- data[, 2]
         GSV <- aggregate(RV, list(RV), length)$x
 
-        if (length(GSV) != set_size | min(GSV) <= 1) {
+        if (length(GSV) != set_size || min(GSV) <= 1) {
             stop("In ranked set sample, first ranking method should have at least two observations in any judgment ranking group")
         }
 

diff --git a/R/RSSDF.R b/R/RSSDF.R
@@ -1,29 +1,31 @@
 ############################## 3
-# This function genrates RSS sample with K ranking methods with replacemet
+# This function generates RSS sample with K ranking methods with replacement
 # Pop: has two variables popY, variable of interest
 #  popAux: Auxiliary variable
 # We assume pop and popAux are correlated
 # n: sample size n=Hd, H: set size, d: cycle size
 RSSDF <- function(pop, n, H, K) {
-  d <- n / H
+  verify_rss_params(pop, n, H, K)
+
+  n_cycles <- n / H
   popY <- pop[, 1]
   popAux <- pop[, 2]
   N <- length(popY)
   RSSM <- matrix(0, ncol = (K + 1), nrow = n)
   ic <- 1
-  for (j in (1:d)) {
+  for (j in (1:n_cycles)) {
     for (h in (1:H)) {
-      setid <- sample(1:N, H)
-      setY <- popY[setid]
-      setX <- popAux[setid]
-      orAux <- order(setX)
-      orY <- setY[orAux]
-      osetX <- setX[orAux]
-      setidO <- setid[orAux]
+      sampled_id <- sample(1:N, H)
+      setY <- popY[sampled_id]
+      setX <- popAux[sampled_id]
+      auxiliary_order <- order(setX)
+      ordered_setY <- setY[auxiliary_order]
+      ordered_setX <- setX[auxiliary_order]
+      ordered_sample_id <- sampled_id[auxiliary_order]
       # oset=DELLF(set,tauV[1])
-      RSSM[ic, c(1, 2)] <- c(orY[h], h)
-      k1obs <- osetX[h]
-      redAux <- popAux[-setidO[h]]
+      RSSM[ic, c(1, 2)] <- c(ordered_setY[h], h)
+      k1obs <- ordered_setX[h]
+      redAux <- popAux[-ordered_sample_id[h]]
       if (K > 1) {
         for (k in (2:K)) {
           kset <- c(k1obs, sample(redAux, (H - 1)))

diff --git a/R/RSSEF.R b/R/RSSEF.R
@@ -1,7 +1,7 @@
 ###########################################
 #  This function provides estimator for RSS data
 #  RSSK: n by (K+1) dimensional data matrix, the first column is Y-values,
-#  the next K coulumns are the ranks of K-ranking methods
+#  the next K columns are the ranks of K-ranking methods
 #  set_size: set Size
 # N: population size
 # model: if Modle=0 design based inference, if model=1, superpopulation model
@@ -21,7 +21,8 @@
 #' @keywords internal
 #'
 RSSEF <- function(data, set_size, replace, model, N, alpha) {
-  RM <- data[, -1]
+  # unused variable
+  # RM <- data[, -1]
   RV <- data[, 2] # We need to be careful about this.
   Y <- data[, 1]  # We need to be careful about this. Need to ensure response is in col 1.
   n <- nrow(data)
@@ -86,7 +87,7 @@ RSSEF <- function(data, set_size, replace, model, N, alpha) {
   Jack.Repl.AWi <- apply(matrix(1:n, ncol = 1), 1, FWDel1, AWY = AWY) # Aggrement weight estimator
   # when the i-th obseervation is deleted
   if (replace) fc <- 1 else fc <- 1 - n / (N - 1)
-  J.var <- fc * (n - 1) * var(Jack.Repl.AWi) * ((n - 1) / n)^2 # Jackknife variance estiamte  for aggreement weight JPS estimator
+  J.var <- fc * (n - 1) * var(Jack.Repl.AWi) * ((n - 1) / n)^2 # Jackknife variance estimate  for aggreement weight JPS estimator
   ##############################################################
 
 

diff --git a/R/RSSNRF.R b/R/RSSNRF.R
@@ -5,15 +5,18 @@
 # We assume pop and popAux are correlated
 # n: sample size n=Hd, H: set size, d: cycle size
 RSSNRF <- function(pop, n, H, K) {
-  d <- n / H
+  verify_rss_params(pop, n, H, K)
+
+  n_cycles <- n / H
   K1 <- K + 1
-  rseq <- rep((1:H), times = d)
+  rseq <- rep((1:H), times = n_cycles)
   popY <- pop[, 1]
   N <- length(popY)
   popAux <- pop[, 2]
   popind <- 1:N
   RSSM <- matrix(0, ncol = (K1), nrow = n)
-  ic <- 1
+  # unused variable
+  # ic <- 1
   ind <- sample(popind, n * H)
   setY <- matrix(popY[ind], ncol = H, nrow = n)
   setX <- matrix(popAux[ind], ncol = H, nrow = n)

diff --git a/R/utils.R b/R/utils.R
@@ -0,0 +1,42 @@
+is_positive_wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
+  is_wholenumber(x, tol) && x > 0
+}
+
+is_wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
+  abs(x - round(x)) < tol
+}
+
+verify_rss_params <- function(pop, n, H, K) {
+  pop_dimension <- dim(pop)
+  if (length(pop_dimension) != 2) {
+    stop("`pop` must be a 2-dimension matrix-like object.")
+  }
+
+  if (pop_dimension[[2]] < 2) {
+    stop("`pop` must have at least 2 columns.")
+  }
+
+  if (!is_positive_wholenumber(n)) {
+    stop("`n` must be a positive whole number.")
+  }
+
+  if (!is_positive_wholenumber(H)) {
+    stop("`H` must be a positive whole number.")
+  }
+
+  if (!is_positive_wholenumber(K)) {
+    stop("`K` must be a positive whole number.")
+  }
+
+  if (pop_dimension[[1]] < n) {
+    stop("`pop` must have at least `n` rows.")
+  }
+
+  if (n < H) {
+    stop("`n` must >= `H`.")
+  }
+
+  if (n %% H != 0) {
+    stop("`n` must be a multiple of `H`.")
+  }
+}
diff --git a/data/Coombe2019.rda → data/coombe2019.rda b/data/Coombe2019.rda → data/coombe2019.rda
diff --git a/man/Coombe2019.Rd → man/coombe2019.Rd b/man/Coombe2019.Rd → man/coombe2019.Rd
diff --git a/tests/population.rda b/tests/population.rda
diff --git a/tests/testthat/test-RSSDF.R b/tests/testthat/test-RSSDF.R
@@ -0,0 +1,34 @@
+test_that("RSSDF has a correct output.", {
+  skip_if(getRversion() < 3.4)
+  # TODO: create a matrix to not rely on `population.rda`
+  load("../population.rda")
+
+  rss_matrix <- RSSDF(population, 100, 10, 2)
+  expect_equal(dim(rss_matrix), c(100, 3))
+  expect_equal(sort(unique(rss_matrix[, 2])), 1:10)
+
+  sample_counts_in_sets <- table(rss_matrix[, 2])
+  expect_equal(sample_counts_in_sets[[2]], 10)
+  expect_equal(table(sample_counts_in_sets)[[1]], 10)
+
+  # # # in case we are supporting `n %% H > 0`
+  # rss_matrix_with_dropped_sample <- RSSDF(population, 100, 11, 2)
+  # expect_equal(sort(unique(rss_matrix_with_dropped_sample[, 2])), 0:11)
+  #
+  # sample_counts_in_sets <- table(rss_matrix_with_dropped_sample[, 2])
+  # expect_equal(sample_counts_in_sets[[2]], 9)
+  # expect_equal(table(sample_counts_in_sets)[[2]], 11)
+})
+
+test_that("Inputs are valid.", {
+  matrix_ <- matrix(1:10, nrow = 5, ncol = 2)
+
+  expect_error(RSSDF(1:10, -100, 10, 1), "`pop` must be a 2-dimension matrix-like object.")
+  expect_error(RSSDF(matrix(1:10), -100, 10, 1), "`pop` must have at least 2 columns.")
+  expect_error(RSSDF(matrix_, -100, 10, 1), "`n` must be a positive whole number.")
+  expect_error(RSSDF(matrix_, 100, -10, 1), "`H` must be a positive whole number.")
+  expect_error(RSSDF(matrix_, 100, 10, -1), "`K` must be a positive whole number.")
+  expect_error(RSSDF(matrix_, 100, 10, 1), "`pop` must have at least `n` rows.")
+  expect_error(RSSDF(matrix_, 5, 11, 1), "`n` must >= `H`.")
+  expect_error(RSSDF(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.")
+})
diff --git a/tests/testthat/test-RSSNRF.R b/tests/testthat/test-RSSNRF.R
@@ -0,0 +1,26 @@
+test_that("RSSDF has a correct output.", {
+  skip_if(getRversion() < 3.4)
+  # TODO: create a matrix to not rely on `population.rda`
+  load("../population.rda")
+
+  rss_matrix <- RSSNRF(population, 100, 10, 2)
+  expect_equal(dim(rss_matrix), c(100, 3))
+  expect_equal(sort(unique(rss_matrix[, 2])), 1:10)
+
+  sample_counts_in_sets <- table(rss_matrix[, 2])
+  expect_equal(sample_counts_in_sets[[2]], 10)
+  expect_equal(table(sample_counts_in_sets)[[1]], 10)
+})
+
+test_that("Inputs are valid.", {
+  matrix_ <- matrix(1:10, nrow = 5, ncol = 2)
+
+  expect_error(RSSNRF(1:10, -100, 10, 1), "`pop` must be a 2-dimension matrix-like object.")
+  expect_error(RSSNRF(matrix(1:10), -100, 10, 1), "`pop` must have at least 2 columns.")
+  expect_error(RSSNRF(matrix_, -100, 10, 1), "`n` must be a positive whole number.")
+  expect_error(RSSNRF(matrix_, 100, -10, 1), "`H` must be a positive whole number.")
+  expect_error(RSSNRF(matrix_, 100, 10, -1), "`K` must be a positive whole number.")
+  expect_error(RSSNRF(matrix_, 100, 10, 1), "`pop` must have at least `n` rows.")
+  expect_error(RSSNRF(matrix_, 5, 11, 1), "`n` must >= `H`.")
+  expect_error(RSSNRF(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.")
+})