diff --git a/R/One_Sample.R b/R/One_Sample.R index 4a60017..a29f183 100644 --- a/R/One_Sample.R +++ b/R/One_Sample.R @@ -29,35 +29,35 @@ OneSample <- function(data, set_size, method = c("JPS", "RSS"), confidence = 0.9 # If model is 0, it's design based inference, if model = 1, it is model based inference using super population model # pop_size: nrow(data)*set_size <= pop_size, > 0, only relevant if replace = FALSE - if(set_size < 1 | is.na(set_size) | is.null(set_size) | !is.numeric(set_size)) { + if (set_size < 1 || is.na(set_size) || is.null(set_size) || !is.numeric(set_size)) { stop("set_size must be a positive numeric value") } - if(!isTRUE(replace) & !isFALSE(replace)) { + if (!isTRUE(replace) && !isFALSE(replace)) { stop("replace must be TRUE or FALSE") } method <- match.arg(toupper(method), c("JPS", "RSS")) - if(confidence > 1 | confidence < 0 | !is.numeric(confidence)) { + if (confidence > 1 || confidence < 0 || !is.numeric(confidence)) { stop("confidence must take a numeric value between 0 and 1, indicating the confidence level") } - if(!model %in% c(1, 0)) { + if (!model %in% c(1, 0)) { stop("model must be 0 for design based inference or 1 for super-population model") } alpha <- 1 - confidence if (!replace) { - if(missing(pop_size) | is.null(pop_size) | !is.numeric(pop_size)) { + if (missing(pop_size) || is.null(pop_size) || !is.numeric(pop_size)) { stop("A numeric population size pop_size must be provided when sampling without replacement") } - else if(pop_size <= nrow(data)*set_size | pop_size <= 0) { + else if(pop_size <= nrow(data)*set_size || pop_size <= 0) { stop("pop_size must be positive and less than data x set_size") } } - if (model == 1 & missing(pop_size)) { + if (model == 1 && missing(pop_size)) { stop("The population size pop_size must be provided for super-population model") } @@ -74,11 +74,11 @@ OneSample <- function(data, set_size, method = c("JPS", "RSS"), confidence = 0.9 ### Ranked set sample ########################################### ################################################################# - else if(method == "RSS") { + else if (method == "RSS") { RV <- data[, 2] GSV <- aggregate(RV, list(RV), length)$x - if (length(GSV) != set_size | min(GSV) <= 1) { + if (length(GSV) != set_size || min(GSV) <= 1) { stop("In ranked set sample, first ranking method should have at least two observations in any judgment ranking group") } diff --git a/R/RSSDF.R b/R/RSSDF.R index e91135f..2dfd0a8 100644 --- a/R/RSSDF.R +++ b/R/RSSDF.R @@ -1,29 +1,31 @@ ############################## 3 -# This function genrates RSS sample with K ranking methods with replacemet +# This function generates RSS sample with K ranking methods with replacement # Pop: has two variables popY, variable of interest # popAux: Auxiliary variable # We assume pop and popAux are correlated # n: sample size n=Hd, H: set size, d: cycle size RSSDF <- function(pop, n, H, K) { - d <- n / H + verify_rss_params(pop, n, H, K) + + n_cycles <- n / H popY <- pop[, 1] popAux <- pop[, 2] N <- length(popY) RSSM <- matrix(0, ncol = (K + 1), nrow = n) ic <- 1 - for (j in (1:d)) { + for (j in (1:n_cycles)) { for (h in (1:H)) { - setid <- sample(1:N, H) - setY <- popY[setid] - setX <- popAux[setid] - orAux <- order(setX) - orY <- setY[orAux] - osetX <- setX[orAux] - setidO <- setid[orAux] + sampled_id <- sample(1:N, H) + setY <- popY[sampled_id] + setX <- popAux[sampled_id] + auxiliary_order <- order(setX) + ordered_setY <- setY[auxiliary_order] + ordered_setX <- setX[auxiliary_order] + ordered_sample_id <- sampled_id[auxiliary_order] # oset=DELLF(set,tauV[1]) - RSSM[ic, c(1, 2)] <- c(orY[h], h) - k1obs <- osetX[h] - redAux <- popAux[-setidO[h]] + RSSM[ic, c(1, 2)] <- c(ordered_setY[h], h) + k1obs <- ordered_setX[h] + redAux <- popAux[-ordered_sample_id[h]] if (K > 1) { for (k in (2:K)) { kset <- c(k1obs, sample(redAux, (H - 1))) diff --git a/R/RSSEF.R b/R/RSSEF.R index 0d33ecb..f0daed5 100644 --- a/R/RSSEF.R +++ b/R/RSSEF.R @@ -1,7 +1,7 @@ ########################################### # This function provides estimator for RSS data # RSSK: n by (K+1) dimensional data matrix, the first column is Y-values, -# the next K coulumns are the ranks of K-ranking methods +# the next K columns are the ranks of K-ranking methods # set_size: set Size # N: population size # model: if Modle=0 design based inference, if model=1, superpopulation model @@ -21,7 +21,8 @@ #' @keywords internal #' RSSEF <- function(data, set_size, replace, model, N, alpha) { - RM <- data[, -1] + # unused variable + # RM <- data[, -1] RV <- data[, 2] # We need to be careful about this. Y <- data[, 1] # We need to be careful about this. Need to ensure response is in col 1. n <- nrow(data) @@ -86,7 +87,7 @@ RSSEF <- function(data, set_size, replace, model, N, alpha) { Jack.Repl.AWi <- apply(matrix(1:n, ncol = 1), 1, FWDel1, AWY = AWY) # Aggrement weight estimator # when the i-th obseervation is deleted if (replace) fc <- 1 else fc <- 1 - n / (N - 1) - J.var <- fc * (n - 1) * var(Jack.Repl.AWi) * ((n - 1) / n)^2 # Jackknife variance estiamte for aggreement weight JPS estimator + J.var <- fc * (n - 1) * var(Jack.Repl.AWi) * ((n - 1) / n)^2 # Jackknife variance estimate for aggreement weight JPS estimator ############################################################## diff --git a/R/RSSNRF.R b/R/RSSNRF.R index a388c3a..220925e 100644 --- a/R/RSSNRF.R +++ b/R/RSSNRF.R @@ -5,15 +5,18 @@ # We assume pop and popAux are correlated # n: sample size n=Hd, H: set size, d: cycle size RSSNRF <- function(pop, n, H, K) { - d <- n / H + verify_rss_params(pop, n, H, K) + + n_cycles <- n / H K1 <- K + 1 - rseq <- rep((1:H), times = d) + rseq <- rep((1:H), times = n_cycles) popY <- pop[, 1] N <- length(popY) popAux <- pop[, 2] popind <- 1:N RSSM <- matrix(0, ncol = (K1), nrow = n) - ic <- 1 + # unused variable + # ic <- 1 ind <- sample(popind, n * H) setY <- matrix(popY[ind], ncol = H, nrow = n) setX <- matrix(popAux[ind], ncol = H, nrow = n) diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..25b13ac --- /dev/null +++ b/R/utils.R @@ -0,0 +1,42 @@ +is_positive_wholenumber <- function(x, tol = .Machine$double.eps^0.5) { + is_wholenumber(x, tol) && x > 0 +} + +is_wholenumber <- function(x, tol = .Machine$double.eps^0.5) { + abs(x - round(x)) < tol +} + +verify_rss_params <- function(pop, n, H, K) { + pop_dimension <- dim(pop) + if (length(pop_dimension) != 2) { + stop("`pop` must be a 2-dimension matrix-like object.") + } + + if (pop_dimension[[2]] < 2) { + stop("`pop` must have at least 2 columns.") + } + + if (!is_positive_wholenumber(n)) { + stop("`n` must be a positive whole number.") + } + + if (!is_positive_wholenumber(H)) { + stop("`H` must be a positive whole number.") + } + + if (!is_positive_wholenumber(K)) { + stop("`K` must be a positive whole number.") + } + + if (pop_dimension[[1]] < n) { + stop("`pop` must have at least `n` rows.") + } + + if (n < H) { + stop("`n` must >= `H`.") + } + + if (n %% H != 0) { + stop("`n` must be a multiple of `H`.") + } +} diff --git a/data/Coombe2019.rda b/data/coombe2019.rda similarity index 100% rename from data/Coombe2019.rda rename to data/coombe2019.rda diff --git a/man/Coombe2019.Rd b/man/coombe2019.Rd similarity index 100% rename from man/Coombe2019.Rd rename to man/coombe2019.Rd diff --git a/tests/population.rda b/tests/population.rda new file mode 100644 index 0000000..05c1187 Binary files /dev/null and b/tests/population.rda differ diff --git a/tests/testthat/test-RSSDF.R b/tests/testthat/test-RSSDF.R new file mode 100644 index 0000000..3a329ae --- /dev/null +++ b/tests/testthat/test-RSSDF.R @@ -0,0 +1,34 @@ +test_that("RSSDF has a correct output.", { + skip_if(getRversion() < 3.4) + # TODO: create a matrix to not rely on `population.rda` + load("../population.rda") + + rss_matrix <- RSSDF(population, 100, 10, 2) + expect_equal(dim(rss_matrix), c(100, 3)) + expect_equal(sort(unique(rss_matrix[, 2])), 1:10) + + sample_counts_in_sets <- table(rss_matrix[, 2]) + expect_equal(sample_counts_in_sets[[2]], 10) + expect_equal(table(sample_counts_in_sets)[[1]], 10) + + # # # in case we are supporting `n %% H > 0` + # rss_matrix_with_dropped_sample <- RSSDF(population, 100, 11, 2) + # expect_equal(sort(unique(rss_matrix_with_dropped_sample[, 2])), 0:11) + # + # sample_counts_in_sets <- table(rss_matrix_with_dropped_sample[, 2]) + # expect_equal(sample_counts_in_sets[[2]], 9) + # expect_equal(table(sample_counts_in_sets)[[2]], 11) +}) + +test_that("Inputs are valid.", { + matrix_ <- matrix(1:10, nrow = 5, ncol = 2) + + expect_error(RSSDF(1:10, -100, 10, 1), "`pop` must be a 2-dimension matrix-like object.") + expect_error(RSSDF(matrix(1:10), -100, 10, 1), "`pop` must have at least 2 columns.") + expect_error(RSSDF(matrix_, -100, 10, 1), "`n` must be a positive whole number.") + expect_error(RSSDF(matrix_, 100, -10, 1), "`H` must be a positive whole number.") + expect_error(RSSDF(matrix_, 100, 10, -1), "`K` must be a positive whole number.") + expect_error(RSSDF(matrix_, 100, 10, 1), "`pop` must have at least `n` rows.") + expect_error(RSSDF(matrix_, 5, 11, 1), "`n` must >= `H`.") + expect_error(RSSDF(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.") +}) diff --git a/tests/testthat/test-RSSNRF.R b/tests/testthat/test-RSSNRF.R new file mode 100644 index 0000000..fd0382a --- /dev/null +++ b/tests/testthat/test-RSSNRF.R @@ -0,0 +1,26 @@ +test_that("RSSDF has a correct output.", { + skip_if(getRversion() < 3.4) + # TODO: create a matrix to not rely on `population.rda` + load("../population.rda") + + rss_matrix <- RSSNRF(population, 100, 10, 2) + expect_equal(dim(rss_matrix), c(100, 3)) + expect_equal(sort(unique(rss_matrix[, 2])), 1:10) + + sample_counts_in_sets <- table(rss_matrix[, 2]) + expect_equal(sample_counts_in_sets[[2]], 10) + expect_equal(table(sample_counts_in_sets)[[1]], 10) +}) + +test_that("Inputs are valid.", { + matrix_ <- matrix(1:10, nrow = 5, ncol = 2) + + expect_error(RSSNRF(1:10, -100, 10, 1), "`pop` must be a 2-dimension matrix-like object.") + expect_error(RSSNRF(matrix(1:10), -100, 10, 1), "`pop` must have at least 2 columns.") + expect_error(RSSNRF(matrix_, -100, 10, 1), "`n` must be a positive whole number.") + expect_error(RSSNRF(matrix_, 100, -10, 1), "`H` must be a positive whole number.") + expect_error(RSSNRF(matrix_, 100, 10, -1), "`K` must be a positive whole number.") + expect_error(RSSNRF(matrix_, 100, 10, 1), "`pop` must have at least `n` rows.") + expect_error(RSSNRF(matrix_, 5, 11, 1), "`n` must >= `H`.") + expect_error(RSSNRF(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.") +})