From ccf713db3b1b92821ae365824e83b7f630f8ebb9 Mon Sep 17 00:00:00 2001 From: wasin pipattungsakul Date: Thu, 16 May 2024 15:48:06 +0930 Subject: [PATCH 01/13] refactor --- R/JPSEDF.R | 12 ++++++------ R/JPSEF.R | 29 +++++++++++++++-------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/R/JPSEDF.R b/R/JPSEDF.R index 85eabeb..683dcc8 100644 --- a/R/JPSEDF.R +++ b/R/JPSEDF.R @@ -7,15 +7,15 @@ #' @param coef Coefficients used in variance computation when sample size is n. #' @param coef_del Coefficients used in variance computation when the i-th unit is deleted. #' @param replace Logical. Sample with replacement? -#' @param model An inference mode: -#' - `0`: design based inference -#' - `1`: model based inference using super population model +#' @param model_based An inference mode: +#' - `FALSE`: design based inference +#' - `TRUE`: model based inference using super population model #' @param K Number of rankers. #' #' @return #' @keywords internal #' -JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model, K) { +JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model_based, K) { y_ij <- expand.grid(Y, Y) # count ranks including zeros @@ -61,7 +61,7 @@ JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model, K) { estimated_variance <- coef[2] * t1s / (set_size - 1) + coef[3] * t2s } - if (model == 1) { + if (model_based) { estimated_variance <- ((t1s + t2s) / set_size^2 * ((-1 / N) + coef[2] * set_size^2 / (set_size - 1)) + t2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1))) @@ -117,7 +117,7 @@ JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model, K) { } else { VEST.Del <- coef_del[2] * T1v.Del / (set_size - 1) + coef_del[3] * T2v.Del } - if (model == 1) { + if (model_based) { VEST.Del <- (T1v.Del + T2v.Del) / set_size^2 * ((-1 / N) + coef_del[2] * set_size^2 / (set_size - 1)) + T2v.Del * ((coef_del[3] + coef_del[2]) - coef_del[2] * set_size / (set_size - 1)) # if(VEST.Del <= 0 ) VEST.Del=T2v.del*((coef_del[3]+coef_del[2])+coef_del[2]*set_size/(set_size-1)) VEST.Del[VEST.Del <= 0] <- T2v.Del[VEST.Del <= 0] * ((coef_del[3] + coef_del[2]) - coef_del[2] * set_size / (set_size - 1)) diff --git a/R/JPSEF.R b/R/JPSEF.R index e2ac652..27cad48 100644 --- a/R/JPSEF.R +++ b/R/JPSEF.R @@ -3,9 +3,9 @@ #' @param data The data to use for estimation. #' @param set_size Set size for each raking group. #' @param replace Logical (default `TRUE`). Sample with replacement? -#' @param model An inference mode: -#' - `0`: design based inference -#' - `1`: model based inference using super population model +#' @param model_based An inference mode: +#' - `FALSE`: design based inference +#' - `TRUE`: model based inference using super population model #' @param N The population size. #' @param alpha The significance level. #' @@ -14,7 +14,7 @@ #' @return #' @keywords internal #' -JPSEF <- function(data, set_size, replace = TRUE, model, N, alpha) { +JPSEF <- function(data, set_size, replace = TRUE, model_based, N, alpha) { n_rankers <- ncol(data) - 1 # if (is.null(N)) { @@ -51,17 +51,18 @@ JPSEF <- function(data, set_size, replace = TRUE, model, N, alpha) { fc <- 1 # finite population correction factor } - if (model == 1) { + if (model_based == 1) { coefd <- coefn coef_del <- coef_del1 } + y <- data[, 1] if (n_rankers == 1) { - jps_estimates <- JPSEDF(data[, -1], data[, 1], set_size, N, coefd, coef_del, replace, model, n_rankers) + jps_estimates <- JPSEDF(data[, -1], y, set_size, N, coefd, coef_del, replace, model_based, n_rankers) estimators <- c("JPS", "SRS") - means <- round(c(jps_estimates[1], mean(data[, 1])), digits = 3) + means <- round(c(jps_estimates[1], mean(y)), digits = 3) # TODO: check std error calculation of jps variance - std_errors <- round(c(sqrt(jps_estimates[2]), sd(data[, 1]) / sqrt(n)), digits = 3) + std_errors <- round(c(sqrt(jps_estimates[2]), sd(y) / sqrt(n)), digits = 3) lower_bound <- round(means - qt(1 - alpha / 2, n - 1) * std_errors, digits = 3) upper_bound <- round(means + qt(1 - alpha / 2, n - 1) * std_errors, digits = 3) @@ -79,13 +80,13 @@ JPSEF <- function(data, set_size, replace = TRUE, model, N, alpha) { data[, -1], 2, JPSEDF, - Y = data[, 1], + Y = y, set_size = set_size, N = N, coef = coefd, coef_del = coef_del, replace = replace, - model = model, + model_based = model_based, n_rankers ) @@ -118,19 +119,19 @@ JPSEF <- function(data, set_size, replace = TRUE, model, N, alpha) { # agreement weighted agreement_weights <- t(apply(data.frame(data[, -1]), 1, calculate_agreement_weights, set_size = set_size)) aw_sum <- apply(agreement_weights, 2, sum) - cross_product <- data[, 1] %*% agreement_weights + cross_product <- y %*% agreement_weights agreement_mean <- mean(cross_product[aw_sum > 0] / aw_sum[aw_sum > 0]) - awy <- cbind(data[, 1], agreement_weights) + awy <- cbind(y, agreement_weights) jackknife_agreement_mean <- apply(matrix(1:n, ncol = 1), 1, FWDel1, AWY = awy) jackknife_agreement_var <- fc * (n - 1) * var(jackknife_agreement_mean) * ((n - 1) / n)^2 - estimated_means <- c(jps_mean, jps_var_weighted_mean, agreement_mean, mean_best_ranker, mean(data[, 1])) + estimated_means <- c(jps_mean, jps_var_weighted_mean, agreement_mean, mean_best_ranker, mean(y)) estimated_variances <- c( jackknife_variance, jackknife_var_weighted_var, jackknife_agreement_var, variance_best_ranker, - var(data[, 1]) / n + var(y) / n ) min_variance_index <- which(estimated_variances == min(estimated_variances)) From e50912316fd611bbea44dfcf051158a69b8d23f4 Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Thu, 16 May 2024 15:49:22 +0930 Subject: [PATCH 02/13] stash --- R/One_Sample.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/One_Sample.R b/R/One_Sample.R index 3d15f53..2288720 100644 --- a/R/One_Sample.R +++ b/R/One_Sample.R @@ -1,4 +1,4 @@ -#' Calculate the estimate of the rankings +#' Estimate means from RSS or JPS sample. #' #' @param data A data frame of `JPS` or `RSS` rankings. #' @param set_size The set size of the ranks. From 0e8e8d08d93b0577e88135cda12b457faf0d280b Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Thu, 16 May 2024 17:07:40 +0930 Subject: [PATCH 03/13] rename functions --- R/One_Sample.R | 99 -------------------------- R/{JPSEF.R => jps_estimator.R} | 18 ++--- R/{JPSEDF.R => jps_estimator_single.R} | 12 ++-- R/rss_jps_estimate.R | 64 +++++++++++++++++ R/utils.R | 9 +-- example.R | 22 +++--- tests/testthat/test-One_Sample.R | 74 ++++++++----------- 7 files changed, 122 insertions(+), 176 deletions(-) delete mode 100644 R/One_Sample.R rename R/{JPSEF.R => jps_estimator.R} (92%) rename R/{JPSEDF.R => jps_estimator_single.R} (94%) create mode 100644 R/rss_jps_estimate.R diff --git a/R/One_Sample.R b/R/One_Sample.R deleted file mode 100644 index 2288720..0000000 --- a/R/One_Sample.R +++ /dev/null @@ -1,99 +0,0 @@ -#' Estimate means from RSS or JPS sample. -#' -#' @param data A data frame of `JPS` or `RSS` rankings. -#' @param set_size The set size of the ranks. -#' @param method A method used to sample: -#' - `"JPS"`: Judgment-post stratified sampling -#' - `"RSS"`: Ranked set sampling -#' @param confidence The confidence level to use. -#' @param replace Logical (default `TRUE`). Sample with replacement? -#' @param model An inference mode: -#' - `0`: design based inference -#' - `1`: model based inference using super population model -#' @param pop_size The population size. Must be provided if sampling without replacement, or if `model` is `1`. -#' -#' @return A `data.frame` with the point estimates provided by different types of estimators along with standard -#' error and confidence intervals. -#' @export -#' -#' @examples -#' -#' # Compute the JPS estimators -#' -#' JPS.Estimates <- OneSample( -#' data = emergence_ranks, set_size = 4, -#' method = "JPS", confidence = 0.95, -#' replace = TRUE, model = 0, -#' pop_size = nrow(population) -#' ) -#' -OneSample <- function(data, set_size, method, confidence = 0.95, replace = TRUE, model = 0, pop_size = NULL) { - # Choose the type of estimators to output? - # Rename to estimate? Or something else? - # Break confidence interval into two columns? - # pop_size: nrow(data)*set_size <= pop_size, > 0, only relevant if replace = FALSE - # method <- match.arg(toupper(method), c("JPS", "RSS")) - verify_one_sample_params(data, set_size, method, confidence, replace, model, pop_size) - - # if (set_size < 1 || is.na(set_size) || is.null(set_size) || !is.numeric(set_size)) { - # stop("set_size must be a positive numeric value") - # } - # - # if (!isTRUE(replace) && !isFALSE(replace)) { - # stop("replace must be TRUE or FALSE") - # } - # - # - # if (confidence > 1 || confidence < 0 || !is.numeric(confidence)) { - # stop("confidence must take a numeric value between 0 and 1, indicating the confidence level") - # } - # - # if (!model %in% c(1, 0)) { - # stop("model must be 0 for design based inference or 1 for super-population model") - # } - # - # - # if (!replace) { - # if (missing(pop_size) || is.null(pop_size) || !is.numeric(pop_size)) { - # stop("A numeric population size pop_size must be provided when sampling without replacement") - # } else if (pop_size <= nrow(data) * set_size || pop_size <= 0) { - # stop("pop_size must be positive and less than data x set_size") - # } - # } - # if (model == 1 && missing(pop_size)) { - # stop("The population size pop_size must be provided for super-population model") - # } - alpha <- 1 - confidence - - ################################################################# - ### Judgment-post stratified sample ############################# - ################################################################# - - if (method == "JPS") { - results <- JPSEF(data, set_size, replace, model, pop_size, alpha) - } - - ################################################################# - ### Ranked set sample ########################################### - ################################################################# - - else if (method == "RSS") { - RV <- data[, 2] - GSV <- aggregate(RV, list(RV), length)$x - - if (length(GSV) != set_size || min(GSV) <= 1) { - stop("In ranked set sample, first ranking method should have at least two observations in any judgment ranking group") - } - - results <- RSSEF(data, set_size, replace, model, pop_size, alpha) - } - - if (model == 1) { - colnames(results) <- c("Predictor", "Prediction", "Pred. Error", paste0(confidence * 100, "% Prediction intervals")) - } - - return(results) -} - -#' @rdname OneSample -one_sample <- OneSample diff --git a/R/JPSEF.R b/R/jps_estimator.R similarity index 92% rename from R/JPSEF.R rename to R/jps_estimator.R index 27cad48..afa945c 100644 --- a/R/JPSEF.R +++ b/R/jps_estimator.R @@ -14,12 +14,9 @@ #' @return #' @keywords internal #' -JPSEF <- function(data, set_size, replace = TRUE, model_based, N, alpha) { +jps_estimator <- function(data, set_size, replace = TRUE, model_based, N, alpha) { n_rankers <- ncol(data) - 1 - # if (is.null(N)) { - # stop("Population size N must be provided for sampling without replacement") - # } if (!replace && is.null(N)) { stop("Population size N must be provided for sampling without replacement") } @@ -56,9 +53,12 @@ JPSEF <- function(data, set_size, replace = TRUE, model_based, N, alpha) { coef_del <- coef_del1 } + ranks <- data[, -1] y <- data[, 1] if (n_rankers == 1) { - jps_estimates <- JPSEDF(data[, -1], y, set_size, N, coefd, coef_del, replace, model_based, n_rankers) + jps_estimates <- jps_estimator_single( + ranks, y, set_size, N, coefd, coef_del, replace, model_based, n_rankers + ) estimators <- c("JPS", "SRS") means <- round(c(jps_estimates[1], mean(y)), digits = 3) # TODO: check std error calculation of jps variance @@ -77,10 +77,10 @@ JPSEF <- function(data, set_size, replace = TRUE, model_based, N, alpha) { # more than one ranker jps_estimates <- apply( - data[, -1], + ranks, 2, - JPSEDF, - Y = y, + jps_estimator_single, + y = y, set_size = set_size, N = N, coef = coefd, @@ -117,7 +117,7 @@ JPSEF <- function(data, set_size, replace = TRUE, model_based, N, alpha) { jackknife_variance <- fc * (n - 1) * var(jackknife_mean) * ((n - 1) / n)^2 # agreement weighted - agreement_weights <- t(apply(data.frame(data[, -1]), 1, calculate_agreement_weights, set_size = set_size)) + agreement_weights <- t(apply(data.frame(ranks), 1, calculate_agreement_weights, set_size = set_size)) aw_sum <- apply(agreement_weights, 2, sum) cross_product <- y %*% agreement_weights agreement_mean <- mean(cross_product[aw_sum > 0] / aw_sum[aw_sum > 0]) diff --git a/R/JPSEDF.R b/R/jps_estimator_single.R similarity index 94% rename from R/JPSEDF.R rename to R/jps_estimator_single.R index 683dcc8..91d9ca0 100644 --- a/R/JPSEDF.R +++ b/R/jps_estimator_single.R @@ -1,7 +1,7 @@ #' Computes the estimator and variance for each individual ranker #' #' @param ranks Ranks of Y. -#' @param Y Response measurements. +#' @param y Response measurements. #' @param set_size Set size for each raking group. #' @param N Finite population size. #' @param coef Coefficients used in variance computation when sample size is n. @@ -15,8 +15,8 @@ #' @return #' @keywords internal #' -JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model_based, K) { - y_ij <- expand.grid(Y, Y) +jps_estimator_single <- function(ranks, y, set_size, N, coef, coef_del, replace, model_based, K) { + y_ij <- expand.grid(y, y) # count ranks including zeros rank_count <- rep(0, set_size) @@ -48,7 +48,7 @@ JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model_based, } ############################################################################ - estimated_mean <- mean(aggregate(Y, list(ranks), mean)$x) + estimated_mean <- mean(aggregate(y, list(ranks), mean)$x) t2s <- set_size * tt2 / (2 * n_two_plus_ranks) t1s <- tt1 / (2 * coef[1] * n_non_empty_ranks^2) # VestD0=coef[2]*T1s/(set_size-1)+coef[3]*T2s @@ -76,7 +76,7 @@ JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model_based, ################################################################ ######### This part is new for Jackknife replication, delete one observations ######## reduces the computation time - n <- length(Y) + n <- length(y) # index to determine which observation is to be deleted ID <- 1:n # Index of observations to be deleted @@ -94,7 +94,7 @@ JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model_based, # This is used in apply function below INDM <- matrix(1:n, ncol = 1) # compile additional variables in list - PASS <- list(Y, ranks, Ind.ij, rank_count, Y.ij2N, R.hhpN, agg_ss, tt2, tt1) + PASS <- list(y, ranks, Ind.ij, rank_count, Y.ij2N, R.hhpN, agg_ss, tt2, tt1) # This computes TT2, TT1, dn, dn-star for each deleted unit "i".deltM=DeltM DeltM <- t(apply(INDM, 1, DELETi, PASS = PASS)) ################################################## diff --git a/R/rss_jps_estimate.R b/R/rss_jps_estimate.R new file mode 100644 index 0000000..ea19331 --- /dev/null +++ b/R/rss_jps_estimate.R @@ -0,0 +1,64 @@ +#' Estimate means from RSS or JPS sample. +#' +#' @param data A data frame of `JPS` or `RSS` rankings. +#' @param set_size The set size of the ranks. +#' @param method A method used to sample: +#' - `"JPS"`: Judgment-post stratified sampling +#' - `"RSS"`: Ranked set sampling +#' @param confidence The confidence level to use. +#' @param replace Logical (default `TRUE`). Sample with replacement? +#' @param model_based An inference mode: +#' - `FALSE`: design based inference +#' - `TRUE`: model based inference using super population model +#' @param pop_size The population size. Must be provided if sampling without replacement, or if `model` is `1`. +#' +#' @return A `data.frame` with the point estimates provided by different types of estimators along with standard +#' error and confidence intervals. +#' @export +#' +#' @examples +#' +#' # Compute the JPS estimators +#' +#' JPS.Estimates <- OneSample( +#' data = emergence_ranks, set_size = 4, +#' method = "JPS", confidence = 0.95, +#' replace = TRUE, model = 0, +#' pop_size = nrow(population) +#' ) +#' +rss_jps_estimate <- function( + data, set_size, method, confidence = 0.95, replace = TRUE, model_based = FALSE, pop_size = NULL) { + # Choose the type of estimators to output? + # Rename to estimate? Or something else? + # Break confidence interval into two columns? + # pop_size: nrow(data)*set_size <= pop_size, > 0, only relevant if replace = FALSE + # method <- match.arg(toupper(method), c("JPS", "RSS")) + verify_rss_jps_estimate_params(data, set_size, method, confidence, replace, model_based, pop_size) + + alpha <- 1 - confidence + + + if (method == "JPS") { + results <- jps_estimator(data, set_size, replace, model_based, pop_size, alpha) + } else if (method == "RSS") { + RV <- data[, 2] + GSV <- aggregate(RV, list(RV), length)$x + + if (length(GSV) != set_size || min(GSV) <= 1) { + stop("In ranked set sample, first ranking method should have at least two observations in any judgment ranking group") + } + + results <- RSSEF(data, set_size, replace, model_based, pop_size, alpha) + } + + if (model_based) { + ci_col <- paste0(confidence * 100, "% Prediction intervals") + colnames(results) <- c("Predictor", "Prediction", "Pred. Error", ci_col) + } + + return(results) +} + +#' @rdname OneSample +one_sample <- rss_jps_estimate diff --git a/R/utils.R b/R/utils.R index 3fee9e2..5400408 100644 --- a/R/utils.R +++ b/R/utils.R @@ -75,17 +75,14 @@ must_be_ <- function(valid_values) { }) } -verify_one_sample_params <- function(data, set_size, method, confidence, replace, model, pop_size) { +verify_rss_jps_estimate_params <- function(data, set_size, method, confidence, replace, model_based, pop_size) { verify_positive_whole_number(set_size) - verify_boolean(replace) + verify_boolean(replace, model_based) verify_between(confidence, lower = 0, upper = 1) valid_methods <- c("JPS", "RSS") verify_must_be(method, valid_values = valid_methods) - valid_models <- c(0, 1) - verify_must_be(model, valid_values = valid_models) - if (!replace) { if (!is.numeric(pop_size)) { stop("A numeric population size `pop_size` must be provided when sampling without replacement") @@ -94,7 +91,7 @@ verify_one_sample_params <- function(data, set_size, method, confidence, replace } } - if (model == 1 && !is.numeric(pop_size)) { + if (model_based && !is.numeric(pop_size)) { stop("The population size `pop_size` must be provided for super-population model") } } diff --git a/example.R b/example.R index fc64811..0eea57d 100644 --- a/example.R +++ b/example.R @@ -1,19 +1,19 @@ N <- 600 # population size -Setsize <- 3 -H <- Setsize # the number of samples to be ranked in each set +set_size <- 3 +H <- set_size # the number of samples to be ranked in each set with_replacement <- FALSE -Model <- 0 # ( if Model=0, Design based inference, if Model=1, super population model is used) +model_based <- FALSE sampling_method <- "JPS" # JPS, RSS sigma <- 4 # population standard deviation mu <- 10 # population mean -K <- 3 # number of rankers +n_rankers <- 3 # number of rankers n <- 30 # sample size alpha <- 0.05 -rhoV <- rep(0.75, K) -tauV <- sigma * sqrt(1 / rhoV^2 - 1) +rhos <- rep(0.75, n_rankers) +taus <- sigma * sqrt(1 / rhos^2 - 1) pop <- qnorm((1:N) / (N + 1), mu, sigma) rho <- 0.75 @@ -22,25 +22,25 @@ X <- pop + tau * rnorm(N, 0, 1) # JPS sampling if (sampling_method == "JPS") { - data <- JPSD2F(pop, n, H, tauV, K, with_replacement) + data <- JPSD2F(pop, n, H, taus, n_rankers, with_replacement) } # Ranked set sampling if (sampling_method == "RSS") { pop <- cbind(pop, X) - data <- RSS(pop, n, H, K, with_replacement) + data <- RSS(pop, n, H, n_rankers, with_replacement) data <- data[order(data[, 2]), ] } -ONE <- OneSample(data, Setsize, sampling_method, 0.80, with_replacement, Model, N) +ONE <- rss_jps_estimate(data, set_size, sampling_method, 0.80, with_replacement, model_based, N) w_or_wo <- " without " if (with_replacement) { w_or_wo <- " with " } -print(paste0(sampling_method, " sample with number of rankers K=", K, w_or_wo, "replacement")) +print(paste0(sampling_method, " sample with number of rankers K=", n_rankers, w_or_wo, "replacement")) -if (Model == 0) { +if (model_based == 0) { print("Design based inference is developed") } else { print("Super-population model is used") diff --git a/tests/testthat/test-One_Sample.R b/tests/testthat/test-One_Sample.R index 136c95c..dde6f86 100644 --- a/tests/testthat/test-One_Sample.R +++ b/tests/testthat/test-One_Sample.R @@ -1,133 +1,117 @@ test_that("onesample works with JPS", { skip_if(getRversion() < 3.4) load("../jps_data.Rdata") - expect_identical(OneSample(Data, 3, "JPS", 0.95, FALSE, 0, 600), saved_jps_output) + expect_identical(rss_jps_estimate(Data, 3, "JPS", 0.95, FALSE, FALSE, 600), saved_jps_output) }) test_that("onesample works with RSS", { skip_if(getRversion() < 3.4) load("../rss_data.Rdata") - expect_identical(OneSample(Data, 3, "RSS", 0.95, FALSE, 0, 600), saved_rss_output) + expect_identical(rss_jps_estimate(Data, 3, "RSS", 0.95, FALSE, FALSE, 600), saved_rss_output) }) test_that("set_size is positive", { expect_error( - OneSample(emergence_ranks, -4, "JPS", replace = T, pop_size = 2640), + rss_jps_estimate(emergence_ranks, -4, "JPS", replace = T, pop_size = 2640), "`set_size` must be a positive whole number." ) expect_error( - OneSample(emergence_ranks, "4", "JPS", replace = T, pop_size = 2640), + rss_jps_estimate(emergence_ranks, "4", "JPS", replace = T, pop_size = 2640), "`set_size` must be a positive whole number." ) expect_error( - OneSample(emergence_ranks, NA, "JPS", replace = T, pop_size = 2640), + rss_jps_estimate(emergence_ranks, NA, "JPS", replace = T, pop_size = 2640), "`set_size` must be a positive whole number." ) }) test_that("replace is TRUE or FALSE", { expect_error( - OneSample(emergence_ranks, 4, "JPS", replace = NA, pop_size = 2640), + rss_jps_estimate(emergence_ranks, 4, "JPS", replace = NA, pop_size = 2640), "`replace` must be a boolean." ) - # expect_error( - # OneSample(emergence_ranks, 4, "RSS", replace = NA, pop_size = 2640), - # "`replace` must be a boolean." - # ) - # expect_error( - # OneSample(emergence_ranks, 4, "JPS", replace = "NA", pop_size = 2640), - # "`replace` must be a boolean." - # ) expect_error( - OneSample(emergence_ranks, 4, "RSS", replace = "NA", pop_size = 2640), + rss_jps_estimate(emergence_ranks, 4, "RSS", replace = "NA", pop_size = 2640), "`replace` must be a boolean." ) - # expect_error( - # OneSample(emergence_ranks, 4, "JPS", replace = 0, pop_size = 2640), - # "`replace` must be a boolean." - # ) expect_error( - OneSample(emergence_ranks, 4, "RSS", replace = 0, pop_size = 2640), + rss_jps_estimate(emergence_ranks, 4, "RSS", replace = 0, pop_size = 2640), "`replace` must be a boolean." ) }) test_that("pop_size is supplied when needed", { expect_error( - OneSample(emergence_ranks, 4, "JPS", replace = F, 0), + rss_jps_estimate(emergence_ranks, 4, method = "JPS", replace = F, pop_size = NA), "A numeric population size `pop_size` must be provided when sampling without replacement" ) expect_error( - OneSample(emergence_ranks, 4, "JPS", replace = T, model = 1), + rss_jps_estimate(emergence_ranks, 4, method = "JPS", replace = F, pop_size = "A"), + "A numeric population size `pop_size` must be provided when sampling without replacement" + ) + expect_error( + rss_jps_estimate(emergence_ranks, 4, "JPS", replace = T, model_based = TRUE), "The population size `pop_size` must be provided for super-population model" ) }) test_that("method is correct", { expect_error( - OneSample(emergence_ranks, 4, method = "ABC", replace = T), + rss_jps_estimate(emergence_ranks, 4, method = "ABC", replace = T), '`method` must be `"JPS"` or `"RSS"`.' ) expect_error( - OneSample(emergence_ranks, 4, method = 1, replace = T), + rss_jps_estimate(emergence_ranks, 4, method = 1, replace = T), '`method` must be `"JPS"` or `"RSS"`.' ) }) test_that("confidence is between 0 and 1", { - expect_error(OneSample(emergence_ranks, 4, "JPS", replace = T, confidence = 0), NA) + expect_error(rss_jps_estimate(emergence_ranks, 4, "JPS", replace = T, confidence = 0), NA) expect_error( - OneSample(emergence_ranks, 4, "JPS", replace = T, confidence = 5), + rss_jps_estimate(emergence_ranks, 4, "JPS", replace = T, confidence = 5), "`confidence` must be inclusively between 0 and 1." ) expect_error( - OneSample(emergence_ranks, 4, "JPS", replace = T, confidence = "A"), + rss_jps_estimate(emergence_ranks, 4, "JPS", replace = T, confidence = "A"), "`confidence` must be inclusively between 0 and 1." ) expect_error( - OneSample(emergence_ranks, 4, "JPS", replace = T, confidence = NA), + rss_jps_estimate(emergence_ranks, 4, "JPS", replace = T, confidence = NA), "`confidence` must be inclusively between 0 and 1." ) }) test_that("model is 0 or 1", { expect_error( - OneSample(emergence_ranks, 4, method = "JPS", model = 0), + rss_jps_estimate(emergence_ranks, 4, method = "JPS", model_based = FALSE), NA ) expect_error( - OneSample(emergence_ranks, 4, method = "JPS", model = "A"), - "`model` must be `0` or `1`." + rss_jps_estimate(emergence_ranks, 4, method = "JPS", model_based = "A"), + "`model_based` must be a boolean." ) expect_error( - OneSample(emergence_ranks, 4, method = "JPS", model = NA), - "`model` must be `0` or `1`." + rss_jps_estimate(emergence_ranks, 4, method = "JPS", model_based = NA), + "`model_based` must be a boolean." ) }) test_that("pop_size is > 0 and >= set_size*nrow(data)", { expect_error( - OneSample(emergence_ranks, 4, method = "JPS", replace = F, pop_size = 2640), + rss_jps_estimate(emergence_ranks, 4, method = "JPS", replace = F, pop_size = 2640), NA ) expect_error( - OneSample(emergence_ranks, 4, method = "JPS", replace = F, model = 1, pop_size = 2640), + rss_jps_estimate(emergence_ranks, 4, method = "JPS", replace = F, model_based = TRUE, pop_size = 2640), NA ) expect_error( - OneSample(emergence_ranks, 4, method = "JPS", replace = F, pop_size = -5), + rss_jps_estimate(emergence_ranks, 4, method = "JPS", replace = F, pop_size = -5), "`pop_size` must be positive and greater or equal to `data x set_size`" ) expect_error( - OneSample(emergence_ranks, 4, method = "JPS", replace = F, pop_size = 1), + rss_jps_estimate(emergence_ranks, 4, method = "JPS", replace = F, pop_size = 1), "`pop_size` must be positive and greater or equal to `data x set_size`" ) - expect_error( - OneSample(emergence_ranks, 4, method = "JPS", replace = F, pop_size = NA), - "A numeric population size `pop_size` must be provided when sampling without replacement" - ) - expect_error( - OneSample(emergence_ranks, 4, method = "JPS", replace = F, pop_size = "A"), - "A numeric population size `pop_size` must be provided when sampling without replacement" - ) }) From 09020803b6cc313035709b24e83ec65089475d64 Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Fri, 17 May 2024 11:22:44 +0930 Subject: [PATCH 04/13] add dependencies, rename functions --- DESCRIPTION | 18 +++--- NAMESPACE | 3 +- R/IncluProbLv1func.R | 2 +- R/jps_estimator.R | 5 +- R/jps_estimator_single.R | 3 +- R/rss_jps_estimate.R | 7 +-- R/sbs_pps_estimate.R | 2 +- R/sbs_pps_heatmap.R | 3 +- man/DELETi.Rd | 3 - man/JPSD2F.Rd | 21 +++---- man/JPSEDF.Rd | 34 ----------- man/JPSEF.Rd | 28 --------- man/JPSLF.Rd | 3 - man/ListF.Rd | 3 - man/OneSample.Rd | 58 ------------------- man/RSS.Rd | 25 ++++++++ man/RSSDF.Rd | 23 ++++++++ man/RSSED2F.Rd | 3 - man/RSSEF.Rd | 3 - man/RSSNRF.Rd | 23 ++++++++ man/RankedSetSampling.Rd | 4 +- man/TRIPLEF.Rd | 3 - man/bootstrap_sample.Rd | 21 +++++++ ...GHTF.Rd => calculate_agreement_weights.Rd} | 12 ++-- man/{CoefF.Rd => calculate_coefficients.Rd} | 13 ++--- man/calculate_first_order_prob.Rd | 23 ++++++++ man/calculate_inclusion_prob.Rd | 21 +++++++ man/get_empirical_population.Rd | 28 +++++++++ man/get_sbs_pps_sample_indices.Rd | 29 ++++++++++ man/impute_w_knn.Rd | 20 +++++++ man/jps_estimator.Rd | 32 ++++++++++ man/jps_estimator_single.Rd | 49 ++++++++++++++++ man/rss_jps_estimate.Rd | 58 +++++++++++++++++++ man/sbs_pps_estimate.Rd | 48 +++++++++++++++ man/sbs_pps_heatmap.Rd | 28 +++++++++ man/sbs_pps_sample.Rd | 31 ++++++++++ man/single_bootstrap.Rd | 19 ++++++ tests/testthat/test-JPSD2F.R | 2 +- tests/testthat/test-RSS.R | 2 +- tests/testthat/test-RSSDF.R | 2 +- tests/testthat/test-RSSNRF.R | 2 +- ...t-One_Sample.R => test-rss_jps_estimate.R} | 8 +-- tests/testthat/test-sbs_pps_estimate.R | 6 +- tests/testthat/test-sbs_pps_sample.R | 6 +- 44 files changed, 540 insertions(+), 197 deletions(-) delete mode 100644 man/JPSEDF.Rd delete mode 100644 man/JPSEF.Rd delete mode 100644 man/OneSample.Rd create mode 100644 man/RSS.Rd create mode 100644 man/RSSDF.Rd create mode 100644 man/RSSNRF.Rd create mode 100644 man/bootstrap_sample.Rd rename man/{WEIGHTF.Rd => calculate_agreement_weights.Rd} (59%) rename man/{CoefF.Rd => calculate_coefficients.Rd} (64%) create mode 100644 man/calculate_first_order_prob.Rd create mode 100644 man/calculate_inclusion_prob.Rd create mode 100644 man/get_empirical_population.Rd create mode 100644 man/get_sbs_pps_sample_indices.Rd create mode 100644 man/impute_w_knn.Rd create mode 100644 man/jps_estimator.Rd create mode 100644 man/jps_estimator_single.Rd create mode 100644 man/rss_jps_estimate.Rd create mode 100644 man/sbs_pps_estimate.Rd create mode 100644 man/sbs_pps_heatmap.Rd create mode 100644 man/sbs_pps_sample.Rd create mode 100644 man/single_bootstrap.Rd rename tests/testthat/{test-One_Sample.R => test-rss_jps_estimate.R} (95%) diff --git a/DESCRIPTION b/DESCRIPTION index 6c81a8d..83676c4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,16 +20,20 @@ Description: The RankedSetSampling package provides a way for researchers License: MIT + file LICENSE URL: https://biometryhub.github.io/RankedSetSampling/ BugReports: https://github.com/biometryhub/RankedSetSampling/issues -Depends: +Depends: R (>= 3.5.0) -Imports: - Rcpp -Suggests: +Imports: + Rcpp, + ggplot2, + SDaA, + FNN +Suggests: covr, - testthat -LinkingTo: + testthat, + parallel +LinkingTo: Rcpp Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.3.1 diff --git a/NAMESPACE b/NAMESPACE index 9166fc9..187a7a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand -export(OneSample) +export(jps_estimator) +export(rss_jps_estimate) importFrom(Rcpp,sourceCpp) importFrom(stats,aggregate) importFrom(stats,qt) diff --git a/R/IncluProbLv1func.R b/R/IncluProbLv1func.R index 7e1cb90..6bc4126 100644 --- a/R/IncluProbLv1func.R +++ b/R/IncluProbLv1func.R @@ -92,4 +92,4 @@ IncluProbLv1 <- function(popsize,sampsize,set,rank){ } return(unlist(firstorder)) stopCluster(cl) -} \ No newline at end of file +} diff --git a/R/jps_estimator.R b/R/jps_estimator.R index afa945c..44fbebe 100644 --- a/R/jps_estimator.R +++ b/R/jps_estimator.R @@ -11,8 +11,9 @@ #' #' @importFrom stats aggregate qt rnorm sd var #' -#' @return -#' @keywords internal +#' @return A `data.frame` with the point estimates provided by JPS estimators along with standard error and +#' confidence intervals. +#' @export #' jps_estimator <- function(data, set_size, replace = TRUE, model_based, N, alpha) { n_rankers <- ncol(data) - 1 diff --git a/R/jps_estimator_single.R b/R/jps_estimator_single.R index 91d9ca0..620de0e 100644 --- a/R/jps_estimator_single.R +++ b/R/jps_estimator_single.R @@ -12,7 +12,8 @@ #' - `TRUE`: model based inference using super population model #' @param K Number of rankers. #' -#' @return +#' @return A `data.frame` with the point estimates provided by JPS estimators along with standard error and +#' confidence intervals. #' @keywords internal #' jps_estimator_single <- function(ranks, y, set_size, N, coef, coef_del, replace, model_based, K) { diff --git a/R/rss_jps_estimate.R b/R/rss_jps_estimate.R index ea19331..35e16e2 100644 --- a/R/rss_jps_estimate.R +++ b/R/rss_jps_estimate.R @@ -20,10 +20,10 @@ #' #' # Compute the JPS estimators #' -#' JPS.Estimates <- OneSample( +#' jps_estimates <- rss_jps_estimate( #' data = emergence_ranks, set_size = 4, #' method = "JPS", confidence = 0.95, -#' replace = TRUE, model = 0, +#' replace = TRUE, model_based = FALSE, #' pop_size = nrow(population) #' ) #' @@ -59,6 +59,3 @@ rss_jps_estimate <- function( return(results) } - -#' @rdname OneSample -one_sample <- rss_jps_estimate diff --git a/R/sbs_pps_estimate.R b/R/sbs_pps_estimate.R index 5833fb5..ea3fbb9 100644 --- a/R/sbs_pps_estimate.R +++ b/R/sbs_pps_estimate.R @@ -29,7 +29,7 @@ sbs_pps_estimate <- function(population, n, y, sample_matrix, n_bootstraps = 100 sample_matrix <- sample_matrix[!is_duplicated, ] y <- y[!is_duplicated] - estimated_mean <- round(sum(y / sample_matrix[, 6]) / n_population, digit = 3) + estimated_mean <- round(sum(y / sample_matrix[, 6]) / n_population, digits = 3) empirical_population <- get_empirical_population(sample_matrix[, 1], population, y) empirical_inclusion_prob <- calculate_inclusion_prob(empirical_population[, 3], n) diff --git a/R/sbs_pps_heatmap.R b/R/sbs_pps_heatmap.R index 37896e3..2867bc1 100644 --- a/R/sbs_pps_heatmap.R +++ b/R/sbs_pps_heatmap.R @@ -23,7 +23,8 @@ sbs_pps_heatmap <- function(pop, sbs_indices, pps_indices) { measured_sizes <- pop[, 4] heatmap_data <- data.frame(measured_sizes, x1, x2, sbs_or_pps) - sbs_pps_data <- dplyr::filter(heatmap_data, sbs_or_pps != 0) + sbs_pps_data <- subset(heatmap_data, subset = sbs_or_pps != 0) + # sbs_pps_data <- dplyr::filter(heatmap_data, sbs_or_pps != 0) heatmap_ <- ggplot2::ggplot(heatmap_data, ggplot2::aes(x2, x1, fill = measured_sizes)) + ggplot2::geom_tile() + ggplot2::scale_fill_distiller(palette = "RdPu") + diff --git a/man/DELETi.Rd b/man/DELETi.Rd index 993ce44..e09aa7c 100644 --- a/man/DELETi.Rd +++ b/man/DELETi.Rd @@ -10,9 +10,6 @@ DELETi(i, PASS) \item{i}{The index} \item{PASS}{Values passed in} -} -\value{ - } \description{ Title diff --git a/man/JPSD2F.Rd b/man/JPSD2F.Rd index 984ad9a..5019d8b 100644 --- a/man/JPSD2F.Rd +++ b/man/JPSD2F.Rd @@ -2,25 +2,26 @@ % Please edit documentation in R/JPSD2F.R \name{JPSD2F} \alias{JPSD2F} -\title{Title} +\title{Generate JPS sampling without replacement on the provided population.} \usage{ -JPSD2F(pop, n, H, tau, K) +JPSD2F(pop, n, H, tau, K, with_replacement = FALSE) } \arguments{ -\item{pop}{Population} +\item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} -\item{n}{sample size} +\item{n}{Sample size.} -\item{H}{Set size} +\item{H}{Set size for each raking group.} -\item{tau}{controls the ranking quality} +\item{tau}{A parameter which controls ranking quality.} -\item{K}{Number of rankers} +\item{K}{Number of rankers.} + +\item{with_replacement}{A boolean which specifies whether to sample with replacement or not.} } \value{ - +A matrix with ranks from each ranker. } \description{ -Title +Generate JPS sampling without replacement on the provided population. } -\keyword{internal} diff --git a/man/JPSEDF.Rd b/man/JPSEDF.Rd deleted file mode 100644 index a561157..0000000 --- a/man/JPSEDF.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JPSEDF.R -\name{JPSEDF} -\alias{JPSEDF} -\title{Computes the estimator and variance for each individual ranker} -\usage{ -JPSEDF(RV, Y, set_size, N, coef, coef_del, replace, model, K) -} -\arguments{ -\item{RV}{Rank values for Y.} - -\item{Y}{Response measurements.} - -\item{set_size}{Set Size.} - -\item{N}{Finite population size.} - -\item{coef}{Coefficients used in variance computation when sample size is n.} - -\item{coef_del}{Coefficients used in variance computation when the i-th unit is deleted.} - -\item{replace}{Logical. Sample with replacement?} - -\item{model}{If model is 0, it's design based inference, if model = 1, it is model based inference using super population model.} - -\item{K}{} -} -\value{ - -} -\description{ -Computes the estimator and variance for each individual ranker -} -\keyword{internal} diff --git a/man/JPSEF.Rd b/man/JPSEF.Rd deleted file mode 100644 index 9ddcd33..0000000 --- a/man/JPSEF.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JPSEF.R -\name{JPSEF} -\alias{JPSEF} -\title{Computes the estimator for JPS data} -\usage{ -JPSEF(data, set_size, replace = TRUE, model, N, alpha) -} -\arguments{ -\item{data}{The data to use for estimation.} - -\item{set_size}{The set size.} - -\item{replace}{Logical (default \code{TRUE}). Sample with replacement?} - -\item{model}{If model is 0, it's design based inference, if model = 1, it is model based inference using super population model.} - -\item{N}{The population size.} - -\item{alpha}{The significance level.} -} -\value{ - -} -\description{ -Computes the estimator for JPS data -} -\keyword{internal} diff --git a/man/JPSLF.Rd b/man/JPSLF.Rd index 384604a..027a423 100644 --- a/man/JPSLF.Rd +++ b/man/JPSLF.Rd @@ -25,9 +25,6 @@ JPSLF( \item{model}{Whether to use design-based approach or super-population (model based) approach} \item{estimator}{Which type of estimator to return. Default is "SD Weighted". Other options are "Unweighted", "Aggregate Weight", "JPS Estimate", "SRS estimate", "Minimum"} -} -\value{ - } \description{ Judgement Post Stratification List Function diff --git a/man/ListF.Rd b/man/ListF.Rd index f22858e..3e8dbe5 100644 --- a/man/ListF.Rd +++ b/man/ListF.Rd @@ -18,9 +18,6 @@ ListF(data, set_size, Replace, N, Model, CoefD0) \item{Model}{Model} \item{CoefD0}{Coef} -} -\value{ - } \description{ Title diff --git a/man/OneSample.Rd b/man/OneSample.Rd deleted file mode 100644 index bd5c1ab..0000000 --- a/man/OneSample.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/One_Sample.R -\name{OneSample} -\alias{OneSample} -\alias{one_sample} -\title{Calculate the estimate of the rankings} -\usage{ -OneSample( - data, - set_size, - method = c("JPS", "RSS"), - confidence = 0.95, - replace = TRUE, - model = 0, - pop_size = NULL -) - -one_sample( - data, - set_size, - method = c("JPS", "RSS"), - confidence = 0.95, - replace = TRUE, - model = 0, - pop_size = NULL -) -} -\arguments{ -\item{data}{A data frame of \code{JPS} or \code{RSS} rankings.} - -\item{set_size}{The set size of the ranks.} - -\item{method}{Takes values \code{JPS} (the default) for Judgment Post Stratification, or \code{RSS} for Ranked Set Sample. Partial matching is performed, so \code{J} or \code{R} could be used.} - -\item{confidence}{The confidence level to use.} - -\item{replace}{Logical (default \code{TRUE}). Sample with replacement?} - -\item{model}{If model is 0, it's design based inference, if model = 1, it is model based inference using super population model.} - -\item{pop_size}{The population size. Must be provided if sampling without replacement, or if \code{model} is set to 'super-population'.} -} -\value{ -A \code{data.frame} with the point estimates provided by different types of estimators along with standard error and confidence intervals. -} -\description{ -Calculate the estimate of the rankings -} -\examples{ - -# Compute the JPS estimators - -JPS.Estimates <- OneSample(data = emergence_ranks, set_size = 4, - method = "JPS", confidence = 0.95, - replace = TRUE, model = 0, - pop_size = nrow(population)) - -} diff --git a/man/RSS.Rd b/man/RSS.Rd new file mode 100644 index 0000000..4a776a2 --- /dev/null +++ b/man/RSS.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RSS.R +\name{RSS} +\alias{RSS} +\title{Generate ranked set sampling (RSS) on the population provided.} +\usage{ +RSS(pop, n, H, K, with_replacement = FALSE) +} +\arguments{ +\item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} + +\item{n}{Sample size.} + +\item{H}{Set size for each raking group.} + +\item{K}{Number of rankers.} + +\item{with_replacement}{A boolean which specifies whether to sample with replacement or not.} +} +\value{ +A matrix with ranks from each ranker. +} +\description{ +Generate ranked set sampling (RSS) on the population provided. +} diff --git a/man/RSSDF.Rd b/man/RSSDF.Rd new file mode 100644 index 0000000..f87f7f1 --- /dev/null +++ b/man/RSSDF.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RSSDF.R +\name{RSSDF} +\alias{RSSDF} +\title{Generate ranked set sampling (RSS) with replacement on the population provided.} +\usage{ +RSSDF(pop, n, H, K) +} +\arguments{ +\item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} + +\item{n}{Sample size.} + +\item{H}{Set size for each raking group.} + +\item{K}{Number of rankers.} +} +\value{ +A matrix with ranks from each ranker. +} +\description{ +Generate ranked set sampling (RSS) with replacement on the population provided. +} diff --git a/man/RSSED2F.Rd b/man/RSSED2F.Rd index cbe484b..7f36ed5 100644 --- a/man/RSSED2F.Rd +++ b/man/RSSED2F.Rd @@ -14,9 +14,6 @@ RSSED2F(RV, Y, H, N) \item{H}{Set size} \item{N}{Population size} -} -\value{ - } \description{ This function Computes RSS estimator and its variance without replacement sampling diff --git a/man/RSSEF.Rd b/man/RSSEF.Rd index 83d02b3..36a0b8d 100644 --- a/man/RSSEF.Rd +++ b/man/RSSEF.Rd @@ -18,9 +18,6 @@ RSSEF(data, set_size, replace, model, N, alpha) \item{N}{The population size} \item{alpha}{The significance level} -} -\value{ - } \description{ This function provides estimator for RSS data diff --git a/man/RSSNRF.Rd b/man/RSSNRF.Rd new file mode 100644 index 0000000..a11b5af --- /dev/null +++ b/man/RSSNRF.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/RSSNRF.R +\name{RSSNRF} +\alias{RSSNRF} +\title{Generate ranked set sampling (RSS) without replacement on the population provided.} +\usage{ +RSSNRF(pop, n, H, K) +} +\arguments{ +\item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} + +\item{n}{Sample size.} + +\item{H}{Set size for each raking group.} + +\item{K}{Number of rankers.} +} +\value{ +A matrix with ranks from each ranker. +} +\description{ +Generate ranked set sampling (RSS) without replacement on the population provided. +} diff --git a/man/RankedSetSampling.Rd b/man/RankedSetSampling.Rd index 605627a..92a50ee 100644 --- a/man/RankedSetSampling.Rd +++ b/man/RankedSetSampling.Rd @@ -2,11 +2,11 @@ % Please edit documentation in R/RankedSetSampling-package.R \docType{package} \name{RankedSetSampling} -\alias{RankedSetSampling} \alias{RankedSetSampling-package} +\alias{RankedSetSampling} \title{RankedSetSampling: Easing the Application of Ranked Set Sampling in Practice} \description{ -The RankedSetSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. Ranked Set Sampling was originally described by McIntyre (1952) (reprinted in 2005) . This package takes work by Omer and Kravchuk (2021) and enables easy use of the methods. +The RankedSetSampling package provides a way for researchers to easily implement Ranked Set Sampling in practice. Ranked Set Sampling was originally described by McIntyre (1952) (reprinted in 2005) \doi{10.1198/000313005X54180}. This package takes work by Omer and Kravchuk (2021) \url{https://doi.org/10.1007/s13253-021-00439-1} and enables easy use of the methods. } \seealso{ Useful links: diff --git a/man/TRIPLEF.Rd b/man/TRIPLEF.Rd index 4ac79dc..42db1a8 100644 --- a/man/TRIPLEF.Rd +++ b/man/TRIPLEF.Rd @@ -10,9 +10,6 @@ TRIPLEF(uv, H, n) \item{H}{The set size} \item{n}{The sample size} -} -\value{ - } \description{ Calculate the triple sum diff --git a/man/bootstrap_sample.Rd b/man/bootstrap_sample.Rd new file mode 100644 index 0000000..f9a2380 --- /dev/null +++ b/man/bootstrap_sample.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bootstrap_sample.R +\name{bootstrap_sample} +\alias{bootstrap_sample} +\title{Generate boostrap sample on the provided population.} +\usage{ +bootstrap_sample(pop, n, n_bootstraps) +} +\arguments{ +\item{pop}{Population data} + +\item{n}{Sample sizes (SBS sample size, PPS sample size).} + +\item{n_bootstraps}{Number of bootstrap samples.} +} +\value{ +A summary data frame of the estimator. +} +\description{ +Generate boostrap sample on the provided population. +} diff --git a/man/WEIGHTF.Rd b/man/calculate_agreement_weights.Rd similarity index 59% rename from man/WEIGHTF.Rd rename to man/calculate_agreement_weights.Rd index 2ab6187..c5d03ea 100644 --- a/man/WEIGHTF.Rd +++ b/man/calculate_agreement_weights.Rd @@ -1,15 +1,15 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/WEIGHTF.R -\name{WEIGHTF} -\alias{WEIGHTF} +% Please edit documentation in R/calculate_agreement_weights.R +\name{calculate_agreement_weights} +\alias{calculate_agreement_weights} \title{Calculate Agreement Weights} \usage{ -WEIGHTF(RV, set_size) +calculate_agreement_weights(ranks, set_size) } \arguments{ -\item{RV}{Ranking values} - \item{set_size}{Set size} + +\item{RV}{Ranking values} } \value{ A vector of agreement weights diff --git a/man/CoefF.Rd b/man/calculate_coefficients.Rd similarity index 64% rename from man/CoefF.Rd rename to man/calculate_coefficients.Rd index 8adeb3c..9048e6c 100644 --- a/man/CoefF.Rd +++ b/man/calculate_coefficients.Rd @@ -1,18 +1,15 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CoefF.R -\name{CoefF} -\alias{CoefF} +\name{calculate_coefficients} +\alias{calculate_coefficients} \title{This function computes the coefficient of variance estimator} \usage{ -CoefF(H, n) +calculate_coefficients(H, n) } \arguments{ -\item{H}{The set size} - -\item{n}{The sample size} -} -\value{ +\item{H}{Set size for each raking group.} +\item{n}{Sample size.} } \description{ This function computes the coefficient of variance estimator diff --git a/man/calculate_first_order_prob.Rd b/man/calculate_first_order_prob.Rd new file mode 100644 index 0000000..f445d7e --- /dev/null +++ b/man/calculate_first_order_prob.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inclusion_probability.R +\name{calculate_first_order_prob} +\alias{calculate_first_order_prob} +\title{Calculate first order inclusion probability.} +\usage{ +calculate_first_order_prob(i, size_measurement, n, total_size) +} +\arguments{ +\item{i}{Index of the probability being calculated.} + +\item{size_measurement}{Size measurements of population units.} + +\item{n}{Sample sizes (SBS sample size, PPS sample size).} + +\item{total_size}{Sum of size measurement.} +} +\value{ +A vector of inclusion probabilities. +} +\description{ +Calculate first order inclusion probability. +} diff --git a/man/calculate_inclusion_prob.Rd b/man/calculate_inclusion_prob.Rd new file mode 100644 index 0000000..c424bc1 --- /dev/null +++ b/man/calculate_inclusion_prob.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inclusion_probability.R +\name{calculate_inclusion_prob} +\alias{calculate_inclusion_prob} +\title{Calculate inclusion probabilities.} +\usage{ +calculate_inclusion_prob(size_measurement, n, parallelize = TRUE) +} +\arguments{ +\item{size_measurement}{Size measurements of population units.} + +\item{n}{Sample sizes (SBS sample size, PPS sample size).} + +\item{parallelize}{A flag whether to parallelize the computational task.} +} +\value{ +A vector of inclusion probabilities. +} +\description{ +Calculate inclusion probabilities. +} diff --git a/man/get_empirical_population.Rd b/man/get_empirical_population.Rd new file mode 100644 index 0000000..fef0e36 --- /dev/null +++ b/man/get_empirical_population.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_empirical_population.R +\name{get_empirical_population} +\alias{get_empirical_population} +\title{Compute empirical population by imputing reponses and measured sizes using knn.} +\usage{ +get_empirical_population(sample_indices, pop, y) +} +\arguments{ +\item{sample_indices}{Indices of sample.} + +\item{pop}{Population data frame to be sampled with 4 columns. +\enumerate{ +\item Halton numbers +\item X1-coordinate of population unit +\item X2-coordinate of population unit +\item Size measurement of population unit +}} + +\item{y}{Sample response values.} +} +\value{ +A summary data frame of the estimator. +} +\description{ +Compute empirical population by imputing reponses and measured sizes using knn. +} +\keyword{internal} diff --git a/man/get_sbs_pps_sample_indices.Rd b/man/get_sbs_pps_sample_indices.Rd new file mode 100644 index 0000000..f61003e --- /dev/null +++ b/man/get_sbs_pps_sample_indices.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sbs_pps_sample.R +\name{get_sbs_pps_sample_indices} +\alias{get_sbs_pps_sample_indices} +\title{Generate SBS PPS sample indices.} +\usage{ +get_sbs_pps_sample_indices(population, n, with_unique_pps = FALSE) +} +\arguments{ +\item{population}{Population data frame to be sampled with 2 columns. +\enumerate{ +\item Halton numbers +\item Size measurements of population units +}} + +\item{n}{Sample sizes (SBS sample size, PPS sample size).} +} +\value{ +A named list of: +\itemize{ +\item sbs_pps_indices: sample indices +\item sbs_indices: sbs sample indices +\item pps_indices: pps sample indices +\item sizes_wo_sbs: measured sizes without sbs sample +} +} +\description{ +Generate SBS PPS sample indices. +} diff --git a/man/impute_w_knn.Rd b/man/impute_w_knn.Rd new file mode 100644 index 0000000..970ec29 --- /dev/null +++ b/man/impute_w_knn.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_empirical_population.R +\name{impute_w_knn} +\alias{impute_w_knn} +\title{Calculate the mean of the nearest neighbors.} +\usage{ +impute_w_knn(knn_row, y) +} +\arguments{ +\item{knn_row}{A row of knn matrix composed of indices and distances.} + +\item{y}{Sample response values.} +} +\value{ +A summary data frame of the estimator. +} +\description{ +Calculate the mean of the nearest neighbors. +} +\keyword{internal} diff --git a/man/jps_estimator.Rd b/man/jps_estimator.Rd new file mode 100644 index 0000000..959971c --- /dev/null +++ b/man/jps_estimator.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jps_estimator.R +\name{jps_estimator} +\alias{jps_estimator} +\title{Computes the estimator for JPS data} +\usage{ +jps_estimator(data, set_size, replace = TRUE, model_based, N, alpha) +} +\arguments{ +\item{data}{The data to use for estimation.} + +\item{set_size}{Set size for each raking group.} + +\item{replace}{Logical (default \code{TRUE}). Sample with replacement?} + +\item{model_based}{An inference mode: +\itemize{ +\item \code{FALSE}: design based inference +\item \code{TRUE}: model based inference using super population model +}} + +\item{N}{The population size.} + +\item{alpha}{The significance level.} +} +\value{ +A \code{data.frame} with the point estimates provided by JPS estimators along with standard error and +confidence intervals. +} +\description{ +Computes the estimator for JPS data +} diff --git a/man/jps_estimator_single.Rd b/man/jps_estimator_single.Rd new file mode 100644 index 0000000..de08b46 --- /dev/null +++ b/man/jps_estimator_single.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jps_estimator_single.R +\name{jps_estimator_single} +\alias{jps_estimator_single} +\title{Computes the estimator and variance for each individual ranker} +\usage{ +jps_estimator_single( + ranks, + y, + set_size, + N, + coef, + coef_del, + replace, + model_based, + K +) +} +\arguments{ +\item{ranks}{Ranks of Y.} + +\item{y}{Response measurements.} + +\item{set_size}{Set size for each raking group.} + +\item{N}{Finite population size.} + +\item{coef}{Coefficients used in variance computation when sample size is n.} + +\item{coef_del}{Coefficients used in variance computation when the i-th unit is deleted.} + +\item{replace}{Logical. Sample with replacement?} + +\item{model_based}{An inference mode: +\itemize{ +\item \code{FALSE}: design based inference +\item \code{TRUE}: model based inference using super population model +}} + +\item{K}{Number of rankers.} +} +\value{ +A \code{data.frame} with the point estimates provided by JPS estimators along with standard error and +confidence intervals. +} +\description{ +Computes the estimator and variance for each individual ranker +} +\keyword{internal} diff --git a/man/rss_jps_estimate.Rd b/man/rss_jps_estimate.Rd new file mode 100644 index 0000000..7713442 --- /dev/null +++ b/man/rss_jps_estimate.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rss_jps_estimate.R +\name{rss_jps_estimate} +\alias{rss_jps_estimate} +\title{Estimate means from RSS or JPS sample.} +\usage{ +rss_jps_estimate( + data, + set_size, + method, + confidence = 0.95, + replace = TRUE, + model_based = FALSE, + pop_size = NULL +) +} +\arguments{ +\item{data}{A data frame of \code{JPS} or \code{RSS} rankings.} + +\item{set_size}{The set size of the ranks.} + +\item{method}{A method used to sample: +\itemize{ +\item \code{"JPS"}: Judgment-post stratified sampling +\item \code{"RSS"}: Ranked set sampling +}} + +\item{confidence}{The confidence level to use.} + +\item{replace}{Logical (default \code{TRUE}). Sample with replacement?} + +\item{model_based}{An inference mode: +\itemize{ +\item \code{FALSE}: design based inference +\item \code{TRUE}: model based inference using super population model +}} + +\item{pop_size}{The population size. Must be provided if sampling without replacement, or if \code{model} is \code{1}.} +} +\value{ +A \code{data.frame} with the point estimates provided by different types of estimators along with standard +error and confidence intervals. +} +\description{ +Estimate means from RSS or JPS sample. +} +\examples{ + +# Compute the JPS estimators + +jps_estimates <- rss_jps_estimate( + data = emergence_ranks, set_size = 4, + method = "JPS", confidence = 0.95, + replace = TRUE, model_based = FALSE, + pop_size = nrow(population) +) + +} diff --git a/man/sbs_pps_estimate.Rd b/man/sbs_pps_estimate.Rd new file mode 100644 index 0000000..58c10eb --- /dev/null +++ b/man/sbs_pps_estimate.Rd @@ -0,0 +1,48 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sbs_pps_estimate.R +\name{sbs_pps_estimate} +\alias{sbs_pps_estimate} +\title{Compute an estimator for SBS PPS sampled data.} +\usage{ +sbs_pps_estimate( + population, + n, + y, + sample_matrix, + n_bootstraps = 100, + alpha = 0.05 +) +} +\arguments{ +\item{population}{Population data frame to be sampled with 4 columns. +\enumerate{ +\item Halton numbers +\item X1-coordinate of population unit +\item X2-coordinate of population unit +\item Size measurement of population unit +}} + +\item{n}{Sample sizes (SBS sample size, PPS sample size).} + +\item{y}{Sample response values.} + +\item{sample_matrix}{Sample data frame to be sampled with 6 columns. +\enumerate{ +\item Halton numbers +\item X1-coordinate of population unit +\item X2-coordinate of population unit +\item Size measurement of population unit +\item Weight +\item Inclusion probability +}} + +\item{n_bootstraps}{Number of bootstrap samples.} + +\item{alpha}{The significance level.} +} +\value{ +A summary data frame of the estimator. +} +\description{ +Compute an estimator for SBS PPS sampled data. +} diff --git a/man/sbs_pps_heatmap.Rd b/man/sbs_pps_heatmap.Rd new file mode 100644 index 0000000..e6c3336 --- /dev/null +++ b/man/sbs_pps_heatmap.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sbs_pps_heatmap.R +\name{sbs_pps_heatmap} +\alias{sbs_pps_heatmap} +\title{Generate a heat map of SBS PPS sample of the provided population.} +\usage{ +sbs_pps_heatmap(pop, sbs_indices, pps_indices) +} +\arguments{ +\item{pop}{Population data frame to be sampled with 5 columns. +\enumerate{ +\item Halton numbers +\item X1-coordinate of population unit +\item X2-coordinate of population unit +\item Size measurements of population units +\item Inclusion probabilities +}} + +\item{sbs_indices}{Indices of SBS sample.} + +\item{pps_indices}{Indices of PPS sample.} +} +\value{ +Heat map of the sample. +} +\description{ +Generate a heat map of SBS PPS sample of the provided population. +} diff --git a/man/sbs_pps_sample.Rd b/man/sbs_pps_sample.Rd new file mode 100644 index 0000000..e19d4b7 --- /dev/null +++ b/man/sbs_pps_sample.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sbs_pps_sample.R +\name{sbs_pps_sample} +\alias{sbs_pps_sample} +\title{Generate probability proportional to size (PPS) and spatially balanced sampling on the population provided.} +\usage{ +sbs_pps_sample(population, n, parallelize = TRUE) +} +\arguments{ +\item{population}{Population data frame to be sampled with 5 columns. +\enumerate{ +\item Halton numbers +\item X1-coordinate of population unit +\item X2-coordinate of population unit +\item Size measurements of population units +}} + +\item{n}{Sample sizes (SBS sample size, PPS sample size).} + +\item{parallelize}{A flag whether to parallelize the computational task.} +} +\value{ +A named list of: +\itemize{ +\item heatmap: heat map of the sample +\item sample: SBS PPS sample of the population +} +} +\description{ +Generate probability proportional to size (PPS) and spatially balanced sampling on the population provided. +} diff --git a/man/single_bootstrap.Rd b/man/single_bootstrap.Rd new file mode 100644 index 0000000..723a731 --- /dev/null +++ b/man/single_bootstrap.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bootstrap_sample.R +\name{single_bootstrap} +\alias{single_bootstrap} +\title{Generate a single boostrap sample on the provided population.} +\usage{ +single_bootstrap(pop, n) +} +\arguments{ +\item{pop}{Population data} + +\item{n}{Sample sizes (SBS sample size, PPS sample size).} +} +\value{ +A summary data frame of the estimator. +} +\description{ +Generate a single boostrap sample on the provided population. +} diff --git a/tests/testthat/test-JPSD2F.R b/tests/testthat/test-JPSD2F.R index d567ef5..d5f301d 100644 --- a/tests/testthat/test-JPSD2F.R +++ b/tests/testthat/test-JPSD2F.R @@ -1,5 +1,5 @@ test_that("JPSD2F has a correct output.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") population <- 1:100 k <- 3 tau <- rep(1.2, 3) diff --git a/tests/testthat/test-RSS.R b/tests/testthat/test-RSS.R index 3744bf7..2b4f7fe 100644 --- a/tests/testthat/test-RSS.R +++ b/tests/testthat/test-RSS.R @@ -1,5 +1,5 @@ test_that("RSS has a correct output.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") matrix_ <- matrix(1:200, ncol = 2) rss_matrix_w_replacement <- RSS(matrix_, 100, 10, 2, TRUE) diff --git a/tests/testthat/test-RSSDF.R b/tests/testthat/test-RSSDF.R index 0a6917b..ee658fc 100644 --- a/tests/testthat/test-RSSDF.R +++ b/tests/testthat/test-RSSDF.R @@ -1,5 +1,5 @@ test_that("RSSDF has a correct output.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") matrix_ <- matrix(1:200, ncol = 2) rss_matrix <- RSSDF(matrix_, 100, 10, 2) diff --git a/tests/testthat/test-RSSNRF.R b/tests/testthat/test-RSSNRF.R index 98085b6..8abee7f 100644 --- a/tests/testthat/test-RSSNRF.R +++ b/tests/testthat/test-RSSNRF.R @@ -1,5 +1,5 @@ test_that("RSSDF has a correct output.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") matrix_ <- matrix(1:200, ncol = 2) rss_matrix <- RSSNRF(matrix_, 50, 2, 2) diff --git a/tests/testthat/test-One_Sample.R b/tests/testthat/test-rss_jps_estimate.R similarity index 95% rename from tests/testthat/test-One_Sample.R rename to tests/testthat/test-rss_jps_estimate.R index dde6f86..29f8743 100644 --- a/tests/testthat/test-One_Sample.R +++ b/tests/testthat/test-rss_jps_estimate.R @@ -1,11 +1,11 @@ -test_that("onesample works with JPS", { - skip_if(getRversion() < 3.4) +test_that("RSS JPS estimate works with JPS", { + skip_if(getRversion() < "3.4") load("../jps_data.Rdata") expect_identical(rss_jps_estimate(Data, 3, "JPS", 0.95, FALSE, FALSE, 600), saved_jps_output) }) -test_that("onesample works with RSS", { - skip_if(getRversion() < 3.4) +test_that("RSS JPS estimate works with RSS", { + skip_if(getRversion() < "3.4") load("../rss_data.Rdata") expect_identical(rss_jps_estimate(Data, 3, "RSS", 0.95, FALSE, FALSE, 600), saved_rss_output) }) diff --git a/tests/testthat/test-sbs_pps_estimate.R b/tests/testthat/test-sbs_pps_estimate.R index c4db800..466f7a6 100644 --- a/tests/testthat/test-sbs_pps_estimate.R +++ b/tests/testthat/test-sbs_pps_estimate.R @@ -1,5 +1,5 @@ test_that("An SBS PPS estimator works.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") load("../sbs_pps_input.RData") load("../expected_sbs_pps_estimate.RData") @@ -16,7 +16,7 @@ test_that("An SBS PPS estimator works.", { }) test_that("An SBS PPS estimator works for PPS only.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") load("../sbs_pps_input.RData") load("../expected_pps_estimate.RData") @@ -33,7 +33,7 @@ test_that("An SBS PPS estimator works for PPS only.", { }) test_that("An SBS PPS estimator works for SBS only.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") load("../sbs_pps_input.RData") load("../expected_sbs_estimate.RData") diff --git a/tests/testthat/test-sbs_pps_sample.R b/tests/testthat/test-sbs_pps_sample.R index c351ca9..5bed026 100644 --- a/tests/testthat/test-sbs_pps_sample.R +++ b/tests/testthat/test-sbs_pps_sample.R @@ -1,5 +1,5 @@ test_that("An SBS PPS sample works.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") load("../sbs_pps_input.RData") n_sbs <- 20 @@ -20,7 +20,7 @@ test_that("An SBS PPS sample works.", { }) test_that("An SBS PPS sample works for PPS only.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") load("../sbs_pps_input.RData") n_sbs <- 0 @@ -33,7 +33,7 @@ test_that("An SBS PPS sample works for PPS only.", { }) test_that("An SBS PPS sample works for SBS only.", { - skip_if(getRversion() < 3.4) + skip_if(getRversion() < "3.4") load("../sbs_pps_input.RData") n_sbs <- 20 From 35065f8c1a7b04d82cf3a49f50479219bb4b9a13 Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Mon, 20 May 2024 09:19:13 +0930 Subject: [PATCH 05/13] add n_cores option, add example, refactor --- R/ORD.R | 2 +- R/RSS.R | 15 --- R/RSSDF.R | 48 -------- R/RSSNRF.R | 44 ------- R/inclusion_probability.R | 13 +- R/rss_sample.R | 112 ++++++++++++++++++ R/sbs_pps_estimate.R | 6 +- R/sbs_pps_sample.R | 9 +- R/utils.R | 2 +- examples/rss_estimate.R | 35 ++++++ man/JPSD2F.Rd | 10 -- man/calculate_inclusion_prob.Rd | 8 +- man/{RSS.Rd => rss_sample.Rd} | 8 +- man/{RSSDF.Rd => rss_sample_w_replacement.Rd} | 9 +- ...RSSNRF.Rd => rss_sample_wo_replacement.Rd} | 9 +- man/sbs_pps_estimate.Rd | 5 +- man/sbs_pps_sample.Rd | 4 +- tests/testthat/test-RSSDF.R | 33 ------ tests/testthat/test-RSSNRF.R | 25 ---- .../{test-RSS.R => test-rss_sample.R} | 6 +- .../testthat/test-rss_sample_w_replacement.R | 33 ++++++ .../testthat/test-rss_sample_wo_replacement.R | 25 ++++ tests/testthat/test-sbs_pps_sample.R | 55 +++++---- 23 files changed, 288 insertions(+), 228 deletions(-) delete mode 100644 R/RSS.R delete mode 100644 R/RSSDF.R delete mode 100644 R/RSSNRF.R create mode 100644 R/rss_sample.R create mode 100644 examples/rss_estimate.R rename man/{RSS.Rd => rss_sample.Rd} (80%) rename man/{RSSDF.Rd => rss_sample_w_replacement.Rd} (74%) rename man/{RSSNRF.Rd => rss_sample_wo_replacement.Rd} (74%) delete mode 100644 tests/testthat/test-RSSDF.R delete mode 100644 tests/testthat/test-RSSNRF.R rename tests/testthat/{test-RSS.R => test-rss_sample.R} (76%) create mode 100644 tests/testthat/test-rss_sample_w_replacement.R create mode 100644 tests/testthat/test-rss_sample_wo_replacement.R diff --git a/R/ORD.R b/R/ORD.R index 42638b3..eea2fe7 100644 --- a/R/ORD.R +++ b/R/ORD.R @@ -1,6 +1,6 @@ ################################# # This function orders the units in comparison set -# in balanced ran ked set sample +# in balanced ranked set sample # u[1]: is the judgment rank to be measured # u[2:(H+1)]: Y values # u[H+2:(2H+1)]: auxilairy value diff --git a/R/RSS.R b/R/RSS.R deleted file mode 100644 index 4346352..0000000 --- a/R/RSS.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Generate ranked set sampling (RSS) on the population provided. -#' -#' @inheritParams RSSDF -#' @param with_replacement A boolean which specifies whether to sample with replacement or not. -#' -#' @return A matrix with ranks from each ranker. -#' -RSS <- function(pop, n, H, K, with_replacement = FALSE) { - verify_boolean(with_replacement) - - if (with_replacement) { - return(RSSDF(pop, n, H, K)) - } - return(RSSNRF(pop, n, H, K)) -} diff --git a/R/RSSDF.R b/R/RSSDF.R deleted file mode 100644 index e98b024..0000000 --- a/R/RSSDF.R +++ /dev/null @@ -1,48 +0,0 @@ -#' Generate ranked set sampling (RSS) with replacement on the population provided. -#' -#' @param pop Population that will be sampled with an auxiliary parameter in the second column. -#' @param n Sample size. -#' @param H Set size for each raking group. -#' @param K Number of rankers. -#' -#' @return A matrix with ranks from each ranker. -#' -RSSDF <- function(pop, n, H, K) { - verify_rss_params(pop, n, H, K) - - n_sets <- n / H - y <- pop[, 1] - auxiliary_param <- pop[, 2] - n_population <- length(y) - rss_matrix <- matrix(0, ncol = (K + 1), nrow = n) - sample_index <- 1 - for (j in (1:n_sets)) { - for (h in (1:H)) { - sampled_ids <- sample(1:n_population, H) - sampled_y <- y[sampled_ids] - sampled_auxiliary <- auxiliary_param[sampled_ids] - - auxiliary_order <- order(sampled_auxiliary) - ordered_sampled_y <- sampled_y[auxiliary_order] - ordered_sampled_auxiliary <- sampled_auxiliary[auxiliary_order] - ordered_sample_ids <- sampled_ids[auxiliary_order] - - rss_matrix[sample_index, c(1, 2)] <- c(ordered_sampled_y[h], h) - base_auxiliary <- ordered_sampled_auxiliary[h] - auxiliary_wo_base <- auxiliary_param[-ordered_sample_ids[h]] - if (K > 1) { - for (k in (2:K)) { - comparison_set <- c(base_auxiliary, sample(auxiliary_wo_base, (H - 1))) - ordered_set <- comparison_set[order(comparison_set)] - rank_k <- which(ordered_set == base_auxiliary) - if (length(rank_k) > 1) { - rank_k <- sample(rank_k, 1) - } - rss_matrix[sample_index, (k + 1)] <- rank_k - } - } - sample_index <- sample_index + 1 - } - } - return(rss_matrix) -} diff --git a/R/RSSNRF.R b/R/RSSNRF.R deleted file mode 100644 index ffa9b4f..0000000 --- a/R/RSSNRF.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Generate ranked set sampling (RSS) without replacement on the population provided. -#' -#' @inheritParams RSSDF -#' -#' @return A matrix with ranks from each ranker. -#' -RSSNRF <- function(pop, n, H, K) { - verify_rssnrf_params(pop, n, H, K) - - n_sets <- n / H - K1 <- K + 1 - rseq <- rep((1:H), times = n_sets) - popY <- pop[, 1] - N <- length(popY) - popAux <- pop[, 2] - popind <- 1:N - RSSM <- matrix(0, ncol = (K1), nrow = n) - - ind <- sample(popind, n * H) - setY <- matrix(popY[ind], ncol = H, nrow = n) - setX <- matrix(popAux[ind], ncol = H, nrow = n) - indexM <- matrix(ind, ncol = H, nrow = n) - setYX <- cbind(rseq, setY, setX, indexM) - ordYX <- t(apply(setYX, 1, ORD)) - RSSM[, c(1, 2)] <- c(ordYX[, 1], rseq) - - Yind <- ordYX[, 3] - redpopind <- popind[-Yind] - Xh <- ordYX[, 2] - if (K1 > 2) { - for (k in (3:K1)) { - indk <- sample(redpopind, n * (H - 1)) - setX <- matrix(popAux[indk], ncol = (H - 1), nrow = n) - indexM <- matrix(indk, ncol = (H - 1), nrow = n) - indexM <- cbind(Yind, indexM) - setX <- cbind(Xh, setX) - setYX <- cbind(Xh, setX, indexM) - RankK <- apply(setYX, 1, ORDk) - RSSM[, k] <- RankK - } - } - - return(RSSM) -} diff --git a/R/inclusion_probability.R b/R/inclusion_probability.R index 8e50cb0..50b88ca 100644 --- a/R/inclusion_probability.R +++ b/R/inclusion_probability.R @@ -44,17 +44,24 @@ calculate_first_order_prob <- function(i, size_measurement, n, total_size) { #' #' @param size_measurement Size measurements of population units. #' @param n Sample sizes (SBS sample size, PPS sample size). -#' @param parallelize A flag whether to parallelize the computational task. +#' @param n_cores The number of cores to be used for computational tasks (specify 0 for max). #' #' @return A vector of inclusion probabilities. #' -calculate_inclusion_prob <- function(size_measurement, n, parallelize = TRUE) { +calculate_inclusion_prob <- function(size_measurement, n, n_cores = getOption("n_cores", 1)) { n_population <- length(size_measurement) total_size <- sum(size_measurement) # switch between a single core and multiple cores - if (n_population > 250 && require(parallel) && parallelize) { + if (n_population > 250 && require(parallel) && n_cores > 1) { n_cores <- parallel::detectCores() - 1 + + # to avoid failure from devtools::check + is_core_limited <- Sys.getenv("_R_CHECK_LIMIT_CORES_", "FALSE") + if (is_core_limited == "TRUE") { + n_cores <- 2 + } + clusters <- parallel::makeCluster(n_cores) parallel::clusterExport(clusters, varlist = c("calculate_first_order_prob"), envir = environment()) diff --git a/R/rss_sample.R b/R/rss_sample.R new file mode 100644 index 0000000..fe3e547 --- /dev/null +++ b/R/rss_sample.R @@ -0,0 +1,112 @@ +#' Generate ranked set sampling (RSS) on the population provided. +#' +#' @inheritParams rss_sample_w_replacement +#' @param with_replacement A boolean which specifies whether to sample with replacement or not. +#' +#' @return A matrix with ranks from each ranker. +#' +rss_sample <- function(pop, n, H, K, with_replacement = FALSE) { + verify_boolean(with_replacement) + + if (with_replacement) { + return(rss_sample_w_replacement(pop, n, H, K)) + } + return(rss_sample_wo_replacement(pop, n, H, K)) +} + +#' Generate ranked set sampling (RSS) with replacement on the population provided. +#' +#' @param pop Population that will be sampled with an auxiliary parameter in the second column. +#' @param n Sample size. +#' @param H Set size for each raking group. +#' @param K Number of rankers. +#' +#' @return A matrix with ranks from each ranker. +#' @keywords internal +#' +rss_sample_w_replacement <- function(pop, n, H, K) { + verify_rss_params(pop, n, H, K) + + n_sets <- n / H + y <- pop[, 1] + auxiliary_param <- pop[, 2] + n_population <- dim(pop)[[1]] + rss_matrix <- matrix(0, ncol = (K + 1), nrow = n) + sample_index <- 1 + for (j in (1:n_sets)) { + for (h in (1:H)) { + sampled_ids <- sample(1:n_population, H) + sampled_y <- y[sampled_ids] + sampled_auxiliary <- auxiliary_param[sampled_ids] + + auxiliary_order <- order(sampled_auxiliary) + ordered_sampled_y <- sampled_y[auxiliary_order] + ordered_sampled_auxiliary <- sampled_auxiliary[auxiliary_order] + ordered_sample_ids <- sampled_ids[auxiliary_order] + + rss_matrix[sample_index, c(1, 2)] <- c(ordered_sampled_y[h], h) + base_auxiliary <- ordered_sampled_auxiliary[h] + auxiliary_wo_base <- auxiliary_param[-ordered_sample_ids[h]] + if (K > 1) { + for (k in (2:K)) { + comparison_set <- c(base_auxiliary, sample(auxiliary_wo_base, (H - 1))) + ordered_set <- comparison_set[order(comparison_set)] + rank_k <- which(ordered_set == base_auxiliary) + if (length(rank_k) > 1) { + rank_k <- sample(rank_k, 1) + } + rss_matrix[sample_index, (k + 1)] <- rank_k + } + } + sample_index <- sample_index + 1 + } + } + return(rss_matrix) +} + +#' Generate ranked set sampling (RSS) without replacement on the population provided. +#' +#' @inheritParams rss_sample_w_replacement +#' +#' @return A matrix with ranks from each ranker. +#' @keywords internal +#' +rss_sample_wo_replacement <- function(pop, n, H, K) { + verify_rss_wo_replace_params(pop, n, H, K) + + n_sets <- n / H + n_cols <- K + 1 + rseq <- rep((1:H), times = n_sets) + y <- pop[, 1] + n_population <- dim(pop)[[1]] + auxiliary_param <- pop[, 2] + all_indices <- 1:n_population + rss_matrix <- matrix(0, ncol = (n_cols), nrow = n) + + sample_indices <- sample(all_indices, n * H) + sample_y <- matrix(y[sample_indices], ncol = H, nrow = n) + sample_x <- matrix(auxiliary_param[sample_indices], ncol = H, nrow = n) + sample_index_matrix <- matrix(sample_indices, ncol = H, nrow = n) + sample_matrix <- cbind(rseq, sample_y, sample_x, sample_index_matrix) + yx_order <- t(apply(sample_matrix, 1, ORD)) + rss_matrix[, c(1, 2)] <- c(yx_order[, 1], rseq) + + # TODO: refactor + y_indices <- yx_order[, 3] + redpopind <- all_indices[-y_indices] + Xh <- yx_order[, 2] + if (n_cols > 2) { + for (k in (3:n_cols)) { + indk <- sample(redpopind, n * (H - 1)) + setX <- matrix(auxiliary_param[indk], ncol = (H - 1), nrow = n) + indexM <- matrix(indk, ncol = (H - 1), nrow = n) + indexM <- cbind(y_indices, indexM) + setX <- cbind(Xh, setX) + setYX <- cbind(Xh, setX, indexM) + RankK <- apply(setYX, 1, ORDk) + rss_matrix[, k] <- RankK + } + } + + return(rss_matrix) +} diff --git a/R/sbs_pps_estimate.R b/R/sbs_pps_estimate.R index ea3fbb9..fadcfc7 100644 --- a/R/sbs_pps_estimate.R +++ b/R/sbs_pps_estimate.R @@ -16,10 +16,12 @@ #' 6. Inclusion probability #' @param n_bootstraps Number of bootstrap samples. #' @param alpha The significance level. +#' @param n_cores The number of cores to be used for computational tasks (specify 0 for max). #' #' @return A summary data frame of the estimator. #' -sbs_pps_estimate <- function(population, n, y, sample_matrix, n_bootstraps = 100, alpha = 0.05) { +sbs_pps_estimate <- function( + population, n, y, sample_matrix, n_bootstraps = 100, alpha = 0.05, n_cores = getOption("n_cores", 1)) { verify_sbs_pps_estimate_params(population, n, y, sample_matrix, n_bootstraps, alpha) n_population <- dim(population)[1] @@ -32,7 +34,7 @@ sbs_pps_estimate <- function(population, n, y, sample_matrix, n_bootstraps = 100 estimated_mean <- round(sum(y / sample_matrix[, 6]) / n_population, digits = 3) empirical_population <- get_empirical_population(sample_matrix[, 1], population, y) - empirical_inclusion_prob <- calculate_inclusion_prob(empirical_population[, 3], n) + empirical_inclusion_prob <- calculate_inclusion_prob(empirical_population[, 3], n, n_cores) empirical_population <- data.frame(empirical_population, empirical_inclusion_prob) estimated_variance <- round(bootstrap_sample(empirical_population, n, n_bootstraps), digits = 3) diff --git a/R/sbs_pps_sample.R b/R/sbs_pps_sample.R index 4fe9c14..2460c06 100644 --- a/R/sbs_pps_sample.R +++ b/R/sbs_pps_sample.R @@ -6,16 +6,15 @@ #' 3. X2-coordinate of population unit #' 4. Size measurements of population units #' @param n Sample sizes (SBS sample size, PPS sample size). -#' @param parallelize A flag whether to parallelize the computational task. +#' @param n_cores The number of cores to be used for computational tasks (specify 0 for max). #' #' @return A named list of: #' - heatmap: heat map of the sample #' - sample: SBS PPS sample of the population #' -sbs_pps_sample <- function(population, n, parallelize = TRUE) { - verify_non_negative_whole(n[1], n[2], var_names = c("SBS sample size", "PPS sample size")) +sbs_pps_sample <- function(population, n, n_cores = getOption("n_cores", 1)) { + verify_non_negative_whole(n[1], n[2], n_cores, var_names = c("SBS sample size", "PPS sample size", "n_cores")) verify_matrix_like(population, n_dimensions = 2, n_rows = sum(n), n_cols = 4) - verify_boolean(parallelize) sampled <- get_sbs_pps_sample_indices(population[, c(1, 4)], n) sbs_pps_indices <- sampled$sbs_pps_indices @@ -26,7 +25,7 @@ sbs_pps_sample <- function(population, n, parallelize = TRUE) { measured_sizes <- population[, 4] weights <- c(rep(0, n[1]), measured_sizes[pps_indices] / sum(sizes_wo_sbs)) - inclusion_probabilities <- calculate_inclusion_prob(population[, 4], n, parallelize) + inclusion_probabilities <- calculate_inclusion_prob(population[, 4], n, n_cores) df_sample <- data.frame( sbs_pps_indices, x1 = population[sbs_pps_indices, 2], diff --git a/R/utils.R b/R/utils.R index 5400408..1a0cb5f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -121,7 +121,7 @@ verify_rss_params <- function(pop, n, H, K) { } } -verify_rssnrf_params <- function(pop, n, H, K) { +verify_rss_wo_replace_params <- function(pop, n, H, K) { verify_rss_params(pop, n, H, K) n_population <- dim(pop)[[1]] diff --git a/examples/rss_estimate.R b/examples/rss_estimate.R new file mode 100644 index 0000000..68c543c --- /dev/null +++ b/examples/rss_estimate.R @@ -0,0 +1,35 @@ +population_size <- 600 +# the number of samples to be ranked in each set +set_size <- 3 + + +sigma <- 4 +mu <- 10 +n_rankers <- 3 +# sample size +n <- 30 + +alpha <- 0.05 +rhos <- rep(0.75, n_rankers) +taus <- sigma * sqrt(1 / rhos^2 - 1) + +population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +rho <- 0.75 +tau <- sigma * sqrt(1 / rho^2 - 1) +x <- population + tau * rnorm(population_size, 0, 1) + +population <- cbind(population, x) +data <- rss_sample(population, n, set_size, n_rankers, with_replacement) +data <- data[order(data[, 2]), ] + +rss_estimates <- rss_jps_estimate( + data, + set_size, + method = "RSS", + confidence = 0.80, + with_replacement = FALSE, + model_based = FALSE, + population_size = population_size +) + +print(rss_estimates) diff --git a/man/JPSD2F.Rd b/man/JPSD2F.Rd index 5019d8b..48c7a57 100644 --- a/man/JPSD2F.Rd +++ b/man/JPSD2F.Rd @@ -7,17 +7,7 @@ JPSD2F(pop, n, H, tau, K, with_replacement = FALSE) } \arguments{ -\item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} - -\item{n}{Sample size.} - -\item{H}{Set size for each raking group.} - \item{tau}{A parameter which controls ranking quality.} - -\item{K}{Number of rankers.} - -\item{with_replacement}{A boolean which specifies whether to sample with replacement or not.} } \value{ A matrix with ranks from each ranker. diff --git a/man/calculate_inclusion_prob.Rd b/man/calculate_inclusion_prob.Rd index c424bc1..940fc00 100644 --- a/man/calculate_inclusion_prob.Rd +++ b/man/calculate_inclusion_prob.Rd @@ -4,14 +4,18 @@ \alias{calculate_inclusion_prob} \title{Calculate inclusion probabilities.} \usage{ -calculate_inclusion_prob(size_measurement, n, parallelize = TRUE) +calculate_inclusion_prob( + size_measurement, + n, + n_cores = getOption("n_cores", 1) +) } \arguments{ \item{size_measurement}{Size measurements of population units.} \item{n}{Sample sizes (SBS sample size, PPS sample size).} -\item{parallelize}{A flag whether to parallelize the computational task.} +\item{n_cores}{The number of cores to be used for computational tasks (specify 0 for max).} } \value{ A vector of inclusion probabilities. diff --git a/man/RSS.Rd b/man/rss_sample.Rd similarity index 80% rename from man/RSS.Rd rename to man/rss_sample.Rd index 4a776a2..87867d9 100644 --- a/man/RSS.Rd +++ b/man/rss_sample.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RSS.R -\name{RSS} -\alias{RSS} +% Please edit documentation in R/rss_sample.R +\name{rss_sample} +\alias{rss_sample} \title{Generate ranked set sampling (RSS) on the population provided.} \usage{ -RSS(pop, n, H, K, with_replacement = FALSE) +rss_sample(pop, n, H, K, with_replacement = FALSE) } \arguments{ \item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} diff --git a/man/RSSDF.Rd b/man/rss_sample_w_replacement.Rd similarity index 74% rename from man/RSSDF.Rd rename to man/rss_sample_w_replacement.Rd index f87f7f1..a2d3a7f 100644 --- a/man/RSSDF.Rd +++ b/man/rss_sample_w_replacement.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RSSDF.R -\name{RSSDF} -\alias{RSSDF} +% Please edit documentation in R/rss_sample.R +\name{rss_sample_w_replacement} +\alias{rss_sample_w_replacement} \title{Generate ranked set sampling (RSS) with replacement on the population provided.} \usage{ -RSSDF(pop, n, H, K) +rss_sample_w_replacement(pop, n, H, K) } \arguments{ \item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} @@ -21,3 +21,4 @@ A matrix with ranks from each ranker. \description{ Generate ranked set sampling (RSS) with replacement on the population provided. } +\keyword{internal} diff --git a/man/RSSNRF.Rd b/man/rss_sample_wo_replacement.Rd similarity index 74% rename from man/RSSNRF.Rd rename to man/rss_sample_wo_replacement.Rd index a11b5af..a01d458 100644 --- a/man/RSSNRF.Rd +++ b/man/rss_sample_wo_replacement.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RSSNRF.R -\name{RSSNRF} -\alias{RSSNRF} +% Please edit documentation in R/rss_sample.R +\name{rss_sample_wo_replacement} +\alias{rss_sample_wo_replacement} \title{Generate ranked set sampling (RSS) without replacement on the population provided.} \usage{ -RSSNRF(pop, n, H, K) +rss_sample_wo_replacement(pop, n, H, K) } \arguments{ \item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} @@ -21,3 +21,4 @@ A matrix with ranks from each ranker. \description{ Generate ranked set sampling (RSS) without replacement on the population provided. } +\keyword{internal} diff --git a/man/sbs_pps_estimate.Rd b/man/sbs_pps_estimate.Rd index 58c10eb..f821755 100644 --- a/man/sbs_pps_estimate.Rd +++ b/man/sbs_pps_estimate.Rd @@ -10,7 +10,8 @@ sbs_pps_estimate( y, sample_matrix, n_bootstraps = 100, - alpha = 0.05 + alpha = 0.05, + n_cores = getOption("n_cores", 1) ) } \arguments{ @@ -39,6 +40,8 @@ sbs_pps_estimate( \item{n_bootstraps}{Number of bootstrap samples.} \item{alpha}{The significance level.} + +\item{n_cores}{The number of cores to be used for computational tasks (specify 0 for max).} } \value{ A summary data frame of the estimator. diff --git a/man/sbs_pps_sample.Rd b/man/sbs_pps_sample.Rd index e19d4b7..c5f309a 100644 --- a/man/sbs_pps_sample.Rd +++ b/man/sbs_pps_sample.Rd @@ -4,7 +4,7 @@ \alias{sbs_pps_sample} \title{Generate probability proportional to size (PPS) and spatially balanced sampling on the population provided.} \usage{ -sbs_pps_sample(population, n, parallelize = TRUE) +sbs_pps_sample(population, n, n_cores = getOption("n_cores", 1)) } \arguments{ \item{population}{Population data frame to be sampled with 5 columns. @@ -17,7 +17,7 @@ sbs_pps_sample(population, n, parallelize = TRUE) \item{n}{Sample sizes (SBS sample size, PPS sample size).} -\item{parallelize}{A flag whether to parallelize the computational task.} +\item{n_cores}{The number of cores to be used for computational tasks (specify 0 for max).} } \value{ A named list of: diff --git a/tests/testthat/test-RSSDF.R b/tests/testthat/test-RSSDF.R deleted file mode 100644 index ee658fc..0000000 --- a/tests/testthat/test-RSSDF.R +++ /dev/null @@ -1,33 +0,0 @@ -test_that("RSSDF has a correct output.", { - skip_if(getRversion() < "3.4") - matrix_ <- matrix(1:200, ncol = 2) - - rss_matrix <- RSSDF(matrix_, 100, 10, 2) - expect_equal(dim(rss_matrix), c(100, 3)) - expect_equal(sort(unique(rss_matrix[, 2])), 1:10) - - sample_counts_in_sets <- table(rss_matrix[, 2]) - expect_equal(sample_counts_in_sets[[2]], 10) - expect_equal(table(sample_counts_in_sets)[[1]], 10) - - # # # in case we are supporting `n %% H > 0` - # rss_matrix_with_dropped_sample <- RSSDF(population, 100, 11, 2) - # expect_equal(sort(unique(rss_matrix_with_dropped_sample[, 2])), 0:11) - # - # sample_counts_in_sets <- table(rss_matrix_with_dropped_sample[, 2]) - # expect_equal(sample_counts_in_sets[[2]], 9) - # expect_equal(table(sample_counts_in_sets)[[2]], 11) -}) - -test_that("Inputs are valid.", { - matrix_ <- matrix(1:1000, ncol = 2) - - expect_error(RSSDF(1:1000, 100, 10, 1), "`pop` must be a 2-dimension matrix-like object.") - expect_error(RSSDF(matrix(1:1000), 100, 10, 1), "`pop` must have at least 2 columns.") - expect_error(RSSDF(matrix_, -100, 10, 1), "`n` must be a positive whole number.") - expect_error(RSSDF(matrix_, 100, -10, 1), "`H` must be a positive whole number.") - expect_error(RSSDF(matrix_, 100, 10, -1), "`K` must be a positive whole number.") - expect_error(RSSDF(matrix_, 1000, 10, 1), "`pop` must have at least `n` rows.") - expect_error(RSSDF(matrix_, 5, 11, 1), "`n` must >= `H`.") - expect_error(RSSDF(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.") -}) diff --git a/tests/testthat/test-RSSNRF.R b/tests/testthat/test-RSSNRF.R deleted file mode 100644 index 8abee7f..0000000 --- a/tests/testthat/test-RSSNRF.R +++ /dev/null @@ -1,25 +0,0 @@ -test_that("RSSDF has a correct output.", { - skip_if(getRversion() < "3.4") - matrix_ <- matrix(1:200, ncol = 2) - - rss_matrix <- RSSNRF(matrix_, 50, 2, 2) - expect_equal(dim(rss_matrix), c(50, 3)) - expect_equal(sort(unique(rss_matrix[, 2])), 1:2) - - sample_counts_in_sets <- table(rss_matrix[, 2]) - expect_equal(sample_counts_in_sets[[2]], 25) - expect_equal(table(sample_counts_in_sets)[[1]], 2) -}) - -test_that("Inputs are valid.", { - matrix_ <- matrix(1:50, ncol = 2) - - expect_error(RSSNRF(1:10, 10, 2, 1), "`pop` must be a 2-dimension matrix-like object.") - expect_error(RSSNRF(matrix(1:10), 2, 10, 1), "`pop` must have at least 2 columns.") - expect_error(RSSNRF(matrix_, -10, 2, 1), "`n` must be a positive whole number.") - expect_error(RSSNRF(matrix_, 10, -2, 1), "`H` must be a positive whole number.") - expect_error(RSSNRF(matrix_, 10, 2, -1), "`K` must be a positive whole number.") - expect_error(RSSNRF(matrix_, 5, 11, 1), "`n` must >= `H`.") - expect_error(RSSNRF(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.") - expect_error(RSSNRF(matrix_, 20, 5, 1), "The number of population must be at least `nH`.") -}) diff --git a/tests/testthat/test-RSS.R b/tests/testthat/test-rss_sample.R similarity index 76% rename from tests/testthat/test-RSS.R rename to tests/testthat/test-rss_sample.R index 2b4f7fe..8bf1a25 100644 --- a/tests/testthat/test-RSS.R +++ b/tests/testthat/test-rss_sample.R @@ -2,7 +2,7 @@ test_that("RSS has a correct output.", { skip_if(getRversion() < "3.4") matrix_ <- matrix(1:200, ncol = 2) - rss_matrix_w_replacement <- RSS(matrix_, 100, 10, 2, TRUE) + rss_matrix_w_replacement <- rss_sample(matrix_, 100, 10, 2, TRUE) expect_equal(dim(rss_matrix_w_replacement), c(100, 3)) expect_equal(sort(unique(rss_matrix_w_replacement[, 2])), 1:10) @@ -10,7 +10,7 @@ test_that("RSS has a correct output.", { expect_equal(sample_counts_in_sets[[2]], 10) expect_equal(table(sample_counts_in_sets)[[1]], 10) - rss_matrix_wo_replacement <- RSS(matrix_, 10, 5, 2) + rss_matrix_wo_replacement <- rss_sample(matrix_, 10, 5, 2) expect_equal(dim(rss_matrix_wo_replacement), c(10, 3)) expect_equal(sort(unique(rss_matrix_wo_replacement[, 2])), 1:5) @@ -20,5 +20,5 @@ test_that("RSS has a correct output.", { }) test_that("Inputs are valid.", { - expect_error(RSS(1:10, -100, 10, 1, "T"), "`with_replacement` must be a boolean.") + expect_error(rss_sample(1:10, -100, 10, 1, "T"), "`with_replacement` must be a boolean.") }) diff --git a/tests/testthat/test-rss_sample_w_replacement.R b/tests/testthat/test-rss_sample_w_replacement.R new file mode 100644 index 0000000..67bd9fe --- /dev/null +++ b/tests/testthat/test-rss_sample_w_replacement.R @@ -0,0 +1,33 @@ +test_that("RSSDF has a correct output.", { + skip_if(getRversion() < "3.4") + matrix_ <- matrix(1:200, ncol = 2) + + rss_matrix <- rss_sample_w_replacement(matrix_, 100, 10, 2) + expect_equal(dim(rss_matrix), c(100, 3)) + expect_equal(sort(unique(rss_matrix[, 2])), 1:10) + + sample_counts_in_sets <- table(rss_matrix[, 2]) + expect_equal(sample_counts_in_sets[[2]], 10) + expect_equal(table(sample_counts_in_sets)[[1]], 10) + + # # # in case we are supporting `n %% H > 0` + # rss_matrix_with_dropped_sample <- RSSDF(population, 100, 11, 2) + # expect_equal(sort(unique(rss_matrix_with_dropped_sample[, 2])), 0:11) + # + # sample_counts_in_sets <- table(rss_matrix_with_dropped_sample[, 2]) + # expect_equal(sample_counts_in_sets[[2]], 9) + # expect_equal(table(sample_counts_in_sets)[[2]], 11) +}) + +test_that("Inputs are valid.", { + matrix_ <- matrix(1:1000, ncol = 2) + + expect_error(rss_sample_w_replacement(1:1000, 100, 10, 1), "`pop` must be a 2-dimension matrix-like object.") + expect_error(rss_sample_w_replacement(matrix(1:1000), 100, 10, 1), "`pop` must have at least 2 columns.") + expect_error(rss_sample_w_replacement(matrix_, -100, 10, 1), "`n` must be a positive whole number.") + expect_error(rss_sample_w_replacement(matrix_, 100, -10, 1), "`H` must be a positive whole number.") + expect_error(rss_sample_w_replacement(matrix_, 100, 10, -1), "`K` must be a positive whole number.") + expect_error(rss_sample_w_replacement(matrix_, 1000, 10, 1), "`pop` must have at least `n` rows.") + expect_error(rss_sample_w_replacement(matrix_, 5, 11, 1), "`n` must >= `H`.") + expect_error(rss_sample_w_replacement(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.") +}) diff --git a/tests/testthat/test-rss_sample_wo_replacement.R b/tests/testthat/test-rss_sample_wo_replacement.R new file mode 100644 index 0000000..c888ce3 --- /dev/null +++ b/tests/testthat/test-rss_sample_wo_replacement.R @@ -0,0 +1,25 @@ +test_that("RSSDF has a correct output.", { + skip_if(getRversion() < "3.4") + matrix_ <- matrix(1:200, ncol = 2) + + rss_matrix <- rss_sample_wo_replacement(matrix_, 50, 2, 2) + expect_equal(dim(rss_matrix), c(50, 3)) + expect_equal(sort(unique(rss_matrix[, 2])), 1:2) + + sample_counts_in_sets <- table(rss_matrix[, 2]) + expect_equal(sample_counts_in_sets[[2]], 25) + expect_equal(table(sample_counts_in_sets)[[1]], 2) +}) + +test_that("Inputs are valid.", { + matrix_ <- matrix(1:50, ncol = 2) + + expect_error(rss_sample_wo_replacement(1:10, 10, 2, 1), "`pop` must be a 2-dimension matrix-like object.") + expect_error(rss_sample_wo_replacement(matrix(1:10), 2, 10, 1), "`pop` must have at least 2 columns.") + expect_error(rss_sample_wo_replacement(matrix_, -10, 2, 1), "`n` must be a positive whole number.") + expect_error(rss_sample_wo_replacement(matrix_, 10, -2, 1), "`H` must be a positive whole number.") + expect_error(rss_sample_wo_replacement(matrix_, 10, 2, -1), "`K` must be a positive whole number.") + expect_error(rss_sample_wo_replacement(matrix_, 5, 11, 1), "`n` must >= `H`.") + expect_error(rss_sample_wo_replacement(matrix_, 5, 4, 1), "`n` must be a multiple of `H`.") + expect_error(rss_sample_wo_replacement(matrix_, 20, 5, 1), "The number of population must be at least `nH`.") +}) diff --git a/tests/testthat/test-sbs_pps_sample.R b/tests/testthat/test-sbs_pps_sample.R index 5bed026..6f8eddf 100644 --- a/tests/testthat/test-sbs_pps_sample.R +++ b/tests/testthat/test-sbs_pps_sample.R @@ -96,27 +96,40 @@ test_that("Each sample sizes must be non-negative whole numbers.", { ) }) -test_that("parallelize must be a boolean.", { +test_that("The number of cores must be non-negative whole number.", { population <- matrix(1:48, ncol = 4) sample_sizes <- c(3, 3) - - expect_error( - sbs_pps_sample(population, sample_sizes, 0), - "`parallelize` must be a boolean." - ) - - expect_error( - sbs_pps_sample(population, sample_sizes, "TRUE"), - "`parallelize` must be a boolean." - ) - - expect_error( - sbs_pps_sample(population, sample_sizes, NULL), - "`parallelize` must be a boolean." - ) - - expect_error( - sbs_pps_sample(population, sample_sizes, NA), - "`parallelize` must be a boolean." - ) + invalid_n_cores <- list(-1, 0.1, NULL, NA, FALSE, "1") + + for (invalid in invalid_n_cores) { + expect_error( + sbs_pps_sample(population, sample_sizes, invalid), + "`n_cores` must be a non-negative whole number." + ) + } + + # expect_error( + # sbs_pps_sample(population, sample_sizes, -1), + # "`parallelize` must be a boolean." + # ) + # + # expect_error( + # sbs_pps_sample(population, sample_sizes, 0.1), + # "`parallelize` must be a boolean." + # ) + # + # expect_error( + # sbs_pps_sample(population, sample_sizes, 0.1), + # "`parallelize` must be a boolean." + # ) + # + # expect_error( + # sbs_pps_sample(population, sample_sizes, NULL), + # "`parallelize` must be a boolean." + # ) + # + # expect_error( + # sbs_pps_sample(population, sample_sizes, NA), + # "`parallelize` must be a boolean." + # ) }) From 2225d78a1adf4888ec86cfa4264d5e932d6dba1b Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Mon, 20 May 2024 17:53:16 +0930 Subject: [PATCH 06/13] add examples to exported functions, refactor --- NAMESPACE | 6 +- R/JPSD2F.R | 28 ------ R/{jps_estimator.R => jps_estimate.R} | 6 +- ...timator_single.R => jps_estimate_single.R} | 2 +- R/jps_sample.R | 60 ++++++++++++ R/{RSSEF.R => rss_estimate.R} | 2 +- R/rss_jps_estimate.R | 91 ++++++++++++++---- R/rss_sample.R | 32 +++++++ R/sbs_pps_estimate.R | 30 ++++++ R/sbs_pps_sample.R | 29 ++++++ example.R | 94 ------------------- examples/rss_estimate.R | 35 ------- man/JPSD2F.Rd | 17 ---- man/{jps_estimator.Rd => jps_estimate.Rd} | 8 +- ...mator_single.Rd => jps_estimate_single.Rd} | 8 +- man/jps_sample.Rd | 58 ++++++++++++ man/{RSSEF.Rd => rss_estimate.Rd} | 8 +- man/rss_jps_estimate.Rd | 75 +++++++++++++-- man/rss_sample.Rd | 32 +++++++ man/sbs_pps_estimate.Rd | 30 ++++++ man/sbs_pps_sample.Rd | 29 ++++++ tests/testthat/test-JPSD2F.R | 24 ----- tests/testthat/test-jps_sample.R | 24 +++++ 23 files changed, 490 insertions(+), 238 deletions(-) delete mode 100644 R/JPSD2F.R rename R/{jps_estimator.R => jps_estimate.R} (97%) rename R/{jps_estimator_single.R => jps_estimate_single.R} (98%) create mode 100644 R/jps_sample.R rename R/{RSSEF.R => rss_estimate.R} (98%) delete mode 100644 example.R delete mode 100644 examples/rss_estimate.R delete mode 100644 man/JPSD2F.Rd rename man/{jps_estimator.Rd => jps_estimate.Rd} (81%) rename man/{jps_estimator_single.Rd => jps_estimate_single.Rd} (88%) create mode 100644 man/jps_sample.Rd rename man/{RSSEF.Rd => rss_estimate.Rd} (80%) delete mode 100644 tests/testthat/test-JPSD2F.R create mode 100644 tests/testthat/test-jps_sample.R diff --git a/NAMESPACE b/NAMESPACE index 187a7a2..eabfa89 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,11 @@ # Generated by roxygen2: do not edit by hand -export(jps_estimator) +export(jps_estimate) +export(jps_sample) export(rss_jps_estimate) +export(rss_sample) +export(sbs_pps_estimate) +export(sbs_pps_sample) importFrom(Rcpp,sourceCpp) importFrom(stats,aggregate) importFrom(stats,qt) diff --git a/R/JPSD2F.R b/R/JPSD2F.R deleted file mode 100644 index 75d06dd..0000000 --- a/R/JPSD2F.R +++ /dev/null @@ -1,28 +0,0 @@ -#' Generate JPS sampling without replacement on the provided population. -#' -#' @inheritParams RSS -#' @param tau A parameter which controls ranking quality. -#' -#' @return A matrix with ranks from each ranker. -#' -JPSD2F <- function(pop, n, H, tau, K, with_replacement = FALSE) { - verify_jps_params(pop, n, H, tau, K, with_replacement) - - sampling_matrix <- matrix(sample(pop, n * H, replace = with_replacement), ncol = H, nrow = n) - - # rank each SRS unit post experimentally - jps_matrix <- matrix(0, ncol = K + 1, nrow = n) - for (i in (1:n)) { - comparison_set <- sampling_matrix[i, ] - ranks <- rep(0, K) - for (k in (1:K)) { - # adjust for ranking, Dell and Clutter - adjusted_set <- comparison_set + tau[k] * rnorm(H, 0, 1) - ranks[k] <- rank(adjusted_set)[1] - } - jps_matrix[i, ] <- c(comparison_set[1], ranks) - } - - colnames(jps_matrix) <- c("Y", paste0("R", 1:K)) - return(jps_matrix) -} diff --git a/R/jps_estimator.R b/R/jps_estimate.R similarity index 97% rename from R/jps_estimator.R rename to R/jps_estimate.R index 44fbebe..cae5c3a 100644 --- a/R/jps_estimator.R +++ b/R/jps_estimate.R @@ -15,7 +15,7 @@ #' confidence intervals. #' @export #' -jps_estimator <- function(data, set_size, replace = TRUE, model_based, N, alpha) { +jps_estimate <- function(data, set_size, replace = TRUE, model_based, N, alpha) { n_rankers <- ncol(data) - 1 if (!replace && is.null(N)) { @@ -57,7 +57,7 @@ jps_estimator <- function(data, set_size, replace = TRUE, model_based, N, alpha) ranks <- data[, -1] y <- data[, 1] if (n_rankers == 1) { - jps_estimates <- jps_estimator_single( + jps_estimates <- jps_estimate_single( ranks, y, set_size, N, coefd, coef_del, replace, model_based, n_rankers ) estimators <- c("JPS", "SRS") @@ -80,7 +80,7 @@ jps_estimator <- function(data, set_size, replace = TRUE, model_based, N, alpha) jps_estimates <- apply( ranks, 2, - jps_estimator_single, + jps_estimate_single, y = y, set_size = set_size, N = N, diff --git a/R/jps_estimator_single.R b/R/jps_estimate_single.R similarity index 98% rename from R/jps_estimator_single.R rename to R/jps_estimate_single.R index 620de0e..b3e6b9c 100644 --- a/R/jps_estimator_single.R +++ b/R/jps_estimate_single.R @@ -16,7 +16,7 @@ #' confidence intervals. #' @keywords internal #' -jps_estimator_single <- function(ranks, y, set_size, N, coef, coef_del, replace, model_based, K) { +jps_estimate_single <- function(ranks, y, set_size, N, coef, coef_del, replace, model_based, K) { y_ij <- expand.grid(y, y) # count ranks including zeros diff --git a/R/jps_sample.R b/R/jps_sample.R new file mode 100644 index 0000000..a1d92c0 --- /dev/null +++ b/R/jps_sample.R @@ -0,0 +1,60 @@ +#' Generate JPS sampling on the provided population. +#' +#' @inheritParams rss_sample +#' @param tau A parameter which controls ranking quality. +#' +#' @return A matrix with ranks from each ranker. +#' @export +#' +#' @examples +#' set.seed(112) +#' population_size <- 600 +#' # the number of samples to be ranked in each set +#' H <- 3 +#' +#' with_replacement <- FALSE +#' sigma <- 4 +#' mu <- 10 +#' n_rankers <- 3 +#' # sample size +#' n <- 10 +#' +#' rhos <- rep(0.75, n_rankers) +#' taus <- sigma * sqrt(1 / rhos^2 - 1) +#' +#' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +#' jps_sample(population, n, H, taus, n_rankers, with_replacement) +#' #> Y R1 R2 R3 +#' #> [1,] 6.384461 1 2 2 +#' #> [2,] 1.485141 1 1 1 +#' #> [3,] 13.640711 2 3 2 +#' #> [4,] 15.809136 3 3 2 +#' #> [5,] 6.769463 2 2 1 +#' #> [6,] 14.355524 3 3 3 +#' #> [7,] 10.729740 2 1 3 +#' #> [8,] 6.152453 1 1 1 +#' #> [9,] 8.701285 2 1 2 +#' #> [10,] 13.323884 3 3 3 +#' +jps_sample <- function(pop, n, H, tau, K, with_replacement = FALSE) { + verify_jps_params(pop, n, H, tau, K, with_replacement) + + sampling_matrix <- matrix(sample(pop, n * H, replace = with_replacement), ncol = H, nrow = n) + + # rank each SRS unit post experimentally + jps_matrix <- matrix(0, ncol = K + 1, nrow = n) + for (i in (1:n)) { + comparison_set <- sampling_matrix[i, ] + ranks <- rep(0, K) + for (k in (1:K)) { + # adjust for ranking, Dell and Clutter + adjusted_set <- comparison_set + tau[k] * rnorm(H, 0, 1) + ranks[k] <- rank(adjusted_set)[1] + } + jps_matrix[i, ] <- c(comparison_set[1], ranks) + } + + colnames(jps_matrix) <- c("Y", paste0("R", 1:K)) + return(jps_matrix) +} +#' @export diff --git a/R/RSSEF.R b/R/rss_estimate.R similarity index 98% rename from R/RSSEF.R rename to R/rss_estimate.R index 56f0b71..4a9c624 100644 --- a/R/RSSEF.R +++ b/R/rss_estimate.R @@ -20,7 +20,7 @@ #' @return #' @keywords internal #' -RSSEF <- function(data, set_size, replace, model, N, alpha) { +rss_estimate <- function(data, set_size, replace, model, N, alpha) { # unused variable # RM <- data[, -1] RV <- data[, 2] # We need to be careful about this. diff --git a/R/rss_jps_estimate.R b/R/rss_jps_estimate.R index 35e16e2..77a8bda 100644 --- a/R/rss_jps_estimate.R +++ b/R/rss_jps_estimate.R @@ -17,20 +17,81 @@ #' @export #' #' @examples +#' # JPS estimator +#' set.seed(112) +#' population_size <- 600 +#' # the number of samples to be ranked in each set +#' H <- 3 #' -#' # Compute the JPS estimators +#' with_replacement <- FALSE +#' sigma <- 4 +#' mu <- 10 +#' n_rankers <- 3 +#' # sample size +#' n <- 30 #' -#' jps_estimates <- rss_jps_estimate( -#' data = emergence_ranks, set_size = 4, -#' method = "JPS", confidence = 0.95, -#' replace = TRUE, model_based = FALSE, -#' pop_size = nrow(population) +#' rhos <- rep(0.75, n_rankers) +#' taus <- sigma * sqrt(1 / rhos^2 - 1) +#' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +#' data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) +#' data <- data[order(data[, 2]), ] +#' +#' rss_jps_estimate( +#' data, +#' set_size = H, +#' method = "JPS", +#' confidence = 0.80, +#' replace = with_replacement, +#' model_based = FALSE, +#' pop_size = population_size +#' ) +#' #> Estimator Estimate Standard Error 80% Confidence intervals +#' #> 1 UnWeighted 9.570 0.526 8.88,10.26 +#' #> 2 Sd.Weighted 9.595 0.569 8.849,10.341 +#' #> 3 Aggregate Weight 9.542 0.500 8.887,10.198 +#' #> 4 JPS Estimate 9.502 0.650 8.651,10.354 +#' #> 5 SRS estimate 9.793 0.783 8.766,10.821 +#' #> 6 Minimum 9.542 0.500 8.887,10.198 +#' +#' # RSS estimator +#' set.seed(112) +#' population_size <- 600 +#' # the number of samples to be ranked in each set +#' H <- 3 +#' +#' with_replacement <- FALSE +#' sigma <- 4 +#' mu <- 10 +#' n_rankers <- 3 +#' # sample size +#' n <- 30 +#' +#' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +#' rho <- 0.75 +#' tau <- sigma * sqrt(1 / rho^2 - 1) +#' x <- population + tau * rnorm(population_size, 0, 1) +#' +#' population <- cbind(population, x) +#' data <- RankedSetSampling::rss_sample(population, n, H, n_rankers, with_replacement) +#' data <- data[order(data[, 2]), ] +#' +#' rss_estimates <- rss_jps_estimate( +#' data, +#' set_size = H, +#' method = "RSS", +#' confidence = 0.80, +#' replace = with_replacement, +#' model_based = FALSE, +#' pop_size = population_size #' ) #' +#' print(rss_estimates) +#' #> Estimator point.est St.error 80% Confidence Interval +#' #> 1 RSS-1 9.153 0.766 8.148,10.158 +#' #> 2 Aggregate Weighted 9.064 0.652 8.209,9.919 +#' rss_jps_estimate <- function( data, set_size, method, confidence = 0.95, replace = TRUE, model_based = FALSE, pop_size = NULL) { - # Choose the type of estimators to output? - # Rename to estimate? Or something else? # Break confidence interval into two columns? # pop_size: nrow(data)*set_size <= pop_size, > 0, only relevant if replace = FALSE # method <- match.arg(toupper(method), c("JPS", "RSS")) @@ -38,18 +99,16 @@ rss_jps_estimate <- function( alpha <- 1 - confidence - if (method == "JPS") { - results <- jps_estimator(data, set_size, replace, model_based, pop_size, alpha) + results <- jps_estimate(data, set_size, replace, model_based, pop_size, alpha) } else if (method == "RSS") { - RV <- data[, 2] - GSV <- aggregate(RV, list(RV), length)$x + first_ranker_ranks <- data[, 2] + rank_count <- aggregate(first_ranker_ranks, list(first_ranker_ranks), length)$x - if (length(GSV) != set_size || min(GSV) <= 1) { - stop("In ranked set sample, first ranking method should have at least two observations in any judgment ranking group") + if (length(rank_count) != set_size || min(rank_count) <= 1) { + stop("For RSS, ranks from the first ranker must have at least two observations in any ranking group.") } - - results <- RSSEF(data, set_size, replace, model_based, pop_size, alpha) + results <- rss_estimate(data, set_size, replace, model_based, pop_size, alpha) } if (model_based) { diff --git a/R/rss_sample.R b/R/rss_sample.R index fe3e547..b714a4b 100644 --- a/R/rss_sample.R +++ b/R/rss_sample.R @@ -4,6 +4,38 @@ #' @param with_replacement A boolean which specifies whether to sample with replacement or not. #' #' @return A matrix with ranks from each ranker. +#' @export +#' +#' @examples +#' set.seed(112) +#' population_size <- 600 +#' # the number of samples to be ranked in each set +#' H <- 3 +#' +#' with_replacement <- FALSE +#' sigma <- 4 +#' mu <- 10 +#' n_rankers <- 3 +#' # sample size +#' n <- 9 +#' +#' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +#' rho <- 0.75 +#' tau <- sigma * sqrt(1 / rho^2 - 1) +#' x <- population + tau * rnorm(population_size, 0, 1) +#' +#' population <- cbind(population, x) +#' rss_sample(population, n, H, n_rankers, with_replacement) +#' #> [,1] [,2] [,3] [,4] +#' #> [1,] 8.910625 1 3 1 +#' #> [2,] 12.317145 2 2 1 +#' #> [3,] 11.619746 3 3 2 +#' #> [4,] 8.307549 1 2 1 +#' #> [5,] 5.089992 2 2 3 +#' #> [6,] 7.233575 3 3 3 +#' #> [7,] 11.601654 1 2 2 +#' #> [8,] 9.134107 2 1 1 +#' #> [9,] 12.960431 3 3 1 #' rss_sample <- function(pop, n, H, K, with_replacement = FALSE) { verify_boolean(with_replacement) diff --git a/R/sbs_pps_estimate.R b/R/sbs_pps_estimate.R index fadcfc7..0ac418e 100644 --- a/R/sbs_pps_estimate.R +++ b/R/sbs_pps_estimate.R @@ -19,6 +19,36 @@ #' @param n_cores The number of cores to be used for computational tasks (specify 0 for max). #' #' @return A summary data frame of the estimator. +#' @export +#' +#' @examples +#' set.seed(112) +#' +#' # SBS sample size, PPS sample size +#' sample_sizes <- c(5, 5) +#' +#' n_population <- 233 +#' k <- 0:(n_population - 1) +#' x1 <- sample(1:13, n_population, replace = TRUE) / 13 +#' x2 <- sample(1:8, n_population, replace = TRUE) / 8 +#' y <- (x1 + x2) * runif(n = n_population, min = 1, max = 2) + 1 +#' measured_sizes <- y * runif(n = n_population, min = 0, max = 4) +#' +#' population <- matrix(cbind(k, x1, x2, measured_sizes), ncol = 4) +#' sample_result <- sbs_pps_sample(population, sample_sizes) +#' +#' # estimate the population mean and construct a confidence interval +#' df_sample <- sample_result$sample +#' sample_id <- df_sample[, 1] +#' y_sample <- y[sample_id] +#' +#' sbs_pps_estimates <- sbs_pps_estimate( +#' population, sample_sizes, y_sample, df_sample, +#' n_bootstrap = 100, alpha = 0.05 +#' ) +#' print(sbs_pps_estimates) +#' #> n1 n2 Estimate St.error 95% Confidence intervals +#' #> 1 5 5 2.849 0.1760682 2.451,3.247 #' sbs_pps_estimate <- function( population, n, y, sample_matrix, n_bootstraps = 100, alpha = 0.05, n_cores = getOption("n_cores", 1)) { diff --git a/R/sbs_pps_sample.R b/R/sbs_pps_sample.R index 2460c06..bdef29c 100644 --- a/R/sbs_pps_sample.R +++ b/R/sbs_pps_sample.R @@ -11,6 +11,35 @@ #' @return A named list of: #' - heatmap: heat map of the sample #' - sample: SBS PPS sample of the population +#' @export + +#' @examples +#' set.seed(112) +#' +#' # SBS sample size, PPS sample size +#' sample_sizes <- c(5, 5) +#' +#' n_population <- 233 +#' k <- 0:(n_population - 1) +#' x1 <- sample(1:13, n_population, replace = TRUE) / 13 +#' x2 <- sample(1:8, n_population, replace = TRUE) / 8 +#' y <- (x1 + x2) * runif(n = n_population, min = 1, max = 2) + 1 +#' measured_sizes <- y * runif(n = n_population, min = 0, max = 4) +#' +#' population <- matrix(cbind(k, x1, x2, measured_sizes), ncol = 4) +#' sample_result <- sbs_pps_sample(population, sample_sizes) +#' print(sample_result$sample) +#' #> sbs_pps_indices x1 x2 size weight inclusion_probability +#' #> 1 87 0.4615385 0.625 0.4665423 0.000000000 0.02319163 +#' #> 2 88 0.1538462 0.625 1.7389902 0.000000000 0.02790409 +#' #> 3 89 0.8461538 0.625 7.0815547 0.000000000 0.04749104 +#' #> 4 90 0.6923077 0.750 9.5428032 0.000000000 0.05640733 +#' #> 5 91 0.2307692 0.750 5.1375136 0.000000000 0.04039996 +#' #> 6 173 0.1538462 0.500 6.6400168 0.005024130 0.04588620 +#' #> 7 26 0.6153846 0.500 4.3146186 0.003264631 0.03738898 +#' #> 8 232 0.8461538 0.750 12.0057856 0.009084108 0.06526583 +#' #> 9 171 0.6153846 0.750 6.9029083 0.005223046 0.04684225 +#' #> 10 29 0.8461538 0.375 4.6324720 0.003505133 0.03855377 #' sbs_pps_sample <- function(population, n, n_cores = getOption("n_cores", 1)) { verify_non_negative_whole(n[1], n[2], n_cores, var_names = c("SBS sample size", "PPS sample size", "n_cores")) diff --git a/example.R b/example.R deleted file mode 100644 index 0eea57d..0000000 --- a/example.R +++ /dev/null @@ -1,94 +0,0 @@ -N <- 600 # population size -set_size <- 3 -H <- set_size # the number of samples to be ranked in each set -with_replacement <- FALSE - -model_based <- FALSE -sampling_method <- "JPS" # JPS, RSS - -sigma <- 4 # population standard deviation -mu <- 10 # population mean -n_rankers <- 3 # number of rankers -n <- 30 # sample size - -alpha <- 0.05 -rhos <- rep(0.75, n_rankers) -taus <- sigma * sqrt(1 / rhos^2 - 1) - -pop <- qnorm((1:N) / (N + 1), mu, sigma) -rho <- 0.75 -tau <- sigma * sqrt(1 / rho^2 - 1) -X <- pop + tau * rnorm(N, 0, 1) - -# JPS sampling -if (sampling_method == "JPS") { - data <- JPSD2F(pop, n, H, taus, n_rankers, with_replacement) -} - -# Ranked set sampling -if (sampling_method == "RSS") { - pop <- cbind(pop, X) - data <- RSS(pop, n, H, n_rankers, with_replacement) - data <- data[order(data[, 2]), ] -} - -ONE <- rss_jps_estimate(data, set_size, sampling_method, 0.80, with_replacement, model_based, N) - -w_or_wo <- " without " -if (with_replacement) { - w_or_wo <- " with " -} -print(paste0(sampling_method, " sample with number of rankers K=", n_rankers, w_or_wo, "replacement")) - -if (model_based == 0) { - print("Design based inference is developed") -} else { - print("Super-population model is used") -} -print(ONE) - - -# # Example case -# N <- 150 -# Setsize <- 3 -# H <- Setsize -# Replace <- 0 # ( if Replace=0, Design is D_0, if Replace =1, Design= D_2 ) -# Model <- 0 # ( if Model=0, Design based inference, if Model=1, super population model is used) -# sig <- 4 -# K <- 6 -# n <- 100 -# sim <- 50000 -# alpha <- 0.05 -# rhoM <- matrix(c( -# rep(0.8, K), -# rep(0.8, K / 2), rep(0.3, K / 2), -# rep(0.5, K), -# rep(0.8, K / 3), rep(0.5, K / 3), rep(0.2, K / 3), -# 1, rep(0.8, (K - 1)) -# ), nrow = 5, ncol = K, byrow = TRUE) -# rhoV <- rhoM[2, ] -# # rhoV=0.9 -# tauV <- sig * sqrt(1 / rhoV^2 - 1) -# # tau=rep(0.5,K) -# pop <- qnorm((1:N) / (N + 1), 50, 3) -# popmean <- mean(pop) -# storE <- matrix(0, ncol = 6, nrow = sim) -# storV <- matrix(0, ncol = 6, nrow = sim) -# EstimatorID <- c("UnWeighted", "Sd.Weighted", "Aggregate Weight", "JPS Estimate", "SRS estimate", "Minimum") -# colnames(storE) <- EstimatorID -# colnames(storV) <- EstimatorID -# # ( iter in (1:sim)){ -# Data0 <- JPSD0F(pop, n, Setsize, K, tauV) # this function generates JPS data from design D_0 -# # Data0 <- JPSD2F(pop, n, Setsize, tauV, N, K) # this function generates JPS data from design D_2 -# -# ESTREP0 <- JPSLF(Data0, Setsize, Replace, N, model = 0) # this function computes the estimators -# ESTREP1 <- JPSLF(testdata, Setsize, Replace, N, model = 0) # this function computes the estimators -# -# -# # storE[iter,]=ESTREP0[,2] -# # storV[iter,]=ESTREP0[,3] -# # } -# # print(apply(storE,2,var)) -# # print(apply(storV,2,mean)) -# -# print(ESTREP0) diff --git a/examples/rss_estimate.R b/examples/rss_estimate.R deleted file mode 100644 index 68c543c..0000000 --- a/examples/rss_estimate.R +++ /dev/null @@ -1,35 +0,0 @@ -population_size <- 600 -# the number of samples to be ranked in each set -set_size <- 3 - - -sigma <- 4 -mu <- 10 -n_rankers <- 3 -# sample size -n <- 30 - -alpha <- 0.05 -rhos <- rep(0.75, n_rankers) -taus <- sigma * sqrt(1 / rhos^2 - 1) - -population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) -rho <- 0.75 -tau <- sigma * sqrt(1 / rho^2 - 1) -x <- population + tau * rnorm(population_size, 0, 1) - -population <- cbind(population, x) -data <- rss_sample(population, n, set_size, n_rankers, with_replacement) -data <- data[order(data[, 2]), ] - -rss_estimates <- rss_jps_estimate( - data, - set_size, - method = "RSS", - confidence = 0.80, - with_replacement = FALSE, - model_based = FALSE, - population_size = population_size -) - -print(rss_estimates) diff --git a/man/JPSD2F.Rd b/man/JPSD2F.Rd deleted file mode 100644 index 48c7a57..0000000 --- a/man/JPSD2F.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JPSD2F.R -\name{JPSD2F} -\alias{JPSD2F} -\title{Generate JPS sampling without replacement on the provided population.} -\usage{ -JPSD2F(pop, n, H, tau, K, with_replacement = FALSE) -} -\arguments{ -\item{tau}{A parameter which controls ranking quality.} -} -\value{ -A matrix with ranks from each ranker. -} -\description{ -Generate JPS sampling without replacement on the provided population. -} diff --git a/man/jps_estimator.Rd b/man/jps_estimate.Rd similarity index 81% rename from man/jps_estimator.Rd rename to man/jps_estimate.Rd index 959971c..8311128 100644 --- a/man/jps_estimator.Rd +++ b/man/jps_estimate.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jps_estimator.R -\name{jps_estimator} -\alias{jps_estimator} +% Please edit documentation in R/jps_estimate.R +\name{jps_estimate} +\alias{jps_estimate} \title{Computes the estimator for JPS data} \usage{ -jps_estimator(data, set_size, replace = TRUE, model_based, N, alpha) +jps_estimate(data, set_size, replace = TRUE, model_based, N, alpha) } \arguments{ \item{data}{The data to use for estimation.} diff --git a/man/jps_estimator_single.Rd b/man/jps_estimate_single.Rd similarity index 88% rename from man/jps_estimator_single.Rd rename to man/jps_estimate_single.Rd index de08b46..f3dc971 100644 --- a/man/jps_estimator_single.Rd +++ b/man/jps_estimate_single.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/jps_estimator_single.R -\name{jps_estimator_single} -\alias{jps_estimator_single} +% Please edit documentation in R/jps_estimate_single.R +\name{jps_estimate_single} +\alias{jps_estimate_single} \title{Computes the estimator and variance for each individual ranker} \usage{ -jps_estimator_single( +jps_estimate_single( ranks, y, set_size, diff --git a/man/jps_sample.Rd b/man/jps_sample.Rd new file mode 100644 index 0000000..8a8d66b --- /dev/null +++ b/man/jps_sample.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/jps_sample.R +\name{jps_sample} +\alias{jps_sample} +\title{Generate JPS sampling on the provided population.} +\usage{ +jps_sample(pop, n, H, tau, K, with_replacement = FALSE) +} +\arguments{ +\item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} + +\item{n}{Sample size.} + +\item{H}{Set size for each raking group.} + +\item{tau}{A parameter which controls ranking quality.} + +\item{K}{Number of rankers.} + +\item{with_replacement}{A boolean which specifies whether to sample with replacement or not.} +} +\value{ +A matrix with ranks from each ranker. +} +\description{ +Generate JPS sampling on the provided population. +} +\examples{ +set.seed(112) +population_size <- 600 +# the number of samples to be ranked in each set +H <- 3 + +with_replacement <- FALSE +sigma <- 4 +mu <- 10 +n_rankers <- 3 +# sample size +n <- 10 + +rhos <- rep(0.75, n_rankers) +taus <- sigma * sqrt(1 / rhos^2 - 1) + +population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +jps_sample(population, n, H, taus, n_rankers, with_replacement) +#> Y R1 R2 R3 +#> [1,] 6.384461 1 2 2 +#> [2,] 1.485141 1 1 1 +#> [3,] 13.640711 2 3 2 +#> [4,] 15.809136 3 3 2 +#> [5,] 6.769463 2 2 1 +#> [6,] 14.355524 3 3 3 +#> [7,] 10.729740 2 1 3 +#> [8,] 6.152453 1 1 1 +#> [9,] 8.701285 2 1 2 +#> [10,] 13.323884 3 3 3 + +} diff --git a/man/RSSEF.Rd b/man/rss_estimate.Rd similarity index 80% rename from man/RSSEF.Rd rename to man/rss_estimate.Rd index 36a0b8d..003146d 100644 --- a/man/RSSEF.Rd +++ b/man/rss_estimate.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RSSEF.R -\name{RSSEF} -\alias{RSSEF} +% Please edit documentation in R/rss_estimate.R +\name{rss_estimate} +\alias{rss_estimate} \title{This function provides estimator for RSS data} \usage{ -RSSEF(data, set_size, replace, model, N, alpha) +rss_estimate(data, set_size, replace, model, N, alpha) } \arguments{ \item{data}{An n by (K+1) dimensional data matrix, the first column is Y-values, the next K coulumns are the ranks of K-ranking methods} diff --git a/man/rss_jps_estimate.Rd b/man/rss_jps_estimate.Rd index 7713442..78023a7 100644 --- a/man/rss_jps_estimate.Rd +++ b/man/rss_jps_estimate.Rd @@ -45,14 +45,77 @@ error and confidence intervals. Estimate means from RSS or JPS sample. } \examples{ +# JPS estimator +set.seed(112) +population_size <- 600 +# the number of samples to be ranked in each set +H <- 3 -# Compute the JPS estimators +with_replacement <- FALSE +sigma <- 4 +mu <- 10 +n_rankers <- 3 +# sample size +n <- 30 -jps_estimates <- rss_jps_estimate( - data = emergence_ranks, set_size = 4, - method = "JPS", confidence = 0.95, - replace = TRUE, model_based = FALSE, - pop_size = nrow(population) +rhos <- rep(0.75, n_rankers) +taus <- sigma * sqrt(1 / rhos^2 - 1) +population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) +data <- data[order(data[, 2]), ] + +rss_jps_estimate( + data, + set_size = H, + method = "JPS", + confidence = 0.80, + replace = with_replacement, + model_based = FALSE, + pop_size = population_size ) +#> Estimator Estimate Standard Error 80\% Confidence intervals +#> 1 UnWeighted 9.570 0.526 8.88,10.26 +#> 2 Sd.Weighted 9.595 0.569 8.849,10.341 +#> 3 Aggregate Weight 9.542 0.500 8.887,10.198 +#> 4 JPS Estimate 9.502 0.650 8.651,10.354 +#> 5 SRS estimate 9.793 0.783 8.766,10.821 +#> 6 Minimum 9.542 0.500 8.887,10.198 + +# RSS estimator +set.seed(112) +population_size <- 600 +# the number of samples to be ranked in each set +H <- 3 + +with_replacement <- FALSE +sigma <- 4 +mu <- 10 +n_rankers <- 3 +# sample size +n <- 30 + +population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +rho <- 0.75 +tau <- sigma * sqrt(1 / rho^2 - 1) +x <- population + tau * rnorm(population_size, 0, 1) + +population <- cbind(population, x) +data <- RankedSetSampling::rss_sample(population, n, H, n_rankers, with_replacement) +data <- data[order(data[, 2]), ] + +rss_estimates <- rss_jps_estimate( + data, + set_size = H, + method = "RSS", + confidence = 0.80, + replace = with_replacement, + model_based = FALSE, + pop_size = population_size +) + +print(rss_estimates) +#> Estimator point.est St.error 80\% Confidence Interval +#> 1 RSS-1 9.153 0.766 8.148,10.158 +#> 2 Aggregate Weighted 9.064 0.652 8.209,9.919 } diff --git a/man/rss_sample.Rd b/man/rss_sample.Rd index 87867d9..5cdc3e8 100644 --- a/man/rss_sample.Rd +++ b/man/rss_sample.Rd @@ -23,3 +23,35 @@ A matrix with ranks from each ranker. \description{ Generate ranked set sampling (RSS) on the population provided. } +\examples{ +set.seed(112) +population_size <- 600 +# the number of samples to be ranked in each set +H <- 3 + +with_replacement <- FALSE +sigma <- 4 +mu <- 10 +n_rankers <- 3 +# sample size +n <- 9 + +population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +rho <- 0.75 +tau <- sigma * sqrt(1 / rho^2 - 1) +x <- population + tau * rnorm(population_size, 0, 1) + +population <- cbind(population, x) +rss_sample(population, n, H, n_rankers, with_replacement) +#> [,1] [,2] [,3] [,4] +#> [1,] 8.910625 1 3 1 +#> [2,] 12.317145 2 2 1 +#> [3,] 11.619746 3 3 2 +#> [4,] 8.307549 1 2 1 +#> [5,] 5.089992 2 2 3 +#> [6,] 7.233575 3 3 3 +#> [7,] 11.601654 1 2 2 +#> [8,] 9.134107 2 1 1 +#> [9,] 12.960431 3 3 1 + +} diff --git a/man/sbs_pps_estimate.Rd b/man/sbs_pps_estimate.Rd index f821755..b5f166c 100644 --- a/man/sbs_pps_estimate.Rd +++ b/man/sbs_pps_estimate.Rd @@ -49,3 +49,33 @@ A summary data frame of the estimator. \description{ Compute an estimator for SBS PPS sampled data. } +\examples{ +set.seed(112) + +# SBS sample size, PPS sample size +sample_sizes <- c(5, 5) + +n_population <- 233 +k <- 0:(n_population - 1) +x1 <- sample(1:13, n_population, replace = TRUE) / 13 +x2 <- sample(1:8, n_population, replace = TRUE) / 8 +y <- (x1 + x2) * runif(n = n_population, min = 1, max = 2) + 1 +measured_sizes <- y * runif(n = n_population, min = 0, max = 4) + +population <- matrix(cbind(k, x1, x2, measured_sizes), ncol = 4) +sample_result <- sbs_pps_sample(population, sample_sizes) + +# estimate the population mean and construct a confidence interval +df_sample <- sample_result$sample +sample_id <- df_sample[, 1] +y_sample <- y[sample_id] + +sbs_pps_estimates <- sbs_pps_estimate( + population, sample_sizes, y_sample, df_sample, + n_bootstrap = 100, alpha = 0.05 +) +print(sbs_pps_estimates) +#> n1 n2 Estimate St.error 95\% Confidence intervals +#> 1 5 5 2.849 0.1760682 2.451,3.247 + +} diff --git a/man/sbs_pps_sample.Rd b/man/sbs_pps_sample.Rd index c5f309a..a27c48b 100644 --- a/man/sbs_pps_sample.Rd +++ b/man/sbs_pps_sample.Rd @@ -29,3 +29,32 @@ A named list of: \description{ Generate probability proportional to size (PPS) and spatially balanced sampling on the population provided. } +\examples{ +set.seed(112) + +# SBS sample size, PPS sample size +sample_sizes <- c(5, 5) + +n_population <- 233 +k <- 0:(n_population - 1) +x1 <- sample(1:13, n_population, replace = TRUE) / 13 +x2 <- sample(1:8, n_population, replace = TRUE) / 8 +y <- (x1 + x2) * runif(n = n_population, min = 1, max = 2) + 1 +measured_sizes <- y * runif(n = n_population, min = 0, max = 4) + +population <- matrix(cbind(k, x1, x2, measured_sizes), ncol = 4) +sample_result <- sbs_pps_sample(population, sample_sizes) +print(sample_result$sample) +#> sbs_pps_indices x1 x2 size weight inclusion_probability +#> 1 87 0.4615385 0.625 0.4665423 0.000000000 0.02319163 +#> 2 88 0.1538462 0.625 1.7389902 0.000000000 0.02790409 +#> 3 89 0.8461538 0.625 7.0815547 0.000000000 0.04749104 +#> 4 90 0.6923077 0.750 9.5428032 0.000000000 0.05640733 +#> 5 91 0.2307692 0.750 5.1375136 0.000000000 0.04039996 +#> 6 173 0.1538462 0.500 6.6400168 0.005024130 0.04588620 +#> 7 26 0.6153846 0.500 4.3146186 0.003264631 0.03738898 +#> 8 232 0.8461538 0.750 12.0057856 0.009084108 0.06526583 +#> 9 171 0.6153846 0.750 6.9029083 0.005223046 0.04684225 +#> 10 29 0.8461538 0.375 4.6324720 0.003505133 0.03855377 + +} diff --git a/tests/testthat/test-JPSD2F.R b/tests/testthat/test-JPSD2F.R deleted file mode 100644 index d5f301d..0000000 --- a/tests/testthat/test-JPSD2F.R +++ /dev/null @@ -1,24 +0,0 @@ -test_that("JPSD2F has a correct output.", { - skip_if(getRversion() < "3.4") - population <- 1:100 - k <- 3 - tau <- rep(1.2, 3) - - jps_matrix <- JPSD2F(population, 30, 2, tau, k) - expect_equal(dim(jps_matrix), c(30, 4)) - expect_equal(sort(unique(jps_matrix[, 2])), 1:2) -}) - -test_that("Inputs are valid.", { - population <- 1:100 - k <- 3 - tau <- rep(1.2, 3) - - expect_error(JPSD2F(population, -30, 2, tau, k), "`n` must be a positive whole number.") - expect_error(JPSD2F(population, 30, -2, tau, k), "`H` must be a positive whole number.") - expect_error(JPSD2F(population, 30, 2, tau, -k), "`K` must be a positive whole number.") - expect_error(JPSD2F(population, 30, 2, tau, 4), "The length of `tau` must equal to `K`.") - expect_error(JPSD2F(population, 50, 4, tau, k), "The number of population must be at least `nH`.") - expect_error(JPSD2F(population, 10, 20, tau, k), "`n` must >= `H`.") - expect_error(JPSD2F(population, 30, 2, tau, k, "T"), "`with_replacement` must be a boolean.") -}) diff --git a/tests/testthat/test-jps_sample.R b/tests/testthat/test-jps_sample.R new file mode 100644 index 0000000..dab78f1 --- /dev/null +++ b/tests/testthat/test-jps_sample.R @@ -0,0 +1,24 @@ +test_that("JPS sample has a correct output.", { + skip_if(getRversion() < "3.4") + population <- 1:100 + k <- 3 + tau <- rep(1.2, 3) + + jps_matrix <- jps_sample(population, 30, 2, tau, k) + expect_equal(dim(jps_matrix), c(30, 4)) + expect_equal(sort(unique(jps_matrix[, 2])), 1:2) +}) + +test_that("Inputs are valid.", { + population <- 1:100 + k <- 3 + tau <- rep(1.2, 3) + + expect_error(jps_sample(population, -30, 2, tau, k), "`n` must be a positive whole number.") + expect_error(jps_sample(population, 30, -2, tau, k), "`H` must be a positive whole number.") + expect_error(jps_sample(population, 30, 2, tau, -k), "`K` must be a positive whole number.") + expect_error(jps_sample(population, 30, 2, tau, 4), "The length of `tau` must equal to `K`.") + expect_error(jps_sample(population, 50, 4, tau, k), "The number of population must be at least `nH`.") + expect_error(jps_sample(population, 10, 20, tau, k), "`n` must >= `H`.") + expect_error(jps_sample(population, 30, 2, tau, k, "T"), "`with_replacement` must be a boolean.") +}) From fd52242fdd66897d453cd34435e1ff0591d1270e Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Tue, 21 May 2024 21:42:37 +0930 Subject: [PATCH 07/13] refactor rss estimate --- R/JPSED0F.R | 164 ++++++++++++------------ R/JPSLF.R | 210 +++++++++++++++---------------- R/ListF.R | 84 ++++++------- R/RSSED2F.R | 42 ------- R/jackknife_agreement_estimate.R | 15 +++ R/jps_estimate.R | 11 +- R/rss_estimate.R | 160 +++++++++++------------ R/rss_estimate_single.R | 38 ++++++ R/rss_sample.R | 12 +- R/utils.R | 15 ++- man/JPSED0F.Rd | 30 ----- man/JPSLF.Rd | 32 ----- man/ListF.Rd | 25 ---- man/RSSED2F.Rd | 21 ---- man/jps_sample.Rd | 2 - man/rss_estimate.Rd | 22 ++-- man/rss_estimate_single.Rd | 21 ++++ man/rss_sample.Rd | 8 +- tests/testthat/test-rss_sample.R | 2 +- 19 files changed, 412 insertions(+), 502 deletions(-) delete mode 100644 R/RSSED2F.R create mode 100644 R/jackknife_agreement_estimate.R create mode 100644 R/rss_estimate_single.R delete mode 100644 man/JPSED0F.Rd delete mode 100644 man/JPSLF.Rd delete mode 100644 man/ListF.Rd delete mode 100644 man/RSSED2F.Rd create mode 100644 man/rss_estimate_single.Rd diff --git a/R/JPSED0F.R b/R/JPSED0F.R index e33e864..943535a 100644 --- a/R/JPSED0F.R +++ b/R/JPSED0F.R @@ -12,46 +12,7 @@ #' #' @keywords internal #' -JPSED0F <- function(RV, Y, set_size, Coef, N, Replace, Model) { - ########################################################### - # This function Computes JPS estimator and its variance ## - ########################################################### - # JPSD0: - # First column: Response - # Second column: Ranks - # print(Coef) - RVD <- data.frame(RV) - M.est <- mean(aggregate(Y, RVD, mean)$x) # JPS estimate - YIYJ <- expand.grid(Y, Y) - GSample.Size <- aggregate(RV, data.frame(RV), length)$x - dn <- length(GSample.Size) - # print(dn) - GSample.Size1 <- GSample.Size[GSample.Size > 1] - dn.star <- length(GSample.Size1) - RhRhp <- expand.grid(RV, RV) - YIYJ2 <- (YIYJ[, 1] - YIYJ[, 2])^2 - group.mean <- aggregate(YIYJ2, RhRhp, mean) - Y2hhT2 <- group.mean[group.mean[, 1] - group.mean[, 2] == 0, ]$x - Y2hhT2 <- Y2hhT2[GSample.Size > 1] - T2s <- set_size * sum(Y2hhT2 * GSample.Size1^2 / (GSample.Size1 * (GSample.Size1 - 1))) / (2 * dn.star) - Y2hhT1 <- group.mean[group.mean[, 1] - group.mean[, 2] != 0, ]$x - T1s <- sum(Y2hhT1) / (2 * Coef[1] * dn^2) - VestD0 <- Coef[2] * T1s / (set_size - 1) + Coef[3] * T2s - if (Replace == 1) { - VEST <- Coef[2] * T2s + Coef[3] * (N - 1) * (T1s + T2s) / (N * (set_size - 1)) - if (VEST <= 0) VEST <- Coef[2] * T2s / 2 - } else { - VEST <- Coef[2] * T1s / (set_size - 1) + Coef[3] * T2s - } - if (Model == 1) { - VEST <- (T1s + T2s) / set_size^2 * ((-1 / N) + Coef[2] * set_size / (set_size - 1)) + T2s * ((Coef[3] + Coef[2]) + Coef[2] * set_size / (set_size - 1)) - if (VEST <= 0) VEST <- T2s * ((Coef[3] + Coef[2]) + Coef[2] * set_size / (set_size - 1)) - } - return(c(M.est, VEST)) -} - -# JPSED0F_tidyverse <- function(RV, Y, set_size, Coef, N, Replace, Model) { -# +# JPSED0F <- function(RV, Y, set_size, Coef, N, Replace, Model) { # ########################################################### # # This function Computes JPS estimator and its variance ## # ########################################################### @@ -59,24 +20,22 @@ JPSED0F <- function(RV, Y, set_size, Coef, N, Replace, Model) { # # First column: Response # # Second column: Ranks # # print(Coef) -# RVD <- data.frame(RV, Y) -# M.est <- mean(dplyr::summarise(dplyr::group_by(RVD, RV), x = mean(Y))$x) # JPS estimate +# RVD <- data.frame(RV) +# M.est <- mean(aggregate(Y, RVD, mean)$x) # JPS estimate # YIYJ <- expand.grid(Y, Y) -# GSample.Size <- dplyr::summarise(dplyr::group_by(RVD, RV), x = n())$x -# # dn <- length(GSample.Size) +# GSample.Size <- aggregate(RV, data.frame(RV), length)$x +# dn <- length(GSample.Size) # # print(dn) # GSample.Size1 <- GSample.Size[GSample.Size > 1] -# # dn.star <- length(GSample.Size1) +# dn.star <- length(GSample.Size1) # RhRhp <- expand.grid(RV, RV) # YIYJ2 <- (YIYJ[, 1] - YIYJ[, 2])^2 -# # group.mean <- aggregate(YIYJ2, RhRhp, mean) -# data <- cbind(RhRhp, YIYJ2) -# group.mean <- dplyr::summarise(dplyr::group_by(data, Var1, Var2), x = mean(YIYJ2)) +# group.mean <- aggregate(YIYJ2, RhRhp, mean) # Y2hhT2 <- group.mean[group.mean[, 1] - group.mean[, 2] == 0, ]$x # Y2hhT2 <- Y2hhT2[GSample.Size > 1] -# T2s <- set_size * sum(Y2hhT2 * GSample.Size1^2 / (GSample.Size1 * (GSample.Size1 - 1))) / (2 * length(GSample.Size1)) +# T2s <- set_size * sum(Y2hhT2 * GSample.Size1^2 / (GSample.Size1 * (GSample.Size1 - 1))) / (2 * dn.star) # Y2hhT1 <- group.mean[group.mean[, 1] - group.mean[, 2] != 0, ]$x -# T1s <- sum(Y2hhT1) / (2 * Coef[1] * length(GSample.Size)^2) +# T1s <- sum(Y2hhT1) / (2 * Coef[1] * dn^2) # VestD0 <- Coef[2] * T1s / (set_size - 1) + Coef[3] * T2s # if (Replace == 1) { # VEST <- Coef[2] * T2s + Coef[3] * (N - 1) * (T1s + T2s) / (N * (set_size - 1)) @@ -90,35 +49,76 @@ JPSED0F <- function(RV, Y, set_size, Coef, N, Replace, Model) { # } # return(c(M.est, VEST)) # } - -# JPSD0F_new <- function(pop, n, H, tau, N, K) { -# # tau: controls the ranking quality -# # n:sample size -# # H: Set szie -# # pop: population -# N <- length(pop) # population size -# # SRSI=sample(1:N,n,replace=TRUE) -# # SRS=pop[SRSI] # first create a simple randopm sample -# # redpop=pop[-SRSI] # remove the slected SRS from, the population -# # NR=length(redpop) # reduced population size -# pRIn <- 1:N # reduced population index -# ################################################# -# # below consruct rank for each SRS unit post experimentally -# JPS <- matrix(0, ncol = (K + 1), nrow = n) # store JPS sample -# ############################################## -# for (i in (1:n)) { -# # Yi=SRS[i] # measured unit -# Compi <- sample(pRIn, H) # select H-1 unit to construct comparison set -# Set <- pop[Compi] # combine H-1 unit with the measured unit Y-i -# Yi <- Set[1] -# JPS[i, 1] <- Yi -# for (k in (2:(K + 1))) { -# DCSet <- Set + tau[k - 1] * rnorm(H, 0, 1) # adjust ranking quality using Dell-Clutter -# # model -# RankSet <- rank(DCSet) # rank the units -# JPS[i, k] <- RankSet[1] # JPS sample for the i-th unit -# } -# } -# colnames(JPS) <- c("Y", paste("R", 1:K, sep = "")) -# return(JPS) -# } +# +# # JPSED0F_tidyverse <- function(RV, Y, set_size, Coef, N, Replace, Model) { +# # +# # ########################################################### +# # # This function Computes JPS estimator and its variance ## +# # ########################################################### +# # # JPSD0: +# # # First column: Response +# # # Second column: Ranks +# # # print(Coef) +# # RVD <- data.frame(RV, Y) +# # M.est <- mean(dplyr::summarise(dplyr::group_by(RVD, RV), x = mean(Y))$x) # JPS estimate +# # YIYJ <- expand.grid(Y, Y) +# # GSample.Size <- dplyr::summarise(dplyr::group_by(RVD, RV), x = n())$x +# # # dn <- length(GSample.Size) +# # # print(dn) +# # GSample.Size1 <- GSample.Size[GSample.Size > 1] +# # # dn.star <- length(GSample.Size1) +# # RhRhp <- expand.grid(RV, RV) +# # YIYJ2 <- (YIYJ[, 1] - YIYJ[, 2])^2 +# # # group.mean <- aggregate(YIYJ2, RhRhp, mean) +# # data <- cbind(RhRhp, YIYJ2) +# # group.mean <- dplyr::summarise(dplyr::group_by(data, Var1, Var2), x = mean(YIYJ2)) +# # Y2hhT2 <- group.mean[group.mean[, 1] - group.mean[, 2] == 0, ]$x +# # Y2hhT2 <- Y2hhT2[GSample.Size > 1] +# # T2s <- set_size * sum(Y2hhT2 * GSample.Size1^2 / (GSample.Size1 * (GSample.Size1 - 1))) / (2 * length(GSample.Size1)) +# # Y2hhT1 <- group.mean[group.mean[, 1] - group.mean[, 2] != 0, ]$x +# # T1s <- sum(Y2hhT1) / (2 * Coef[1] * length(GSample.Size)^2) +# # VestD0 <- Coef[2] * T1s / (set_size - 1) + Coef[3] * T2s +# # if (Replace == 1) { +# # VEST <- Coef[2] * T2s + Coef[3] * (N - 1) * (T1s + T2s) / (N * (set_size - 1)) +# # if (VEST <= 0) VEST <- Coef[2] * T2s / 2 +# # } else { +# # VEST <- Coef[2] * T1s / (set_size - 1) + Coef[3] * T2s +# # } +# # if (Model == 1) { +# # VEST <- (T1s + T2s) / set_size^2 * ((-1 / N) + Coef[2] * set_size / (set_size - 1)) + T2s * ((Coef[3] + Coef[2]) + Coef[2] * set_size / (set_size - 1)) +# # if (VEST <= 0) VEST <- T2s * ((Coef[3] + Coef[2]) + Coef[2] * set_size / (set_size - 1)) +# # } +# # return(c(M.est, VEST)) +# # } +# +# # JPSD0F_new <- function(pop, n, H, tau, N, K) { +# # # tau: controls the ranking quality +# # # n:sample size +# # # H: Set szie +# # # pop: population +# # N <- length(pop) # population size +# # # SRSI=sample(1:N,n,replace=TRUE) +# # # SRS=pop[SRSI] # first create a simple randopm sample +# # # redpop=pop[-SRSI] # remove the slected SRS from, the population +# # # NR=length(redpop) # reduced population size +# # pRIn <- 1:N # reduced population index +# # ################################################# +# # # below consruct rank for each SRS unit post experimentally +# # JPS <- matrix(0, ncol = (K + 1), nrow = n) # store JPS sample +# # ############################################## +# # for (i in (1:n)) { +# # # Yi=SRS[i] # measured unit +# # Compi <- sample(pRIn, H) # select H-1 unit to construct comparison set +# # Set <- pop[Compi] # combine H-1 unit with the measured unit Y-i +# # Yi <- Set[1] +# # JPS[i, 1] <- Yi +# # for (k in (2:(K + 1))) { +# # DCSet <- Set + tau[k - 1] * rnorm(H, 0, 1) # adjust ranking quality using Dell-Clutter +# # # model +# # RankSet <- rank(DCSet) # rank the units +# # JPS[i, k] <- RankSet[1] # JPS sample for the i-th unit +# # } +# # } +# # colnames(JPS) <- c("Y", paste("R", 1:K, sep = "")) +# # return(JPS) +# # } diff --git a/R/JPSLF.R b/R/JPSLF.R index 3e59644..15dfb2f 100644 --- a/R/JPSLF.R +++ b/R/JPSLF.R @@ -10,108 +10,108 @@ #' @return #' @keywords internal #' -JPSLF <- function(data, set_size, replace, population_size, model, estimator = "SD Weighted") { - # JPS using List function - # Set size provided by user, if replace is FALSE, then set_size*samplesize <= pop size - # If replace = TRUE, no restriction on set size. - # Check if one ranker - just return JPS estimate - # multiple rankers: return sd.weighted by default, optionally return all estimates - # model is switch for design-based or model based (super population) - - # model <- match.arg(type) - - estimator <- match.arg(estimator, - arg = c( - "SD Weighted", - "Unweighted", - "Aggregate Weight", - "JPS Estimate", - "SRS estimate", - "Minimum" - ), - several.ok = TRUE - ) - - # Check arguments - # if(!replace & set_size*nrow(data) > population_size) { - # stop("population_size must not be less than set_size*nrow(data)") - # } - - # nK <- dim(data) - n <- dim(data)[1] - K <- dim(data)[2] - 1 - Coefn <- calculate_coefficients(set_size, n) - ####################################### - # Compute coefficient to go into JPSED0G - if (replace) { - if (is.null(population_size)) { - print("Population size population_size must be provided for without replacement sampling") - } - coef1D2 <- Coefn[1] - coef2D2 <- 1 / (set_size * (set_size - 1)) + Coefn[3] + Coefn[2] - (Coefn[2] + 1 / set_size^2) * set_size / (set_size - 1) - coef3D2 <- Coefn[2] - 1 / (population_size - 1) * (1 / set_size - (Coefn[1] + 1 / set_size^2)) - CoefD <- c(coef1D2, coef2D2, coef3D2) - } else { - CoefD <- Coefn - } - if (model == 1) CoefD <- Coefn - ############################################ - if (K == 1) { - # print(K) - JPSE.V <- JPSED0F(data[, 2], data[, 1], set_size, CoefD, population_size, replace, model) # single ranking method estiamte - # print(JPSE.V) - Estimator <- "JPS" - Point.Est <- JPSE.V[1] - Variance.Point <- JPSE.V[2] - Lower.Limit <- Point.Est - qt(1 - alpha / 2, n - 1) * sqrt(Variance.Point) - Upper.Limit <- Point.Est + qt(1 - alpha / 2, n - 1) * sqrt(Variance.Point) - Summary.return <- data.frame(Estimator, Point.Est, Variance.Point, Lower.Limit, Upper.Limit) - return(Summary.return) - } - JPSE.E <- ListF(data, set_size, replace, population_size, model, Coefn) # estimate based on combined ranking method - ################ - # create list of data frames by deleting each row - # this is used for Jackknife variance estimate - DataL <- vector("list", n) - for (i in (1:n)) { - DataL[[i]] <- data[-i, ] - } - Coefn1 <- calculate_coefficients(set_size, n - 1) - ########################################### - ### This line is the slow part - delet1 <- lapply(DataL, ListF, set_size, replace, population_size, model, Coefn1) # Compute n different combined estimate - # by deleting one observation at a time - - - JPSE.V <- JPSED0F(data[, 2], data[, 1], set_size, CoefD, population_size, replace, model) # single ranking method estiamte - # print(JPSE.V) - - delet1M <- do.call(rbind, delet1) # Convert list to matrix - delet1M <- rbind(JPSE.E, delet1M) - fc <- 1 - if (replace == 1) { - fc <- (1 - n / population_size) - } - JackV <- fc * apply(delet1M, 2, JACKVF) # Jackknife variance estimate of the combined estimator - Variance.est <- c(JackV, JPSE.V[2]) # bind the variance of JPS estimator based on single ranking method - Estimate.est <- c(JPSE.E, JPSE.V[1]) # bind the JPS estimator based on single ranking method - min.ind <- which(Variance.est == min(Variance.est)) # Find the estimator having minimum variance - # among four estimator - Variance.est <- c(Variance.est, var(data[, 1]) / n, Variance.est[min.ind]) # bind the variance of minimum - # variance estimator - St.error <- sqrt(Variance.est) - Estimate.est <- c(Estimate.est, mean(data[, 1]), Estimate.est[min.ind]) # bind the minimum variance estiamtor - Lower.Limit <- Estimate.est - qt(1 - alpha / 2, n - 1) * sqrt(Variance.est) - Upper.Limit <- Estimate.est + qt(1 - alpha / 2, n - 1) * sqrt(Variance.est) - Lower.Limit <- round(Lower.Limit, digits = 3) - Upper.Limit <- round(Upper.Limit, digits = 3) - # test=(LL-popmean)*(UL-popmean) - # coef=1*(test <0) - CI <- paste(Lower.Limit, Upper.Limit, sep = ",") - Estimator <- c("UnWeighted", "Sd.Weighted", "Aggregate Weight", "JPS Estimate", "SRS estimate", "Minimum") - # Summary.ret=data.frame(Estimator,Estimate.est,Variance.est,Lower.Limit,Upper.Limit) - Summary.ret <- data.frame(Estimator, round(Estimate.est, digits = 3), round(St.error, digits = 3), CI, stringsAsFactors = F) - colnames(Summary.ret) <- c("Estimator", "Estimate", "Standard Error", paste((1 - alpha) * 100, "% Confidence intervals", sep = "")) - # Summary.ret=round(Summary.ret,digits=3) - return(Summary.ret) -} +# JPSLF <- function(data, set_size, replace, population_size, model, estimator = "SD Weighted") { +# # JPS using List function +# # Set size provided by user, if replace is FALSE, then set_size*samplesize <= pop size +# # If replace = TRUE, no restriction on set size. +# # Check if one ranker - just return JPS estimate +# # multiple rankers: return sd.weighted by default, optionally return all estimates +# # model is switch for design-based or model based (super population) +# +# # model <- match.arg(type) +# +# estimator <- match.arg(estimator, +# arg = c( +# "SD Weighted", +# "Unweighted", +# "Aggregate Weight", +# "JPS Estimate", +# "SRS estimate", +# "Minimum" +# ), +# several.ok = TRUE +# ) +# +# # Check arguments +# # if(!replace & set_size*nrow(data) > population_size) { +# # stop("population_size must not be less than set_size*nrow(data)") +# # } +# +# # nK <- dim(data) +# n <- dim(data)[1] +# K <- dim(data)[2] - 1 +# Coefn <- calculate_coefficients(set_size, n) +# ####################################### +# # Compute coefficient to go into JPSED0G +# if (replace) { +# if (is.null(population_size)) { +# print("Population size population_size must be provided for without replacement sampling") +# } +# coef1D2 <- Coefn[1] +# coef2D2 <- 1 / (set_size * (set_size - 1)) + Coefn[3] + Coefn[2] - (Coefn[2] + 1 / set_size^2) * set_size / (set_size - 1) +# coef3D2 <- Coefn[2] - 1 / (population_size - 1) * (1 / set_size - (Coefn[1] + 1 / set_size^2)) +# CoefD <- c(coef1D2, coef2D2, coef3D2) +# } else { +# CoefD <- Coefn +# } +# if (model == 1) CoefD <- Coefn +# ############################################ +# if (K == 1) { +# # print(K) +# JPSE.V <- JPSED0F(data[, 2], data[, 1], set_size, CoefD, population_size, replace, model) # single ranking method estiamte +# # print(JPSE.V) +# Estimator <- "JPS" +# Point.Est <- JPSE.V[1] +# Variance.Point <- JPSE.V[2] +# Lower.Limit <- Point.Est - qt(1 - alpha / 2, n - 1) * sqrt(Variance.Point) +# Upper.Limit <- Point.Est + qt(1 - alpha / 2, n - 1) * sqrt(Variance.Point) +# Summary.return <- data.frame(Estimator, Point.Est, Variance.Point, Lower.Limit, Upper.Limit) +# return(Summary.return) +# } +# JPSE.E <- ListF(data, set_size, replace, population_size, model, Coefn) # estimate based on combined ranking method +# ################ +# # create list of data frames by deleting each row +# # this is used for Jackknife variance estimate +# DataL <- vector("list", n) +# for (i in (1:n)) { +# DataL[[i]] <- data[-i, ] +# } +# Coefn1 <- calculate_coefficients(set_size, n - 1) +# ########################################### +# ### This line is the slow part +# delet1 <- lapply(DataL, ListF, set_size, replace, population_size, model, Coefn1) # Compute n different combined estimate +# # by deleting one observation at a time +# +# +# JPSE.V <- JPSED0F(data[, 2], data[, 1], set_size, CoefD, population_size, replace, model) # single ranking method estiamte +# # print(JPSE.V) +# +# delet1M <- do.call(rbind, delet1) # Convert list to matrix +# delet1M <- rbind(JPSE.E, delet1M) +# fc <- 1 +# if (replace == 1) { +# fc <- (1 - n / population_size) +# } +# JackV <- fc * apply(delet1M, 2, JACKVF) # Jackknife variance estimate of the combined estimator +# Variance.est <- c(JackV, JPSE.V[2]) # bind the variance of JPS estimator based on single ranking method +# Estimate.est <- c(JPSE.E, JPSE.V[1]) # bind the JPS estimator based on single ranking method +# min.ind <- which(Variance.est == min(Variance.est)) # Find the estimator having minimum variance +# # among four estimator +# Variance.est <- c(Variance.est, var(data[, 1]) / n, Variance.est[min.ind]) # bind the variance of minimum +# # variance estimator +# St.error <- sqrt(Variance.est) +# Estimate.est <- c(Estimate.est, mean(data[, 1]), Estimate.est[min.ind]) # bind the minimum variance estiamtor +# Lower.Limit <- Estimate.est - qt(1 - alpha / 2, n - 1) * sqrt(Variance.est) +# Upper.Limit <- Estimate.est + qt(1 - alpha / 2, n - 1) * sqrt(Variance.est) +# Lower.Limit <- round(Lower.Limit, digits = 3) +# Upper.Limit <- round(Upper.Limit, digits = 3) +# # test=(LL-popmean)*(UL-popmean) +# # coef=1*(test <0) +# CI <- paste(Lower.Limit, Upper.Limit, sep = ",") +# Estimator <- c("UnWeighted", "Sd.Weighted", "Aggregate Weight", "JPS Estimate", "SRS estimate", "Minimum") +# # Summary.ret=data.frame(Estimator,Estimate.est,Variance.est,Lower.Limit,Upper.Limit) +# Summary.ret <- data.frame(Estimator, round(Estimate.est, digits = 3), round(St.error, digits = 3), CI, stringsAsFactors = F) +# colnames(Summary.ret) <- c("Estimator", "Estimate", "Standard Error", paste((1 - alpha) * 100, "% Confidence intervals", sep = "")) +# # Summary.ret=round(Summary.ret,digits=3) +# return(Summary.ret) +# } diff --git a/R/ListF.R b/R/ListF.R index 7fad5cf..72f00c6 100644 --- a/R/ListF.R +++ b/R/ListF.R @@ -10,45 +10,45 @@ #' @return #' @keywords internal #' -ListF <- function(data, set_size, Replace, N, Model, CoefD0) { - Y <- data[, 1] - Ranks <- data[, -1] - n <- length(Y) - # set_size <- set_size - # CoefD0=CoefD0F(set_size,n) - # print(c(N,Replace)) - ################################################################## - # For design D2 coefficients - if (Replace == 1) { - if (is.null(N)) { - print("Population size N must be provided for without replacement sampling") - } - coef1D2 <- CoefD0[1] - coef2D2 <- 1 / (set_size * (set_size - 1)) + CoefD0[3] + CoefD0[2] - (CoefD0[2] + 1 / set_size^2) * set_size / (set_size - 1) - coef3D2 <- CoefD0[2] - 1 / (N - 1) * (1 / set_size - (CoefD0[1] + 1 / set_size^2)) - CoefD <- c(coef1D2, coef2D2, coef3D2) - } else { - CoefD <- CoefD0 - } - if (Model == 1) CoefD <- CoefD0 - ################################################################## - # print(Ranks) - WEIGHT <- t(apply(data.frame(Ranks), 1, calculate_agreement_weights, H = set_size)) - # print(WEIGHT) - WY <- Y %*% WEIGHT - Eff.Ssize <- colSums(WEIGHT) - W.est <- mean(WY[Eff.Ssize > 0] / Eff.Ssize[Eff.Ssize > 0]) - - # This line is the culprit for time - EST <- t(apply(data.frame(Ranks), 2, JPSED0F, Y = Y, set_size = set_size, Coef = CoefD, N, Replace, Model)) # Estimate of population mean and variance - # for each ranking method - # print("From ListF" ) - # print(EST) - # print(EST) - prec.weight <- (1 / EST[, 2]) / sum(1 / EST[, 2]) # precison weight-- inverse of the variance - Cest <- sum(prec.weight * EST[, 1]) # point estimate for the population mean - # JackNrep=matrix(0,nrow=n,ncol=2) - # CoefD0n.1=CoefD0F(set_size,(n-1)) - retest <- c(mean(EST[, 1]), Cest, W.est) - return(retest) -} +# ListF <- function(data, set_size, Replace, N, Model, CoefD0) { +# Y <- data[, 1] +# Ranks <- data[, -1] +# n <- length(Y) +# # set_size <- set_size +# # CoefD0=CoefD0F(set_size,n) +# # print(c(N,Replace)) +# ################################################################## +# # For design D2 coefficients +# if (Replace == 1) { +# if (is.null(N)) { +# print("Population size N must be provided for without replacement sampling") +# } +# coef1D2 <- CoefD0[1] +# coef2D2 <- 1 / (set_size * (set_size - 1)) + CoefD0[3] + CoefD0[2] - (CoefD0[2] + 1 / set_size^2) * set_size / (set_size - 1) +# coef3D2 <- CoefD0[2] - 1 / (N - 1) * (1 / set_size - (CoefD0[1] + 1 / set_size^2)) +# CoefD <- c(coef1D2, coef2D2, coef3D2) +# } else { +# CoefD <- CoefD0 +# } +# if (Model == 1) CoefD <- CoefD0 +# ################################################################## +# # print(Ranks) +# WEIGHT <- t(apply(data.frame(Ranks), 1, calculate_agreement_weights, H = set_size)) +# # print(WEIGHT) +# WY <- Y %*% WEIGHT +# Eff.Ssize <- colSums(WEIGHT) +# W.est <- mean(WY[Eff.Ssize > 0] / Eff.Ssize[Eff.Ssize > 0]) +# +# # This line is the culprit for time +# EST <- t(apply(data.frame(Ranks), 2, JPSED0F, Y = Y, set_size = set_size, Coef = CoefD, N, Replace, Model)) # Estimate of population mean and variance +# # for each ranking method +# # print("From ListF" ) +# # print(EST) +# # print(EST) +# prec.weight <- (1 / EST[, 2]) / sum(1 / EST[, 2]) # precison weight-- inverse of the variance +# Cest <- sum(prec.weight * EST[, 1]) # point estimate for the population mean +# # JackNrep=matrix(0,nrow=n,ncol=2) +# # CoefD0n.1=CoefD0F(set_size,(n-1)) +# retest <- c(mean(EST[, 1]), Cest, W.est) +# return(retest) +# } diff --git a/R/RSSED2F.R b/R/RSSED2F.R deleted file mode 100644 index 154baf0..0000000 --- a/R/RSSED2F.R +++ /dev/null @@ -1,42 +0,0 @@ -#' This function Computes RSS estimator and its variance without replacement sampling -#' -#' @param RV Rank values -#' @param Y Response variable -#' @param H Set size -#' @param N Population size -#' -#' @return -#' @keywords internal -#' -RSSED2F <- function(RV, Y, H, N) { - n <- length(Y) # Sample size - # d=n/H - RVD <- data.frame(RV) - ind <- 1:n - ij <- expand.grid(ind, ind) # creates i,j index for pairevwise differences - M.est <- mean(aggregate(Y, RVD, mean)$x) # JPS estimate - YIYJ <- expand.grid(Y, Y) # create Y_i, Y_j pairs - YIYJ <- YIYJ[ij[, 1] != ij[, 2], ] # remove pairs Y_i=Y_j - GSample.Size <- aggregate(RV, data.frame(RV), length)$x # ranking group sample sizes - # dn=length(GSample.Size) - # print(dn) - # GSample.Size1=GSample.Size[GSample.Size>1] - # dn.star=length(GSample.Size1) - RhRhp <- expand.grid(RV, RV) # create the pairs of ranks h, h' that corresponds to Y_{[h]i} and Y_{[h']j} - RhRhp <- RhRhp[ij[, 1] != ij[, 2], ] # remove pairs that Y_{[h]i}=Y_{[h]i} - YIYJ2 <- (YIYJ[, 1] - YIYJ[, 2])^2 # Computes sum_i Sum_j (Y_{[h]i}- Y_{[h']j})^2 - group.mean <- aggregate(YIYJ2, RhRhp, mean) # Computes sum_i Sum_j (Y_{[h]i}- Y_{[h']j})^2/nh nh' if h !=h' - # Computes sum_i Sum_j (Y_{[h]i}- Y_{[h']j})^2/nh(nh-1) if h=h' - aggreq.Ssiz <- cbind(GSample.Size[group.mean[, 1]], GSample.Size[group.mean[, 2]]) # create group sample size vectors - # that corresponds to h and h' in sum_i Sum_j (Y_{[h]i}- Y_{[h']j})^2 - Y2hhT2 <- group.mean[group.mean[, 1] - group.mean[, 2] == 0, ]$x # Computes sum_i Sum_j (Y_{[h]i}- Y_{[h]j})^2/nh(nh-1), h=1,,,H - nhnh <- aggreq.Ssiz[group.mean[, 1] == group.mean[, 2], ][, 1] #### compute (n_1,n_2, ... n_h) - # nhnh= nh.nhV[,1] - T2ss <- sum(Y2hhT2 / (nhnh)) / (2 * H^2) - T2s <- sum(Y2hhT2) / (2 * H^2) # \sum_h \sum_i \sum_j( Y_{[h]i]}- Y_{[h]j})^2/(nh(nh-1)) - Y2hhT1 <- group.mean[group.mean[, 1] - group.mean[, 2] != 0, ]$x # \sum_h\sum_h'' \sum_i \sum_j( Y_{[h]i]}- Y_{[h'']j})^2/(nh(nh)) - T1s <- sum(Y2hhT1) / (2 * H^2) - VEST <- (T2ss - (T2s + T1s) / N) - if (VEST < 0) VEST <- (T2ss) / 2 - return(c(M.est, VEST)) -} diff --git a/R/jackknife_agreement_estimate.R b/R/jackknife_agreement_estimate.R new file mode 100644 index 0000000..e0df96a --- /dev/null +++ b/R/jackknife_agreement_estimate.R @@ -0,0 +1,15 @@ +# TODO: add doc +jackknife_agreement_estimate <- function(ranks, y, set_size, pop_size, fc) { + n <- nrow(ranks) + + agreement_weights <- t(apply(data.frame(ranks), 1, calculate_agreement_weights, set_size)) + aw_sum <- apply(agreement_weights, 2, sum) + cross_product <- y %*% agreement_weights + agreement_mean <- mean(cross_product[aw_sum > 0] / aw_sum[aw_sum > 0]) + + awy <- cbind(y, agreement_weights) + jackknife_mean <- apply(matrix(1:n, ncol = 1), 1, FWDel1, AWY = awy) + jackknife_var <- fc * (n - 1) * var(jackknife_mean) * ((n - 1) / n)^2 + + return(list(agreement_mean = agreement_mean, jackknife_var = jackknife_var)) +} diff --git a/R/jps_estimate.R b/R/jps_estimate.R index cae5c3a..d0efb83 100644 --- a/R/jps_estimate.R +++ b/R/jps_estimate.R @@ -99,7 +99,6 @@ jps_estimate <- function(data, set_size, replace = TRUE, model_based, N, alpha) # observation is deleted jps_var_ith_deleted <- jps_estimates[c(3:(n + 2)), ] jps_mean_ith_deleted <- jps_estimates[-c(1:(n + 2)), ] - # if (replace) fc <- 1 else fc <- 1 - n / N # finite population correction factor # variance weighted jps_var_weighted_mean <- sum((1 / jps_variance_n) * jps_mean_n) / sum(1 / jps_variance_n) @@ -118,13 +117,9 @@ jps_estimate <- function(data, set_size, replace = TRUE, model_based, N, alpha) jackknife_variance <- fc * (n - 1) * var(jackknife_mean) * ((n - 1) / n)^2 # agreement weighted - agreement_weights <- t(apply(data.frame(ranks), 1, calculate_agreement_weights, set_size = set_size)) - aw_sum <- apply(agreement_weights, 2, sum) - cross_product <- y %*% agreement_weights - agreement_mean <- mean(cross_product[aw_sum > 0] / aw_sum[aw_sum > 0]) - awy <- cbind(y, agreement_weights) - jackknife_agreement_mean <- apply(matrix(1:n, ncol = 1), 1, FWDel1, AWY = awy) - jackknife_agreement_var <- fc * (n - 1) * var(jackknife_agreement_mean) * ((n - 1) / n)^2 + jackknife_estimated <- jackknife_agreement_estimate(data[, -1], y, set_size, N, fc) + agreement_mean <- jackknife_estimated$agreement_mean + jackknife_agreement_var <- jackknife_estimated$jackknife_var estimated_means <- c(jps_mean, jps_var_weighted_mean, agreement_mean, mean_best_ranker, mean(y)) estimated_variances <- c( diff --git a/R/rss_estimate.R b/R/rss_estimate.R index 4a9c624..ead12f3 100644 --- a/R/rss_estimate.R +++ b/R/rss_estimate.R @@ -1,111 +1,91 @@ -########################################### -# This function provides estimator for RSS data -# RSSK: n by (K+1) dimensional data matrix, the first column is Y-values, -# the next K columns are the ranks of K-ranking methods -# set_size: set Size -# N: population size -# model: if Modle=0 design based inference, if model=1, superpopulation model -# replace: if replace=TRUE with replacmenet selection is used. -# If replace=FALSE without replacement selection is used -# B: Bootstrap replication -#' This function provides estimator for RSS data +#' This function provides an estimator for RSS data #' -#' @param data An n by (K+1) dimensional data matrix, the first column is Y-values, the next K coulumns are the ranks of K-ranking methods -#' @param set_size The set size -#' @param replace Logical. Sample with replacement? -#' @param model If Modle=0, use design based inference, if model=1, use superpopulation model -#' @param N The population size -#' @param alpha The significance level +#' @param data A matrix with a response variable in the first column and ranks in the following columns. +#' @param set_size Set size for each ranking group. +#' @param replace A boolean which specifies whether to sample with replacement or not. +#' @param model_based An inference mode: +#' - `FALSE`: design based inference +#' - `TRUE`: model based inference using super population model +#' @param pop_size The population size. Must be provided if sampling without replacement, or if `model` is `1`. +#' @param alpha A significance level. #' #' @return #' @keywords internal #' -rss_estimate <- function(data, set_size, replace, model, N, alpha) { - # unused variable - # RM <- data[, -1] - RV <- data[, 2] # We need to be careful about this. - Y <- data[, 1] # We need to be careful about this. Need to ensure response is in col 1. +rss_estimate <- function(data, set_size, replace, model_based, pop_size, alpha) { + first_ranker_ranks <- data[, 2] + y <- data[, 1] n <- nrow(data) - K <- ncol(data) - 1 - ######################################################################### - ########## Single ranker RSS estimator without replacement design + n_rankers <- ncol(data) - 1 + if (!replace) { - RSS.oneE <- mean(aggregate(Y, list(RV), FUN = mean)$x) # Single ranker RSS estimate - EST <- RSSED2F(RV, Y, set_size, N) - ### Confidence interval balanced RSS - LC <- EST[1] - qt(1 - alpha / 2, n - 1) * sqrt(EST[2]) - UC <- EST[1] + qt(1 - alpha / 2, n - 1) * sqrt(EST[2]) - } - ############################################################################# - ############################################################################### + rss_estimated <- rss_estimate_single(first_ranker_ranks, y, set_size, pop_size) + rss_mean <- rss_estimated[1] + rss_variance <- rss_estimated[2] + } else { + rss_mean <- mean(aggregate(y, list(first_ranker_ranks), FUN = mean)$x) - #################################################################################### - ########### Single ranker RSS estimator with replacement design - if (replace) { - RSS.oneE <- mean(aggregate(Y, list(RV), FUN = mean)$x) # Single ranker RSS estimate - RSS.oneCount <- aggregate(RV, list(RV), FUN = length)$x - RSS.oneVar <- aggregate(Y, list(RV), FUN = var)$x - Hd <- length(RSS.oneVar[RSS.oneCount > 1]) - RSS.oneVE <- sum(RSS.oneVar[RSS.oneCount > 1] / RSS.oneCount[RSS.oneCount > 1]) / Hd^2 - EST <- c(RSS.oneE, RSS.oneVE) - LC <- EST[1] - qt(1 - alpha / 2, n - 1) * sqrt(EST[2]) - UC <- EST[1] + qt(1 - alpha / 2, n - 1) * sqrt(EST[2]) + rank_count <- aggregate(first_ranker_ranks, list(first_ranker_ranks), FUN = length)$x + rss_variances <- aggregate(y, list(first_ranker_ranks), FUN = var)$x[rank_count > 1] + rss_variance <- sum(rss_variances / rank_count[rank_count > 1]) / length(rss_variances)^2 } - ###################################################################################### - ######################################################################################## - ########################################################################################## - ######## Single ranker RSS estimator with under super population model ################## - if (model == 1) { - RSS.oneE <- mean(aggregate(Y, list(RV), FUN = mean)$x) # Single ranker RSS estimate - EST <- RSSED2F(RV, Y, set_size, N) - LC <- EST[1] - qt(1 - alpha / 2, n - 1) * sqrt(EST[2]) - UC <- EST[1] + qt(1 - alpha / 2, n - 1) * sqrt(EST[2]) + if (model_based) { + rss_estimated <- rss_estimate_single(first_ranker_ranks, y, set_size, pop_size) + rss_mean <- rss_estimated[1] + rss_variance <- rss_estimated[2] } - Lower.limit <- round(c(LC), digits = 3) - Upper.limit <- round(c(UC), digits = 3) - ST.error <- sqrt(EST[2]) - Point.est <- EST[1] - if (K == 1) { - Confinterval <- paste(Lower.limit, Upper.limit, sep = ",") - Estimator <- c("RSS-1") - summary.table <- data.frame(Estimator, round(Point.est, digits = 3), round(ST.error, digits = 3), Confinterval, stringsAsFactors = F) - colnames(summary.table) <- c("Estimator", "point.est", "St.error", paste((1 - alpha) * 100, "% Confidence Interval", sep = "")) - return(summary.table) - } + ci_col <- paste0((1 - alpha) * 100, "% Confidence Interval") + std_error <- sqrt(rss_variance) + interval <- get_interval(rss_mean, n, std_error, alpha, round_digits = 3) + lower_bound <- interval$lower_bound + upper_bound <- interval$upper_bound + if (n_rankers == 1) { + ci <- paste(lower_bound, upper_bound, sep = ",") + estimator_name <- c("RSS-1") + summary.table <- data.frame( + Estimator = estimator_name, + point.est = round(rss_mean, digits = 3), + St.error = round(std_error, digits = 3), + ci, + stringsAsFactors = FALSE + ) + colnames(summary.table)[4] <- ci_col - #################################### 33 - # agreement weight estimator - AW <- data[, -1] # Ranks - AW <- t(apply(data.frame(data[, -1]), 1, calculate_agreement_weights, set_size)) # agreemeent weights - eff.SS <- apply(AW, 2, sum) - Crosprod <- data[, 1] %*% AW - W.est <- mean(Crosprod[eff.SS > 0] / eff.SS[eff.SS > 0]) # RSS agreement weight estiamtor - AWY <- cbind(data[, 1], AW) - Jack.Repl.AWi <- apply(matrix(1:n, ncol = 1), 1, FWDel1, AWY = AWY) # Aggrement weight estimator - # when the i-th obseervation is deleted - if (replace) fc <- 1 else fc <- 1 - n / (N - 1) - J.var <- fc * (n - 1) * var(Jack.Repl.AWi) * ((n - 1) / n)^2 # Jackknife variance estimate for aggreement weight JPS estimator - ############################################################## + return(summary.table) + } + if (replace) { + fc <- 1 + } else { + fc <- 1 - n / (pop_size - 1) + } + jackknife_estimated <- jackknife_agreement_estimate(data[, -1], y, set_size, pop_size, fc) + agreement_mean <- jackknife_estimated$agreement_mean + jackknife_var <- jackknife_estimated$jackknife_var + jackknife_std_error <- sqrt(jackknife_var) + jackknife_interval <- get_interval(agreement_mean, n, jackknife_std_error, alpha, round_digits = 3) + jackknife_lower_bound <- jackknife_interval$lower_bound + jackknife_upper_bound <- jackknife_interval$upper_bound - # Jackknife confidence interval based on weighted estimator - LWC <- W.est - qt(1 - alpha / 2, n - 1) * sqrt(J.var) - UWC <- W.est + qt(1 - alpha / 2, n - 1) * sqrt(J.var) + lower_bounds <- c(lower_bound, jackknife_lower_bound) + upper_bounds <- c(upper_bound, jackknife_upper_bound) + std_errors <- c(std_error, jackknife_std_error) + means <- c(rss_mean, agreement_mean) + cis <- paste(lower_bounds, upper_bounds, sep = ",") + estimator_names <- c("RSS-1", " Aggregate Weighted") + summary.table <- data.frame( + Estimator = estimator_names, + point.est = round(means, digits = 3), + St.error = round(std_errors, digits = 3), + cis, + stringsAsFactors = F + ) + colnames(summary.table)[4] <- ci_col - ############################################################################################ - ############################################################################################# - Lower.limit <- round(c(LC, LWC), digits = 3) - Upper.limit <- round(c(UC, UWC), digits = 3) - ST.error <- c(sqrt(EST[2]), sqrt(J.var)) - Point.est <- c(EST[1], W.est) - Confinterval <- paste(Lower.limit, Upper.limit, sep = ",") - Estimator <- c("RSS-1", " Aggregate Weighted") - summary.table <- data.frame(Estimator, round(Point.est, digits = 3), round(ST.error, digits = 3), Confinterval, stringsAsFactors = F) - colnames(summary.table) <- c("Estimator", "point.est", "St.error", paste((1 - alpha) * 100, "% Confidence Interval", sep = "")) return(summary.table) } diff --git a/R/rss_estimate_single.R b/R/rss_estimate_single.R new file mode 100644 index 0000000..e507f03 --- /dev/null +++ b/R/rss_estimate_single.R @@ -0,0 +1,38 @@ +#' This function Computes RSS estimator and its variance without replacement sampling +#' +#' @param first_ranker_ranks Ranks from the first ranker. +#' @param y Response variable. +#' @param set_size Set size. +#' @param pop_size Population size. +#' +#' @return +#' @keywords internal +#' +rss_estimate_single <- function(first_ranker_ranks, y, set_size, pop_size) { + n <- length(y) + indices <- 1:n + + # i, j index for pair-wise differences + ij <- expand.grid(indices, indices) + df_rank <- data.frame(first_ranker_ranks) + estimated_mean <- mean(aggregate(y, df_rank, mean)$x) + + # Y_i, Y_j pairs + yiyj <- expand.grid(y, y)[ij[, 1] != ij[, 2], ] + rank_count <- aggregate(first_ranker_ranks, df_rank, length)$x + # create the pairs of ranks h, h' that corresponds to Y_{[h]i} and Y_{[h']j} + rank_pairs <- expand.grid(first_ranker_ranks, first_ranker_ranks)[ij[, 1] != ij[, 2], ] + + rss_y <- (yiyj[, 1] - yiyj[, 2])^2 + rss_by_rank <- aggregate(rss_y, rank_pairs, mean) + + rss_equal_h <- rss_by_rank[rss_by_rank[, 1] == rss_by_rank[, 2], ]$x + total_rss_equal_h <- sum(rss_equal_h / rank_count) / (2 * set_size^2) + + estimated_variance <- total_rss_equal_h - sum(rss_by_rank$x) / (2 * set_size^2) / pop_size + if (estimated_variance < 0) { + estimated_variance <- total_rss_equal_h / 2 + } + + return(c(estimated_mean, estimated_variance)) +} diff --git a/R/rss_sample.R b/R/rss_sample.R index b714a4b..4407b15 100644 --- a/R/rss_sample.R +++ b/R/rss_sample.R @@ -1,7 +1,7 @@ #' Generate ranked set sampling (RSS) on the population provided. #' #' @inheritParams rss_sample_w_replacement -#' @param with_replacement A boolean which specifies whether to sample with replacement or not. +#' @param replace A boolean which specifies whether to sample with replacement or not. #' #' @return A matrix with ranks from each ranker. #' @export @@ -12,7 +12,7 @@ #' # the number of samples to be ranked in each set #' H <- 3 #' -#' with_replacement <- FALSE +#' replace <- FALSE #' sigma <- 4 #' mu <- 10 #' n_rankers <- 3 @@ -25,7 +25,7 @@ #' x <- population + tau * rnorm(population_size, 0, 1) #' #' population <- cbind(population, x) -#' rss_sample(population, n, H, n_rankers, with_replacement) +#' rss_sample(population, n, H, n_rankers, replace) #' #> [,1] [,2] [,3] [,4] #' #> [1,] 8.910625 1 3 1 #' #> [2,] 12.317145 2 2 1 @@ -37,10 +37,10 @@ #' #> [8,] 9.134107 2 1 1 #' #> [9,] 12.960431 3 3 1 #' -rss_sample <- function(pop, n, H, K, with_replacement = FALSE) { - verify_boolean(with_replacement) +rss_sample <- function(pop, n, H, K, replace = FALSE) { + verify_boolean(replace) - if (with_replacement) { + if (replace) { return(rss_sample_w_replacement(pop, n, H, K)) } return(rss_sample_wo_replacement(pop, n, H, K)) diff --git a/R/utils.R b/R/utils.R index 1a0cb5f..3fe2078 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,8 +1,17 @@ default_tolerance <- .Machine$double.eps^0.5 -# is_between <- function(x, lower, upper) { -# return(is_between_(lower, upper)(x)) -# } +get_interval <- function(sample_mean, sample_size, std_error, alpha, round_digits = NULL) { + qt_term <- qt(1 - alpha / 2, sample_size - 1) + lower_bound <- sample_mean - qt_term * std_error + upper_bound <- sample_mean + qt_term * std_error + + if (!is.null(round_digits)) { + lower_bound <- round(lower_bound, digits = round_digits) + upper_bound <- round(upper_bound, digits = round_digits) + } + + return(list(lower_bound = lower_bound, upper_bound = upper_bound)) +} is_between_ <- function(lower, upper, lower_exclude = FALSE, upper_exclude = FALSE) { return(function(x) { diff --git a/man/JPSED0F.Rd b/man/JPSED0F.Rd deleted file mode 100644 index f3dc27a..0000000 --- a/man/JPSED0F.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JPSED0F.R -\name{JPSED0F} -\alias{JPSED0F} -\title{This function Computes JPS estimator and its variance} -\usage{ -JPSED0F(RV, Y, set_size, Coef, N, Replace, Model) -} -\arguments{ -\item{RV}{Ranking values provided} - -\item{Y}{The data values that were ranked} - -\item{set_size}{The set size} - -\item{Coef}{Coefficients} - -\item{N}{Population size} - -\item{Replace}{Logical. Sample with replacement?} - -\item{Model}{If model is 0, it's design based inference, if model = 1, it is model based inference using super population model.} -} -\value{ -A vector with two elements: the mean and variance estimate from JPS -} -\description{ -This function Computes JPS estimator and its variance -} -\keyword{internal} diff --git a/man/JPSLF.Rd b/man/JPSLF.Rd deleted file mode 100644 index 027a423..0000000 --- a/man/JPSLF.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/JPSLF.R -\name{JPSLF} -\alias{JPSLF} -\title{Judgement Post Stratification List Function} -\usage{ -JPSLF( - data, - set_size, - replace, - population_size, - model, - estimator = "SD Weighted" -) -} -\arguments{ -\item{data}{Data to use to estimate.} - -\item{set_size}{Number of samples which are ranked in each set.} - -\item{replace}{Logical. Are samples drawn with replacement?} - -\item{population_size}{Population size of the population which \code{data} is taken from.} - -\item{model}{Whether to use design-based approach or super-population (model based) approach} - -\item{estimator}{Which type of estimator to return. Default is "SD Weighted". Other options are "Unweighted", "Aggregate Weight", "JPS Estimate", "SRS estimate", "Minimum"} -} -\description{ -Judgement Post Stratification List Function -} -\keyword{internal} diff --git a/man/ListF.Rd b/man/ListF.Rd deleted file mode 100644 index 3e8dbe5..0000000 --- a/man/ListF.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/ListF.R -\name{ListF} -\alias{ListF} -\title{Title} -\usage{ -ListF(data, set_size, Replace, N, Model, CoefD0) -} -\arguments{ -\item{data}{The data} - -\item{set_size}{set size} - -\item{Replace}{replacement} - -\item{N}{Population size} - -\item{Model}{Model} - -\item{CoefD0}{Coef} -} -\description{ -Title -} -\keyword{internal} diff --git a/man/RSSED2F.Rd b/man/RSSED2F.Rd deleted file mode 100644 index 7f36ed5..0000000 --- a/man/RSSED2F.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RSSED2F.R -\name{RSSED2F} -\alias{RSSED2F} -\title{This function Computes RSS estimator and its variance without replacement sampling} -\usage{ -RSSED2F(RV, Y, H, N) -} -\arguments{ -\item{RV}{Rank values} - -\item{Y}{Response variable} - -\item{H}{Set size} - -\item{N}{Population size} -} -\description{ -This function Computes RSS estimator and its variance without replacement sampling -} -\keyword{internal} diff --git a/man/jps_sample.Rd b/man/jps_sample.Rd index 8a8d66b..44396ec 100644 --- a/man/jps_sample.Rd +++ b/man/jps_sample.Rd @@ -16,8 +16,6 @@ jps_sample(pop, n, H, tau, K, with_replacement = FALSE) \item{tau}{A parameter which controls ranking quality.} \item{K}{Number of rankers.} - -\item{with_replacement}{A boolean which specifies whether to sample with replacement or not.} } \value{ A matrix with ranks from each ranker. diff --git a/man/rss_estimate.Rd b/man/rss_estimate.Rd index 003146d..b1c87b6 100644 --- a/man/rss_estimate.Rd +++ b/man/rss_estimate.Rd @@ -2,24 +2,28 @@ % Please edit documentation in R/rss_estimate.R \name{rss_estimate} \alias{rss_estimate} -\title{This function provides estimator for RSS data} +\title{This function provides an estimator for RSS data} \usage{ -rss_estimate(data, set_size, replace, model, N, alpha) +rss_estimate(data, set_size, replace, model_based, pop_size, alpha) } \arguments{ -\item{data}{An n by (K+1) dimensional data matrix, the first column is Y-values, the next K coulumns are the ranks of K-ranking methods} +\item{data}{A matrix with a response variable in the first column and ranks in the following columns.} -\item{set_size}{The set size} +\item{set_size}{Set size for each ranking group.} -\item{replace}{Logical. Sample with replacement?} +\item{replace}{A boolean which specifies whether to sample with replacement or not.} -\item{model}{If Modle=0, use design based inference, if model=1, use superpopulation model} +\item{model_based}{An inference mode: +\itemize{ +\item \code{FALSE}: design based inference +\item \code{TRUE}: model based inference using super population model +}} -\item{N}{The population size} +\item{pop_size}{The population size. Must be provided if sampling without replacement, or if \code{model} is \code{1}.} -\item{alpha}{The significance level} +\item{alpha}{A significance level.} } \description{ -This function provides estimator for RSS data +This function provides an estimator for RSS data } \keyword{internal} diff --git a/man/rss_estimate_single.Rd b/man/rss_estimate_single.Rd new file mode 100644 index 0000000..050313a --- /dev/null +++ b/man/rss_estimate_single.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rss_estimate_single.R +\name{rss_estimate_single} +\alias{rss_estimate_single} +\title{This function Computes RSS estimator and its variance without replacement sampling} +\usage{ +rss_estimate_single(first_ranker_ranks, y, set_size, pop_size) +} +\arguments{ +\item{first_ranker_ranks}{Ranks from the first ranker.} + +\item{y}{Response variable.} + +\item{set_size}{Set size.} + +\item{pop_size}{Population size.} +} +\description{ +This function Computes RSS estimator and its variance without replacement sampling +} +\keyword{internal} diff --git a/man/rss_sample.Rd b/man/rss_sample.Rd index 5cdc3e8..2c2f23a 100644 --- a/man/rss_sample.Rd +++ b/man/rss_sample.Rd @@ -4,7 +4,7 @@ \alias{rss_sample} \title{Generate ranked set sampling (RSS) on the population provided.} \usage{ -rss_sample(pop, n, H, K, with_replacement = FALSE) +rss_sample(pop, n, H, K, replace = FALSE) } \arguments{ \item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} @@ -15,7 +15,7 @@ rss_sample(pop, n, H, K, with_replacement = FALSE) \item{K}{Number of rankers.} -\item{with_replacement}{A boolean which specifies whether to sample with replacement or not.} +\item{replace}{A boolean which specifies whether to sample with replacement or not.} } \value{ A matrix with ranks from each ranker. @@ -29,7 +29,7 @@ population_size <- 600 # the number of samples to be ranked in each set H <- 3 -with_replacement <- FALSE +replace <- FALSE sigma <- 4 mu <- 10 n_rankers <- 3 @@ -42,7 +42,7 @@ tau <- sigma * sqrt(1 / rho^2 - 1) x <- population + tau * rnorm(population_size, 0, 1) population <- cbind(population, x) -rss_sample(population, n, H, n_rankers, with_replacement) +rss_sample(population, n, H, n_rankers, replace) #> [,1] [,2] [,3] [,4] #> [1,] 8.910625 1 3 1 #> [2,] 12.317145 2 2 1 diff --git a/tests/testthat/test-rss_sample.R b/tests/testthat/test-rss_sample.R index 8bf1a25..6bbd000 100644 --- a/tests/testthat/test-rss_sample.R +++ b/tests/testthat/test-rss_sample.R @@ -20,5 +20,5 @@ test_that("RSS has a correct output.", { }) test_that("Inputs are valid.", { - expect_error(rss_sample(1:10, -100, 10, 1, "T"), "`with_replacement` must be a boolean.") + expect_error(rss_sample(1:10, -100, 10, 1, "T"), "`replace` must be a boolean.") }) From 8c2fc61a567f2345be6f91424ffba7dd4e96b09d Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Tue, 21 May 2024 21:53:52 +0930 Subject: [PATCH 08/13] update examples --- R/rss_jps_estimate.R | 1 + Readme.md | 114 +++++++++++++++++++++++++++++++------------ 2 files changed, 85 insertions(+), 30 deletions(-) diff --git a/R/rss_jps_estimate.R b/R/rss_jps_estimate.R index 77a8bda..cd2147f 100644 --- a/R/rss_jps_estimate.R +++ b/R/rss_jps_estimate.R @@ -33,6 +33,7 @@ #' rhos <- rep(0.75, n_rankers) #' taus <- sigma * sqrt(1 / rhos^2 - 1) #' population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) +#' #' data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) #' data <- data[order(data[, 2]), ] #' diff --git a/Readme.md b/Readme.md index 9ec2fb7..cbfe264 100644 --- a/Readme.md +++ b/Readme.md @@ -31,7 +31,9 @@ implement Ranked Set Sampling in practice. * [JPS Sampling](#jps-sampling) * [RSS Sampling](#rss-sampling) * [Installation](#installation) -* [Example of use](#example-of-use) +* [Examples](#examples) + * [JPS Sample and Estimator](#jps-sample-and-estimator) + * [JPS Sample and Estimator](#jps-sample-and-estimator-1) * [Citing this package](#citing-this-package) * [Related Reference](#related-reference) @@ -60,35 +62,87 @@ if(!require("remotes")) install.packages("remotes") remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) ``` -## Example of use - - - -This package includes some example data files, which can be seen at -[population](reference/population.html) and -[emergence\_ranks](reference/emergence_ranks.html). After installing the -package as above, the package can be used as in the following example: - -``` r -# load the package -library(RankedSetSampling) - -# Compute the JPS estimators - -JPS.Estimates <- OneSample(data = emergence_ranks, set_size = 4, - method = "JPS", confidence = 0.95, - replace = TRUE, model = 0, - pop_size = nrow(population)) - -print(JPS.Estimates) -#> Estimator Estimate Standard Error 95% Confidence intervals -#> 1 UnWeighted 1.117 0.238 0.606,1.627 -#> 2 Sd.Weighted 1.114 0.253 0.572,1.656 -#> 3 Aggregate Weight 1.108 0.210 0.657,1.559 -#> 4 JPS Estimate 1.021 0.269 0.443,1.599 -#> 5 SRS estimate 1.200 0.262 0.638,1.762 -#> 6 Minimum 1.108 0.210 0.657,1.559 -``` +## Examples + +### JPS Sample and Estimator + +
+ snippet + + ``` r + set.seed(112) + population_size <- 600 + # the number of samples to be ranked in each set + H <- 3 + + with_replacement <- FALSE + sigma <- 4 + mu <- 10 + n_rankers <- 3 + # sample size + n <- 30 + + rhos <- rep(0.75, n_rankers) + taus <- sigma * sqrt(1 / rhos^2 - 1) + population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) + + data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) + data <- data[order(data[, 2]), ] + + RankedSetSampling::rss_jps_estimate( + data, + set_size = H, + method = "JPS", + confidence = 0.80, + replace = with_replacement, + model_based = FALSE, + pop_size = population_size + ) + #> Estimator Estimate Standard Error 80% Confidence intervals + #> 1 UnWeighted 9.570 0.526 8.88,10.26 + #> 2 Sd.Weighted 9.595 0.569 8.849,10.341 + #> 3 Aggregate Weight 9.542 0.500 8.887,10.198 + #> 4 JPS Estimate 9.502 0.650 8.651,10.354 + #> 5 SRS estimate 9.793 0.783 8.766,10.821 + #> 6 Minimum 9.542 0.500 8.887,10.198 + ``` +
+ +### JPS Sample and Estimator + +
+ snippet + + ``` r + set.seed(112) + + # SBS sample size, PPS sample size + sample_sizes <- c(5, 5) + + n_population <- 233 + k <- 0:(n_population - 1) + x1 <- sample(1:13, n_population, replace = TRUE) / 13 + x2 <- sample(1:8, n_population, replace = TRUE) / 8 + y <- (x1 + x2) * runif(n = n_population, min = 1, max = 2) + 1 + measured_sizes <- y * runif(n = n_population, min = 0, max = 4) + + population <- matrix(cbind(k, x1, x2, measured_sizes), ncol = 4) + sample_result <- sbs_pps_sample(population, sample_sizes) + + # estimate the population mean and construct a confidence interval + df_sample <- sample_result$sample + sample_id <- df_sample[, 1] + y_sample <- y[sample_id] + + sbs_pps_estimates <- sbs_pps_estimate( + population, sample_sizes, y_sample, df_sample, + n_bootstrap = 100, alpha = 0.05 + ) + print(sbs_pps_estimates) + #> n1 n2 Estimate St.error 95% Confidence intervals + #> 1 5 5 2.849 0.1760682 2.451,3.247 + ``` +
## Citing this package From ed22be5f8e7c7a10c615e9b4ce5e2019b44a202c Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Tue, 21 May 2024 21:56:49 +0930 Subject: [PATCH 09/13] fix typo --- Readme.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Readme.md b/Readme.md index cbfe264..e06584f 100644 --- a/Readme.md +++ b/Readme.md @@ -33,7 +33,7 @@ implement Ranked Set Sampling in practice. * [Installation](#installation) * [Examples](#examples) * [JPS Sample and Estimator](#jps-sample-and-estimator) - * [JPS Sample and Estimator](#jps-sample-and-estimator-1) + * [SBS PPS Sample and Estimator](#sbs-pps-sample-and-estimator) * [Citing this package](#citing-this-package) * [Related Reference](#related-reference) @@ -67,7 +67,7 @@ remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) ### JPS Sample and Estimator
- snippet + JPS sample and estimator ``` r set.seed(112) @@ -108,10 +108,10 @@ remotes::install_github("biometryhub/RankedSetSampling", upgrade = FALSE) ```
-### JPS Sample and Estimator +### SBS PPS Sample and Estimator
- snippet + SBS PPS sample and estimator ``` r set.seed(112) From f59f59bb2b598e228d2f791974ed8cc01c661c17 Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Wed, 22 May 2024 08:24:29 +0930 Subject: [PATCH 10/13] fix undocumented arguments --- R/calculate_agreement_weights.R | 2 +- R/jps_sample.R | 6 +++--- R/sbs_pps_sample.R | 1 + man/calculate_agreement_weights.Rd | 4 ++-- man/get_sbs_pps_sample_indices.Rd | 2 ++ man/jps_sample.Rd | 4 +++- man/rss_jps_estimate.Rd | 1 + 7 files changed, 13 insertions(+), 7 deletions(-) diff --git a/R/calculate_agreement_weights.R b/R/calculate_agreement_weights.R index e2e813f..c42bfab 100644 --- a/R/calculate_agreement_weights.R +++ b/R/calculate_agreement_weights.R @@ -1,6 +1,6 @@ #' Calculate Agreement Weights #' -#' @param RV Ranking values +#' @param ranks Ranking values #' @param set_size Set size #' #' @return A vector of agreement weights diff --git a/R/jps_sample.R b/R/jps_sample.R index a1d92c0..97fd3e0 100644 --- a/R/jps_sample.R +++ b/R/jps_sample.R @@ -36,10 +36,10 @@ #' #> [9,] 8.701285 2 1 2 #' #> [10,] 13.323884 3 3 3 #' -jps_sample <- function(pop, n, H, tau, K, with_replacement = FALSE) { - verify_jps_params(pop, n, H, tau, K, with_replacement) +jps_sample <- function(pop, n, H, tau, K, replace = FALSE) { + verify_jps_params(pop, n, H, tau, K, replace) - sampling_matrix <- matrix(sample(pop, n * H, replace = with_replacement), ncol = H, nrow = n) + sampling_matrix <- matrix(sample(pop, n * H, replace = replace), ncol = H, nrow = n) # rank each SRS unit post experimentally jps_matrix <- matrix(0, ncol = K + 1, nrow = n) diff --git a/R/sbs_pps_sample.R b/R/sbs_pps_sample.R index bdef29c..dbb6359 100644 --- a/R/sbs_pps_sample.R +++ b/R/sbs_pps_sample.R @@ -74,6 +74,7 @@ sbs_pps_sample <- function(population, n, n_cores = getOption("n_cores", 1)) { #' 1. Halton numbers #' 2. Size measurements of population units #' @param n Sample sizes (SBS sample size, PPS sample size). +#' @param with_unique_pps A boolean to specify whether to use unique indices for pps sample or not. #' #' @return A named list of: #' - sbs_pps_indices: sample indices diff --git a/man/calculate_agreement_weights.Rd b/man/calculate_agreement_weights.Rd index c5d03ea..141d8fe 100644 --- a/man/calculate_agreement_weights.Rd +++ b/man/calculate_agreement_weights.Rd @@ -7,9 +7,9 @@ calculate_agreement_weights(ranks, set_size) } \arguments{ -\item{set_size}{Set size} +\item{ranks}{Ranking values} -\item{RV}{Ranking values} +\item{set_size}{Set size} } \value{ A vector of agreement weights diff --git a/man/get_sbs_pps_sample_indices.Rd b/man/get_sbs_pps_sample_indices.Rd index f61003e..1999595 100644 --- a/man/get_sbs_pps_sample_indices.Rd +++ b/man/get_sbs_pps_sample_indices.Rd @@ -14,6 +14,8 @@ get_sbs_pps_sample_indices(population, n, with_unique_pps = FALSE) }} \item{n}{Sample sizes (SBS sample size, PPS sample size).} + +\item{with_unique_pps}{A boolean to specify whether to use unique indices for pps sample or not.} } \value{ A named list of: diff --git a/man/jps_sample.Rd b/man/jps_sample.Rd index 44396ec..f78ebba 100644 --- a/man/jps_sample.Rd +++ b/man/jps_sample.Rd @@ -4,7 +4,7 @@ \alias{jps_sample} \title{Generate JPS sampling on the provided population.} \usage{ -jps_sample(pop, n, H, tau, K, with_replacement = FALSE) +jps_sample(pop, n, H, tau, K, replace = FALSE) } \arguments{ \item{pop}{Population that will be sampled with an auxiliary parameter in the second column.} @@ -16,6 +16,8 @@ jps_sample(pop, n, H, tau, K, with_replacement = FALSE) \item{tau}{A parameter which controls ranking quality.} \item{K}{Number of rankers.} + +\item{replace}{A boolean which specifies whether to sample with replacement or not.} } \value{ A matrix with ranks from each ranker. diff --git a/man/rss_jps_estimate.Rd b/man/rss_jps_estimate.Rd index 78023a7..c85295b 100644 --- a/man/rss_jps_estimate.Rd +++ b/man/rss_jps_estimate.Rd @@ -61,6 +61,7 @@ n <- 30 rhos <- rep(0.75, n_rankers) taus <- sigma * sqrt(1 / rhos^2 - 1) population <- qnorm((1:population_size) / (population_size + 1), mu, sigma) + data <- RankedSetSampling::jps_sample(population, n, H, taus, n_rankers, with_replacement) data <- data[order(data[, 2]), ] From f3f5e40c4286d8a26752ddd7220988c2ba34cb58 Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Wed, 22 May 2024 08:27:50 +0930 Subject: [PATCH 11/13] remove unrelated data file --- data/POP_Corrugated.RData | Bin 4226 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 data/POP_Corrugated.RData diff --git a/data/POP_Corrugated.RData b/data/POP_Corrugated.RData deleted file mode 100644 index b969384d0fdf5fbc849129e7bf953809a96dfde5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4226 zcmYMmbyO1y!+>!R*rcV~(H%piK|oScK?w)Kl}Sjq)EMk4jdUX;q(h{H(IO$O2$vWm zCX55eJ*DE{rKRWc##&Vh}v$;=1q zeX|1V@~RDClSI9gk5nSUCOhJM!_lc3O%U>UO%tcS9qhURT#Az?tmGypU^ni zBKm1<;x*4Sj~jNmU3nN4Bp4-V-U^yBmC!u-Tc4_c^@*|#gu*1jN$>IB;)1M?biS*q zsa(n=#J_hOrnlu`mIiY=hLDUY&w^=Ij4)@Ed;HPW@X^7-gzALRf!RK&^J`~+aFG0| zJiUCfycyU`egZrpF9UY{YjUV%un?ij5pcJHZ8Kt=DwmQ4hTS}HPeyYSAMVjM$qn?} zM$rfJ>?JnI4S6S`*@?`1CZFWK^{^7}?Tvks8}8vn@dc~z5&qe4YV7N|MKs=%YEpL! z>BGmNnZQv=^N|)}So4vhRCR$61ANjlSBHW>2Ud2(Ar-|RqK{8lX6umhSH^mepi@x- zA%^&rWxfveqh}*F0*-GZ*!&r>cSkf+!}-tj@bSwm9b*1+*cT(+so?@=2KeM|LH{BS#(x=&1D1mR#ow?ympV9L3Fu!W!N8aOIA9s*U&O#}Ukc!2k)VH( z0299~!Nrz>{);L7;=~6Wk+PTeCmpzenXOXLI~eEXU%1$k{~-mYa=C_!Ed%|FXc*-s z6V4C``VS(`B)+7OYRj%t^-Pk-_ndzlZJqQvC zjlM#x_BSAOYCeUm!hHT*aLc2n!+U*OS0!2=N#SpHcnCtELzSF$?q^h5)K)X#oE;$ErlP63o#3b_%Tdxqaoj> z#ZBeS&ztR=lc>uZ-~+08c_d_Iqq_^OH#7y>m28*>hUHz&{c3!Ft5xrfpw*jP)6DWn zSnx2To|35fZ<99i`fcZ}c)K3xK-vM$wwVm~GEoy>oUzPvT_@FGh0)5PrxS%k z10FAtcI#dmM}3=`4Yk{os9g;cD~h4gk(8y6g1X_YDu7zSpM`U5Sm6kvHmp{G8o-vCnwx#nMXHvUxHY-L}uE|Oo~M@kUP6& zyc=!f;)f7QuDC1T(WQ_?vi4L#xy|z*JAsze2mNWng7|ezs;RWM{?*2-ZA_U_0pl!W5j)6EcC^` z<{Nt4WRRcBpQNPZo`d50Ryt0}s4>*tddkgA)!<0M_t9xoKlyTIGooiRMNm$_X%ns1 z$(#Fn=jf?1&;5t+!#-8Nrc9nV$L}VSpD$ScR?n|$306$K3K-}xNFhJ5fuPzcWrUo6 zW^WE&THF!}MI_i|se+Rm{guis$n>}4dzLm#2g*`)ffh&65RmY{cp0ejP+HA6ZnZ1&fdms6ll;h>2R^^hCJ=^t%sl= zo>QDEyIy)x)I9A>HhC>D{dwy=A0K8cN@HWTD6PhL{}Cgq+|E!7^&Hx$Atm zRzeNr!0J{9VylMjbm!5HQ@{-|+&kttV6pyc1%RBN`u?J6gC;L^0ftMNB1eYTFY-41 z@o`#S4h(u*sXVR4)x`dcChc17cS}u`i<9_7F(l7he<+<7)bWSvfaR>{820M}W9iS3 zqVWzd;KN;u)1f*Gpx;nS2Ya9*GJ}@()Y>rh(3Wss`6Uq94(Umd{7B)vZr;3^9?JhC z@G!{TS$STnAlKyhVJ=icSHokg5xg)ks=J&Ie z-|vx&Vet#L>GyO5hks;shZx5(wIgEu?GZc63du2Nr|!vL_dbCir&_6_PZKQOjQVk% zILE?OWyUO_-l|;Z`?e;B)xu5@U3R3CKn?LOCXVdd8Gc>o0a2>6t<+H#Dn2144$67N zwe2X00ypbV{jB`B|v92lZ6XH%+pQh*Sds zPowQZ*ZQ8)dHS|#nu{tHe{%8Z)ic6rWfL8TKuO=7ACbO_Nr6QQXPJXANj?-F9OvQ>y~mRDmm3?dUj{C zQCpS}EX;O=w58W?MJYV!fT+I?~4ZR4l-YPYiVVv#(u?8)`-*;?py=lzmr#(zRfeB>`RddQ*B7&U{C-BfZv z`ccBdKD6p82QATr&?ZJ)k6Ds-p@AN8&n>qVoic}SKAO}}6=%#<2gkd@`>p)go?Yt! zsv85^_dG8`_^f!CUb}3y#th9aMe@aF$M}U*sc)?)rD}ez6rQ(dV#SE;&D2_vhrNq1 zWGrKPn*NVZ?4F2@+v-U^r3yvvh^WZ`2(GWA7wySXs`F^|PwCy* zNKGdAY5M7AQ)~aJc5>26!y(nNj5Y>8XmI+Q8yYy&XxG^#3tm_fU01pU#u>XczM&{P1YKn0Ga;DbecK5pTj$==iI(h_(`5@dI0T^rf{yMtu4~R?VHB0bs>alLv1B0r5!5`G>S99bJNF#C`8;8s3~j zsp44~coQt6wE5M2T*!U79S0wvmE>gD&=d`YkSW|FmZX0|wMZdxXu%pCT>& zxRZ=u@@x)VPt(5Xvry8H_GTHV7ozH63cZX0u_uDRqe%7soQsiI9);r%EEwpzp z)1KGQDuHDiv-qx-c!e9qsZng3h#%xG!BuUHxkt4zV(fSTKt#Ybp_K;*i0Jg>E+ebU z_WYRe`C+cLXL_r$5L;%}{5Vc!iDr?)xDwJ$AUoao`kol8(&wjDVS8)=;8fR8pUStn zJrpLRsC2fgeSzJ;YK@mgck32#sqN^8xpMAhO!-oEYcxa+2j1mO`nVi%@`^?L5coON z+*QXx%qy&VUUxNKbd1MBF1paC(F6Fjzm-0q-diyd#h_#(s>6 zWl)-Eg+6I~Bbs5osZNbJK_)r}Dy~qZo`Xz`72iLP=|#;YP55Dx$(Dp63;Sp`aC}Om z(c?_u#8WQDdRBJMVy%&x?ngibpS}XrhYw(OQ$ka=!-H);^mSt^V{ob9lXicWCfLQa z{asc!{IUCpG;$w9_Pm?XqyE7UI4L;i8!r=?b*VYJK;9ATO}8w3*OQVpERROFh#F8| zs`^)?-8AA|X-dzYt*zpF->m0a0%l zc((|G-|^qTWpjpui5o%?yP(P{`kK=|dgRPhnd>&TRjeKIOxA*RB4b?Zu<{Z&( zOuK$7^mjab3`sS?V-8x<8B$ODsp&>Qa;Sd$)0p>DJ8G?mR6(kQ|C;(RM`)pOb2iTk zrJ{~GZm_#!C$A0IxcB{+%Tmn3;qF{Q!BuqqZo!keCDC@jaWhquQXdC5+_>d=B4&HS`t7sK{JB`|IM(bSSpt@V|)s7~;( zl*mC4K*4G&(HbVxM~HmPWtT2vTl}#tpj-)_FhX!yd*_s!9u8f7Js}33QxOV06rr&7 zi0Tb^<$edut|YC%Sa<7`*6d;|k^pa5f8vrge!EIzE*yG?kNQ#rp^&2%g2Z00Muc`Km8@e%x0u97My ztR{V?8^Q(wF)HKtbD5s&ROO1Wr0{PXFkrJ zZOkN&9{4IJhwr7m2Uz;T|2-s(lkO%yyoAkef&0(bVk->T54Vgj@ZOOhM8QK2 Date: Wed, 22 May 2024 09:17:10 +0930 Subject: [PATCH 12/13] minor changes --- .Rbuildignore | 4 +++- R/inclusion_probability.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 2725271..d3912c6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -8,4 +8,6 @@ ^Readme.Rmd ^Readme.md JPS_multiranker_Min.pdf -example.R +^.devcontainer/ +^assets/ +.lintr diff --git a/R/inclusion_probability.R b/R/inclusion_probability.R index 50b88ca..81c61b4 100644 --- a/R/inclusion_probability.R +++ b/R/inclusion_probability.R @@ -53,7 +53,7 @@ calculate_inclusion_prob <- function(size_measurement, n, n_cores = getOption("n total_size <- sum(size_measurement) # switch between a single core and multiple cores - if (n_population > 250 && require(parallel) && n_cores > 1) { + if (n_population > 250 && requireNamespace("parallel") && n_cores > 1) { n_cores <- parallel::detectCores() - 1 # to avoid failure from devtools::check From 2bdaae322bcce266107cb2336146b5c31ceb1100 Mon Sep 17 00:00:00 2001 From: Wasin Pipattungsakul Date: Wed, 22 May 2024 09:51:09 +0930 Subject: [PATCH 13/13] update doc --- R/sbs_pps_sample.R | 5 +++-- man/sbs_pps_sample.Rd | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/R/sbs_pps_sample.R b/R/sbs_pps_sample.R index dbb6359..03b62d1 100644 --- a/R/sbs_pps_sample.R +++ b/R/sbs_pps_sample.R @@ -6,13 +6,14 @@ #' 3. X2-coordinate of population unit #' 4. Size measurements of population units #' @param n Sample sizes (SBS sample size, PPS sample size). -#' @param n_cores The number of cores to be used for computational tasks (specify 0 for max). +#' @param n_cores The number of cores to be used for computational tasks (specify 0 for max). This can also be +#' set by calling `options`, e.g., `options(n_cores = 2)`. #' #' @return A named list of: #' - heatmap: heat map of the sample #' - sample: SBS PPS sample of the population #' @export - +#' #' @examples #' set.seed(112) #' diff --git a/man/sbs_pps_sample.Rd b/man/sbs_pps_sample.Rd index a27c48b..e06e40a 100644 --- a/man/sbs_pps_sample.Rd +++ b/man/sbs_pps_sample.Rd @@ -17,7 +17,8 @@ sbs_pps_sample(population, n, n_cores = getOption("n_cores", 1)) \item{n}{Sample sizes (SBS sample size, PPS sample size).} -\item{n_cores}{The number of cores to be used for computational tasks (specify 0 for max).} +\item{n_cores}{The number of cores to be used for computational tasks (specify 0 for max). This can also be +set by calling \code{options}, e.g., \code{options(n_cores = 2)}.} } \value{ A named list of: