Merge pull request #3 from biometryhub/feature/unit-test

Feature/unit test
biometryhub · May 16, 2024 · 3965b63 · 3965b63
2 parents 79e43b5 + fb6e69f
commit 3965b63
Show file tree

Hide file tree

Showing 22 changed files with 849 additions and 561 deletions.
diff --git a/R/CoefF.R b/R/CoefF.R
@@ -1,23 +1,21 @@
 #' This function computes the coefficient of variance estimator
 #'
-#' @param H The set size
-#' @param n The sample size
+#' @param H Set size for each raking group.
+#' @param n Sample size.
 #'
 #' @return
 #' @keywords internal
 
-CoefF <- function(H, n) {
-
+calculate_coefficients <- function(H, n) {
   kv <- 1:H
   ######################################################
   # Expected value of I_1^2/d_n^2    ###################
-  E.I2.dn2 <- sum((kv / H)^(n - 1)) / H^2 # E{(I_1/d_n)^2}    ##
+  E.I2.dn2 <- sum((kv / H)^(n - 1)) / H^2
   ######################################################
 
   ######################################################
   # Compute the expected value I_1^2/(n_1 d_n^2)      ##
   ######################################################
-
   indM <- rep(0, 3)
   for (k in (2:H)) {
     AA <- expand.grid((1:(k - 1)), 1:(n - k + 1))

diff --git a/R/FWDel1.R b/R/FWDel1.R
@@ -1,3 +1,4 @@
+# TODO: find out what this does
 FWDel1 <- function(u, AWY) {
   AWY1 <- AWY[-u, ]
   eff.SamSiz <- apply(AWY1[, -1], 2, sum)

diff --git a/R/JPSD2F.R b/R/JPSD2F.R
@@ -1,40 +1,28 @@
-#' Title
+#' Generate JPS sampling without replacement on the provided population.
 #'
-#' @param pop Population
-#' @param n sample size
-#' @param H Set size
-#' @param tau controls the ranking quality
-#' @param K Number of rankers
+#' @inheritParams RSS
+#' @param tau A parameter which controls ranking quality.
 #'
-#' @return
-#' @keywords internal
+#' @return A matrix with ranks from each ranker.
 #'
-JPSD2F <- function(pop, n, H, tau, K) {
-  #############################################
-  # Ths function generates JPS sample        ##
-  #############################################
-  # K: Number of rankers
-  # tau: controls the ranking quality
-  # n:sample size
-  # H: Set szie
-  # pop: population
-  pop_size <- length(pop) # population size
-  nsets <- matrix(sample(pop, n * H), ncol = H, nrow = n)
-  #################################################
-  # below  consruct rank for each SRS unit post experimentally
-  JPS <- matrix(0, ncol = K + 1, nrow = n) # store JPS sample
-  ##############################################
+JPSD2F <- function(pop, n, H, tau, K, with_replacement = FALSE) {
+  verify_jps_params(pop, n, H, tau, K, with_replacement)
+
+  sampling_matrix <- matrix(sample(pop, n * H, replace = with_replacement), ncol = H, nrow = n)
+
+  # rank each SRS unit post experimentally
+  jps_matrix <- matrix(0, ncol = K + 1, nrow = n)
   for (i in (1:n)) {
-    Set <- nsets[i, ] # select compariosn set i
-    tem <- rep(0, K) # initialize to store ranks of he rankers for ocm,parion set i
+    comparison_set <- sampling_matrix[i, ]
+    ranks <- rep(0, K)
     for (k in (1:K)) {
-      DCSet <- Set + tau[k] * rnorm(H, 0, 1) # adjust ranking quality using Dell-Clutter
-      # model
-      RankSet <- rank(DCSet) # ranks the units in the comparion set i by ranker k
-      tem[k] <- RankSet[1] #  the rank of the i-th mesured unit by ranker k
+      # adjust for ranking, Dell and Clutter
+      adjusted_set <- comparison_set + tau[k] * rnorm(H, 0, 1)
+      ranks[k] <- rank(adjusted_set)[1]
     }
-    JPS[i, ] <- c(Set[1], tem) # meaured value of unit i and ranks by k rankers
+    jps_matrix[i, ] <- c(comparison_set[1], ranks)
   }
-  colnames(JPS) <- c("Y", paste("R", 1:K, sep = ""))
-  return(JPS)
+
+  colnames(jps_matrix) <- c("Y", paste0("R", 1:K))
+  return(jps_matrix)
 }
diff --git a/R/JPSEDF.R b/R/JPSEDF.R
@@ -1,82 +1,102 @@
 #' Computes the estimator and variance for each individual ranker
 #'
-#' @param RV Rank values for Y.
+#' @param ranks Ranks of Y.
 #' @param Y Response measurements.
-#' @param set_size Set Size.
+#' @param set_size Set size for each raking group.
 #' @param N Finite population size.
 #' @param coef Coefficients used in variance computation when sample size is n.
 #' @param coef_del Coefficients used in variance computation when the i-th unit is deleted.
 #' @param replace Logical. Sample with replacement?
-#' @param model If model is 0, it's design based inference, if model = 1, it is model based inference using super population model.
-#' @param K
+#' @param model An inference mode:
+#' - `0`: design based inference
+#' - `1`: model based inference using super population model
+#' @param K Number of rankers.
 #'
 #' @return
 #' @keywords internal
 #'
-JPSEDF <- function(RV, Y, set_size, N, coef, coef_del, replace, model, K) {
-  n <- length(Y)
-  M.est <- mean(aggregate(Y, data.frame(RV), mean)$x) # JPS estimate
-  Y.ij <- expand.grid(Y, Y)
+JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model, K) {
+  y_ij <- expand.grid(Y, Y)
+
+  # count ranks including zeros
+  rank_count <- rep(0, set_size)
+  agg_rank <- aggregate(ranks, data.frame(ranks), length)
+  rank_count[agg_rank$ranks] <- agg_rank$x
+
+  n_non_empty_ranks <- dim(agg_rank)[[1]]
+  n_two_plus_ranks <- length(rank_count[rank_count > 1])
+
+  rank_ij <- expand.grid(ranks, ranks)
+  y_ij_diff2 <- (y_ij[, 1] - y_ij[, 2])^2
+
+  # group squared differences based on judgement classes
+  agg_ss <- aggregate(y_ij_diff2, rank_ij, sum)
+  paired_count <- cbind(rank_count[agg_ss[, 1]], rank_count[agg_ss[, 2]])
+  count_ii <- paired_count[agg_ss[, 1] == agg_ss[, 2], 1]
+  count_non_ii <- paired_count[agg_ss[, 1] != agg_ss[, 2], ]
+
+  # nh(nh - 1)
+  nh_nh1 <- count_ii * (count_ii - 1)
+  agg_ss_ii <- agg_ss[agg_ss[, 1] == agg_ss[, 2], 3]
+  # sum_h sum_i sum_j (Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh(nh-1))
+  tt2 <- sum(agg_ss_ii[nh_nh1 > 0] / nh_nh1[nh_nh1 > 0])
+  if (dim(count_non_ii)[1] != 0) {
+    # sum_h sum_h'sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh nh'))
+    tt1 <- sum(agg_ss[agg_ss[, 1] != agg_ss[, 2], 3] / (count_non_ii[, 1] * (count_non_ii[, 2])))
+  } else {
+    tt1 <- 0
+  }
 
-  GSV <- rep(0, set_size) # Group sample size vector
-  TemSS <- aggregate(RV, data.frame(RV), length)
-  GSV[TemSS[, 1]] <- TemSS$x # Judgment group sample sizes. Some would be zero.
-  dn <- length(GSV[GSV > 0]) # the nonempty judgment groups
-  dn.star <- length(GSV[GSV > 1]) # the number of judgment groups having at least two observations
-  R.hhp <- expand.grid(RV, RV)
-  Y.ij2 <- (Y.ij[, 1] - Y.ij[, 2])^2 #  squared differences of (Yi-Yj)^2
-  G.sum <- aggregate(Y.ij2, R.hhp, sum) # group  squared differences based on judgment classes
-  GS.size <- cbind(GSV[G.sum[, 1]], GSV[G.sum[, 2]]) # attach  judmgent group sample sizes
-  # to each group
-  denT2 <- GS.size[G.sum[, 1] == G.sum[, 2], 1] # determine nh from the judgment group set_size
-  denT1 <- GS.size[G.sum[, 1] != G.sum[, 2], ] # determine sample sizes  n_h and n_h'for judgment groups set_size and set_size'
-  den2 <- denT2 * (denT2 - 1) # determine the denumerator nh(nh-1) for T2
-  numT2 <- G.sum[G.sum[, 1] == G.sum[, 2], 3] # sum_h sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size)
-  TT2 <- sum(numT2[den2 > 0] / den2[den2 > 0]) # sum_h sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh(nh-1))
-  # Line below #sum_h sum_h'sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh nh'))
-  if (dim(denT1)[1] != 0) TT1 <- sum(G.sum[G.sum[, 1] != G.sum[, 2], 3] / (denT1[, 1] * (denT1[, 2]))) else TT1 <- 0
   ############################################################################
-  # Variance estimate with full data
-  M.Est <- mean(aggregate(Y, list(RV), mean)$x) # JPS estiamte with full data
-  T2s <- set_size * TT2 / (2 * dn.star)
-  T1s <- TT1 / (2 * coef[1] * dn^2)
+  estimated_mean <- mean(aggregate(Y, list(ranks), mean)$x)
+  t2s <- set_size * tt2 / (2 * n_two_plus_ranks)
+  t1s <- tt1 / (2 * coef[1] * n_non_empty_ranks^2)
   # VestD0=coef[2]*T1s/(set_size-1)+coef[3]*T2s
   if (!replace) {
-    VEST <- coef[2] * T2s + coef[3] * (N - 1) * (T1s + T2s) / (N * (set_size - 1))
-    if (VEST <= 0) VEST <- coef[2] * T2s / 2
+    estimated_variance <- coef[2] * t2s + coef[3] * (N - 1) * (t1s + t2s) / (N * (set_size - 1))
+    if (estimated_variance <= 0) {
+      estimated_variance <- coef[2] * t2s / 2
+    }
   } else {
-    VEST <- coef[2] * T1s / (set_size - 1) + coef[3] * T2s
+    estimated_variance <- coef[2] * t1s / (set_size - 1) + coef[3] * t2s
   }
+
   if (model == 1) {
-    VEST <- (T1s + T2s) / set_size^2 * ((-1 / N) + coef[2] * set_size^2 / (set_size - 1)) + T2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1))
-    if (VEST <= 0) VEST <- T2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1))
+    estimated_variance <- ((t1s + t2s) / set_size^2 * ((-1 / N) + coef[2] * set_size^2 / (set_size - 1))
+      + t2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1)))
+
+    if (estimated_variance <= 0) {
+      estimated_variance <- t2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1))
+    }
   }
   if (K == 1) {
-    ret <- c(M.Est, VEST)
-    return(ret)
+    return(c(estimated_mean, estimated_variance))
   }
 
-
-  ################################################################
-
-
   ################################################################
   ######### This part is new for Jackknife replication, delete one observations
   ########  reduces the computation time
-  ID <- 1:n # index to determine which observation is to be deleted
-  Ind.ij <- expand.grid(ID, ID) # Index of observations to be deleted
-  Y.ij2N <- Y.ij2[Ind.ij[, 1] - Ind.ij[, 2] != 0] # Remove Y_i=Y_ii
-  R.hhpN <- R.hhp[Ind.ij[, 1] - Ind.ij[, 2] != 0, ] #  Remove R_i=R_i
-  Ind.ij <- Ind.ij[Ind.ij[, 1] != Ind.ij[, 2], ] # Remove i=i
+  n <- length(Y)
+  # index to determine which observation is to be deleted
+  ID <- 1:n
+  # Index of observations to be deleted
+  Ind.ij <- expand.grid(ID, ID)
+  # Remove Y_i=Y_ii
+  Y.ij2N <- y_ij_diff2[Ind.ij[, 1] - Ind.ij[, 2] != 0]
+  #  Remove R_i=R_i
+  R.hhpN <- rank_ij[Ind.ij[, 1] - Ind.ij[, 2] != 0, ]
+  # Remove i=i
+  Ind.ij <- Ind.ij[Ind.ij[, 1] != Ind.ij[, 2], ]
   #  deltM=matrix(0,ncol=6,nrow=n) # This stores the contribtuion of each
   # deleted observation to
   # T1 and T2
 
-  INDM <- matrix(1:n, ncol = 1) # This is used in apply function  below
-  PASS <- list(Y, RV, Ind.ij, GSV, Y.ij2N, R.hhpN, G.sum, TT2, TT1) # compile additional variables in list
-  DeltM <- t(apply(INDM, 1, DELETi, PASS = PASS)) # This computes TT2, TT1, dn, dn-star
-  # for each  deleted unit "i".
-  # deltM=DeltM
+  # This is used in apply function  below
+  INDM <- matrix(1:n, ncol = 1)
+  # compile additional variables in list
+  PASS <- list(Y, ranks, Ind.ij, rank_count, Y.ij2N, R.hhpN, agg_ss, tt2, tt1)
+  # This computes TT2, TT1, dn, dn-star for each  deleted unit "i".deltM=DeltM
+  DeltM <- t(apply(INDM, 1, DELETi, PASS = PASS))
   ##################################################
   T2v.Del <- set_size * DeltM[, 4] / (2 * DeltM[, 3]) # T2 when we delete the i-th unit
   # deltM[,4]: TT2i, deltM[,3]= dn_str, the number
@@ -103,7 +123,7 @@ JPSEDF <- function(RV, Y, set_size, N, coef, coef_del, replace, model, K) {
     VEST.Del[VEST.Del <= 0] <- T2v.Del[VEST.Del <= 0] * ((coef_del[3] + coef_del[2]) - coef_del[2] * set_size / (set_size - 1))
   }
 
-  Est <- c(M.Est, VEST) # M.Est: JPS estimate with sample size n
+  Est <- c(estimated_mean, estimated_variance) # M.Est: JPS estimate with sample size n
   # VEST:  varaince estimate of JPS estimator with sample size n
   Est.Del <- VEST.Del # n-dimentional vector containing
   # varince estimate of JPS estimator for each deleted observation