Skip to content

Commit

Permalink
Merge pull request #3 from biometryhub/feature/unit-test
Browse files Browse the repository at this point in the history
Feature/unit test
  • Loading branch information
rogerssam authored May 16, 2024
2 parents 79e43b5 + fb6e69f commit 3965b63
Show file tree
Hide file tree
Showing 22 changed files with 849 additions and 561 deletions.
10 changes: 4 additions & 6 deletions R/CoefF.R
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
#' This function computes the coefficient of variance estimator
#'
#' @param H The set size
#' @param n The sample size
#' @param H Set size for each raking group.
#' @param n Sample size.
#'
#' @return
#' @keywords internal

CoefF <- function(H, n) {

calculate_coefficients <- function(H, n) {
kv <- 1:H
######################################################
# Expected value of I_1^2/d_n^2 ###################
E.I2.dn2 <- sum((kv / H)^(n - 1)) / H^2 # E{(I_1/d_n)^2} ##
E.I2.dn2 <- sum((kv / H)^(n - 1)) / H^2
######################################################

######################################################
# Compute the expected value I_1^2/(n_1 d_n^2) ##
######################################################

indM <- rep(0, 3)
for (k in (2:H)) {
AA <- expand.grid((1:(k - 1)), 1:(n - k + 1))
Expand Down
1 change: 1 addition & 0 deletions R/FWDel1.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# TODO: find out what this does
FWDel1 <- function(u, AWY) {
AWY1 <- AWY[-u, ]
eff.SamSiz <- apply(AWY1[, -1], 2, sum)
Expand Down
52 changes: 20 additions & 32 deletions R/JPSD2F.R
Original file line number Diff line number Diff line change
@@ -1,40 +1,28 @@
#' Title
#' Generate JPS sampling without replacement on the provided population.
#'
#' @param pop Population
#' @param n sample size
#' @param H Set size
#' @param tau controls the ranking quality
#' @param K Number of rankers
#' @inheritParams RSS
#' @param tau A parameter which controls ranking quality.
#'
#' @return
#' @keywords internal
#' @return A matrix with ranks from each ranker.
#'
JPSD2F <- function(pop, n, H, tau, K) {
#############################################
# Ths function generates JPS sample ##
#############################################
# K: Number of rankers
# tau: controls the ranking quality
# n:sample size
# H: Set szie
# pop: population
pop_size <- length(pop) # population size
nsets <- matrix(sample(pop, n * H), ncol = H, nrow = n)
#################################################
# below consruct rank for each SRS unit post experimentally
JPS <- matrix(0, ncol = K + 1, nrow = n) # store JPS sample
##############################################
JPSD2F <- function(pop, n, H, tau, K, with_replacement = FALSE) {
verify_jps_params(pop, n, H, tau, K, with_replacement)

sampling_matrix <- matrix(sample(pop, n * H, replace = with_replacement), ncol = H, nrow = n)

# rank each SRS unit post experimentally
jps_matrix <- matrix(0, ncol = K + 1, nrow = n)
for (i in (1:n)) {
Set <- nsets[i, ] # select compariosn set i
tem <- rep(0, K) # initialize to store ranks of he rankers for ocm,parion set i
comparison_set <- sampling_matrix[i, ]
ranks <- rep(0, K)
for (k in (1:K)) {
DCSet <- Set + tau[k] * rnorm(H, 0, 1) # adjust ranking quality using Dell-Clutter
# model
RankSet <- rank(DCSet) # ranks the units in the comparion set i by ranker k
tem[k] <- RankSet[1] # the rank of the i-th mesured unit by ranker k
# adjust for ranking, Dell and Clutter
adjusted_set <- comparison_set + tau[k] * rnorm(H, 0, 1)
ranks[k] <- rank(adjusted_set)[1]
}
JPS[i, ] <- c(Set[1], tem) # meaured value of unit i and ranks by k rankers
jps_matrix[i, ] <- c(comparison_set[1], ranks)
}
colnames(JPS) <- c("Y", paste("R", 1:K, sep = ""))
return(JPS)

colnames(jps_matrix) <- c("Y", paste0("R", 1:K))
return(jps_matrix)
}
122 changes: 71 additions & 51 deletions R/JPSEDF.R
Original file line number Diff line number Diff line change
@@ -1,82 +1,102 @@
#' Computes the estimator and variance for each individual ranker
#'
#' @param RV Rank values for Y.
#' @param ranks Ranks of Y.
#' @param Y Response measurements.
#' @param set_size Set Size.
#' @param set_size Set size for each raking group.
#' @param N Finite population size.
#' @param coef Coefficients used in variance computation when sample size is n.
#' @param coef_del Coefficients used in variance computation when the i-th unit is deleted.
#' @param replace Logical. Sample with replacement?
#' @param model If model is 0, it's design based inference, if model = 1, it is model based inference using super population model.
#' @param K
#' @param model An inference mode:
#' - `0`: design based inference
#' - `1`: model based inference using super population model
#' @param K Number of rankers.
#'
#' @return
#' @keywords internal
#'
JPSEDF <- function(RV, Y, set_size, N, coef, coef_del, replace, model, K) {
n <- length(Y)
M.est <- mean(aggregate(Y, data.frame(RV), mean)$x) # JPS estimate
Y.ij <- expand.grid(Y, Y)
JPSEDF <- function(ranks, Y, set_size, N, coef, coef_del, replace, model, K) {
y_ij <- expand.grid(Y, Y)

# count ranks including zeros
rank_count <- rep(0, set_size)
agg_rank <- aggregate(ranks, data.frame(ranks), length)
rank_count[agg_rank$ranks] <- agg_rank$x

n_non_empty_ranks <- dim(agg_rank)[[1]]
n_two_plus_ranks <- length(rank_count[rank_count > 1])

rank_ij <- expand.grid(ranks, ranks)
y_ij_diff2 <- (y_ij[, 1] - y_ij[, 2])^2

# group squared differences based on judgement classes
agg_ss <- aggregate(y_ij_diff2, rank_ij, sum)
paired_count <- cbind(rank_count[agg_ss[, 1]], rank_count[agg_ss[, 2]])
count_ii <- paired_count[agg_ss[, 1] == agg_ss[, 2], 1]
count_non_ii <- paired_count[agg_ss[, 1] != agg_ss[, 2], ]

# nh(nh - 1)
nh_nh1 <- count_ii * (count_ii - 1)
agg_ss_ii <- agg_ss[agg_ss[, 1] == agg_ss[, 2], 3]
# sum_h sum_i sum_j (Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh(nh-1))
tt2 <- sum(agg_ss_ii[nh_nh1 > 0] / nh_nh1[nh_nh1 > 0])
if (dim(count_non_ii)[1] != 0) {
# sum_h sum_h'sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh nh'))
tt1 <- sum(agg_ss[agg_ss[, 1] != agg_ss[, 2], 3] / (count_non_ii[, 1] * (count_non_ii[, 2])))
} else {
tt1 <- 0
}

GSV <- rep(0, set_size) # Group sample size vector
TemSS <- aggregate(RV, data.frame(RV), length)
GSV[TemSS[, 1]] <- TemSS$x # Judgment group sample sizes. Some would be zero.
dn <- length(GSV[GSV > 0]) # the nonempty judgment groups
dn.star <- length(GSV[GSV > 1]) # the number of judgment groups having at least two observations
R.hhp <- expand.grid(RV, RV)
Y.ij2 <- (Y.ij[, 1] - Y.ij[, 2])^2 # squared differences of (Yi-Yj)^2
G.sum <- aggregate(Y.ij2, R.hhp, sum) # group squared differences based on judgment classes
GS.size <- cbind(GSV[G.sum[, 1]], GSV[G.sum[, 2]]) # attach judmgent group sample sizes
# to each group
denT2 <- GS.size[G.sum[, 1] == G.sum[, 2], 1] # determine nh from the judgment group set_size
denT1 <- GS.size[G.sum[, 1] != G.sum[, 2], ] # determine sample sizes n_h and n_h'for judgment groups set_size and set_size'
den2 <- denT2 * (denT2 - 1) # determine the denumerator nh(nh-1) for T2
numT2 <- G.sum[G.sum[, 1] == G.sum[, 2], 3] # sum_h sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size)
TT2 <- sum(numT2[den2 > 0] / den2[den2 > 0]) # sum_h sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh(nh-1))
# Line below #sum_h sum_h'sum_i sum_j ( Y_i=Y_j)^2I(R_i=set_size)I(R_j=set_size')/(nh nh'))
if (dim(denT1)[1] != 0) TT1 <- sum(G.sum[G.sum[, 1] != G.sum[, 2], 3] / (denT1[, 1] * (denT1[, 2]))) else TT1 <- 0
############################################################################
# Variance estimate with full data
M.Est <- mean(aggregate(Y, list(RV), mean)$x) # JPS estiamte with full data
T2s <- set_size * TT2 / (2 * dn.star)
T1s <- TT1 / (2 * coef[1] * dn^2)
estimated_mean <- mean(aggregate(Y, list(ranks), mean)$x)
t2s <- set_size * tt2 / (2 * n_two_plus_ranks)
t1s <- tt1 / (2 * coef[1] * n_non_empty_ranks^2)
# VestD0=coef[2]*T1s/(set_size-1)+coef[3]*T2s
if (!replace) {
VEST <- coef[2] * T2s + coef[3] * (N - 1) * (T1s + T2s) / (N * (set_size - 1))
if (VEST <= 0) VEST <- coef[2] * T2s / 2
estimated_variance <- coef[2] * t2s + coef[3] * (N - 1) * (t1s + t2s) / (N * (set_size - 1))
if (estimated_variance <= 0) {
estimated_variance <- coef[2] * t2s / 2
}
} else {
VEST <- coef[2] * T1s / (set_size - 1) + coef[3] * T2s
estimated_variance <- coef[2] * t1s / (set_size - 1) + coef[3] * t2s
}

if (model == 1) {
VEST <- (T1s + T2s) / set_size^2 * ((-1 / N) + coef[2] * set_size^2 / (set_size - 1)) + T2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1))
if (VEST <= 0) VEST <- T2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1))
estimated_variance <- ((t1s + t2s) / set_size^2 * ((-1 / N) + coef[2] * set_size^2 / (set_size - 1))
+ t2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1)))

if (estimated_variance <= 0) {
estimated_variance <- t2s * ((coef[3] + coef[2]) - coef[2] * set_size / (set_size - 1))
}
}
if (K == 1) {
ret <- c(M.Est, VEST)
return(ret)
return(c(estimated_mean, estimated_variance))
}


################################################################


################################################################
######### This part is new for Jackknife replication, delete one observations
######## reduces the computation time
ID <- 1:n # index to determine which observation is to be deleted
Ind.ij <- expand.grid(ID, ID) # Index of observations to be deleted
Y.ij2N <- Y.ij2[Ind.ij[, 1] - Ind.ij[, 2] != 0] # Remove Y_i=Y_ii
R.hhpN <- R.hhp[Ind.ij[, 1] - Ind.ij[, 2] != 0, ] # Remove R_i=R_i
Ind.ij <- Ind.ij[Ind.ij[, 1] != Ind.ij[, 2], ] # Remove i=i
n <- length(Y)
# index to determine which observation is to be deleted
ID <- 1:n
# Index of observations to be deleted
Ind.ij <- expand.grid(ID, ID)
# Remove Y_i=Y_ii
Y.ij2N <- y_ij_diff2[Ind.ij[, 1] - Ind.ij[, 2] != 0]
# Remove R_i=R_i
R.hhpN <- rank_ij[Ind.ij[, 1] - Ind.ij[, 2] != 0, ]
# Remove i=i
Ind.ij <- Ind.ij[Ind.ij[, 1] != Ind.ij[, 2], ]
# deltM=matrix(0,ncol=6,nrow=n) # This stores the contribtuion of each
# deleted observation to
# T1 and T2

INDM <- matrix(1:n, ncol = 1) # This is used in apply function below
PASS <- list(Y, RV, Ind.ij, GSV, Y.ij2N, R.hhpN, G.sum, TT2, TT1) # compile additional variables in list
DeltM <- t(apply(INDM, 1, DELETi, PASS = PASS)) # This computes TT2, TT1, dn, dn-star
# for each deleted unit "i".
# deltM=DeltM
# This is used in apply function below
INDM <- matrix(1:n, ncol = 1)
# compile additional variables in list
PASS <- list(Y, ranks, Ind.ij, rank_count, Y.ij2N, R.hhpN, agg_ss, tt2, tt1)
# This computes TT2, TT1, dn, dn-star for each deleted unit "i".deltM=DeltM
DeltM <- t(apply(INDM, 1, DELETi, PASS = PASS))
##################################################
T2v.Del <- set_size * DeltM[, 4] / (2 * DeltM[, 3]) # T2 when we delete the i-th unit
# deltM[,4]: TT2i, deltM[,3]= dn_str, the number
Expand All @@ -103,7 +123,7 @@ JPSEDF <- function(RV, Y, set_size, N, coef, coef_del, replace, model, K) {
VEST.Del[VEST.Del <= 0] <- T2v.Del[VEST.Del <= 0] * ((coef_del[3] + coef_del[2]) - coef_del[2] * set_size / (set_size - 1))
}

Est <- c(M.Est, VEST) # M.Est: JPS estimate with sample size n
Est <- c(estimated_mean, estimated_variance) # M.Est: JPS estimate with sample size n
# VEST: varaince estimate of JPS estimator with sample size n
Est.Del <- VEST.Del # n-dimentional vector containing
# varince estimate of JPS estimator for each deleted observation
Expand Down
Loading

0 comments on commit 3965b63

Please sign in to comment.