Skip to content

Commit

Permalink
Merge pull request #139 from katilingban/dev
Browse files Browse the repository at this point in the history
finalise results outputs
  • Loading branch information
ernestguevarra authored Dec 17, 2022
2 parents 33ec3ce + c93f6f8 commit 83a2ec9
Show file tree
Hide file tree
Showing 24 changed files with 2,430 additions and 121 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ outputs
!outputs/.gitkeep
!outputs/data_processing_report.html
!outputs/s3m_recoded_data.csv
!outputs.s3m_recoded_data.xlsx
!outputs/s3m_recoded_data.xlsx
!outputs/choropleth/.gitkeep
!outputs/interpolation/.gitkeep
_targets
Rplots.pdf
295 changes: 285 additions & 10 deletions R/bootstrap_indicators.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
################################################################################

boot_estimate <- function(.data,
w,
w,
statistic = bbw::bootClassic,
vars,
labs,
indicator_set,
replicates = 399) {
currentDF <- .data[c("spid", "ea_code", "district", vars)] |>
(\(x) x[!is.na(x$spid) & x$spid != 0, ])()
Expand All @@ -47,7 +49,7 @@ boot_estimate <- function(.data,
temp <- bbw::bootBW(
x = currentDF,
w = w,
statistic = bbw::bootClassic,
statistic = statistic,
params = params,
outputColumns = outputColumns,
replicates = replicates
Expand All @@ -60,35 +62,308 @@ boot_estimate <- function(.data,
probs = c(0.5, 0.025, 0.975), na.rm = TRUE
)

se <- apply(
X = temp,
MARGIN = 2,
FUN = sd
)

est <- t(est)

est <- data.frame(unique(currentDF$district), labs, est)
est <- data.frame(
unique(currentDF$district), indicator_set, vars, labs, est, se
)

row.names(est) <- 1:nrow(est)

names(est) <- c("district", "indicators", "estimate", "lcl", "ucl")
names(est) <- c(
"district", "indicator_set", "indicator_variable",
"indicator", "estimate", "lcl", "ucl", "sd"
)

est
}

## Bootstrap estimation by district --------------------------------------------

boot_estimates <- function(.data,
w, vars, labs,
w,
statistic = bbw::bootClassic,
vars, labs, indicator_set,
replicates = 399) {
.data <- split(x = .data, f = .data$district)
w <- rep(list(w), length(.data))
vars <- rep(list(vars), length(.data))
labs <- rep(list(labs), length(.data))
.data <- split(x = .data, f = .data$district)
w <- rep(list(w), length(.data))
statistic <- rep(list(statistic), length(.data))
vars <- rep(list(vars), length(.data))
labs <- rep(list(labs), length(.data))
indicator_set <- rep(list(indicator_set), length(.data))

parallel::mcMap(
f = boot_estimate,
.data = .data,
w = w,
statistic = statistic,
vars = vars,
labs = labs,
indicator_set = indicator_set,
replicates = replicates,
mc.cores = 4
) |>
(\(x) do.call(rbind, x))()
(\(x) do.call(rbind, x))() |>
(\(x) { row.names(x) <- 1:nrow(x); x })()
}

## Specific bootstrap functions for household indicators groups/sets -----------

boot_estimates_household <- function(.data, w,
replicates = 399,
indicator_list) {
boot_estimates(
.data = .data,
w = w,
vars = indicator_list[["indicator_variable"]],
labs = indicator_list[["indicator"]],

indicator_set = indicator_list[["indicator_set"]],
replicates = replicates
)
}

## Specific bootstrap functions for carer indicators groups/sets ---------------

boot_estimates_carer <- function(.data, w,
replicates = 399, indicator_list) {
boot_estimates(
.data = .data,
w = w,
vars = indicator_list[["indicator_variable"]],
labs = indicator_list[["indicator"]],
indicator_set = indicator_list[["indicator_set"]],
replicates = replicates
)
}

## Specific bootstrap functions for woman indicators groups/sets ---------------

boot_estimates_woman <- function(.data, w,
replicates = 399, indicator_list) {
boot_estimates(
.data = .data,
w = w,
vars = indicator_list[["indicator_variable"]],
labs = indicator_list[["indicator"]],
indicator_set = indicator_list[["indicator_set"]],
replicates = replicates
)
}

## Specific bootstrap functions for child indicators groups/sets ---------------

boot_estimates_child <- function(.data, w,
replicates = 399, indicator_list) {
boot_estimates(
.data = .data,
w = w,
vars = indicator_list[["indicator_variable"]],
labs = indicator_list[["indicator"]],
indicator_set = indicator_list[["indicator_set"]],
replicates = replicates
)
}

## Bootstrap probit a single district ------------------------------------------

boot_probit <- function(.data,
w,
vars,
labs,
indicator_set,
THRESHOLD,
replicates = 399) {
currentDF <- .data[c("spid", "ea_code", "district", vars[1:2])] |>
(\(x) x[!is.na(x$spid) & x$spid != 0, ])()

##
params <- vars[1:2]

## Rename "eid" to psu
colnames(currentDF)[1] <- "psu"

w <- w[w$psu %in% currentDF$psu, ]

##
outputColumns <- params

bootPROBIT <- function(x, params, threshold = THRESHOLD) {
d <- x[[params[1]]]
m <- median(d, na.rm = TRUE)
s <- IQR(d, na.rm = TRUE) / 1.34898
x <- pnorm(q = threshold, mean = m, sd = s)
return(x)
}

##
temp <- bbw::bootBW(
x = currentDF,
w = w,
statistic = bootPROBIT,
params = params,
outputColumns = outputColumns,
replicates = replicates
)

temp <- data.frame(
global = temp[[1]],
moderate = temp[[1]] - temp[[2]],
severe = temp[[2]]
)

est <- apply(
X = temp,
MARGIN = 2,
FUN = quantile,
probs = c(0.5, 0.025, 0.975), na.rm = TRUE
)

se <- apply(
X = temp,
MARGIN = 2,
FUN = sd
)

est <- t(est)

vars <- ifelse(
unique(vars) == "hfaz",
c("global_stunting", "moderate_stunting", "severe_stunting"),
ifelse(
unique(vars) == "wfaz",
c("global_underweight", "moderate_underweight", "severe_underweight"),
ifelse(
unique(vars) == "wfhz",
c("global_wasting_whz", "moderate_wasting_whz", "severe_wasting_whz"),
c("global_wasting_muac", "moderate_wasting_muac", "severe_wasting_muac")
)
)
)

est <- data.frame(
unique(currentDF$district), indicator_set, vars, labs, est, se
)

row.names(est) <- 1:nrow(est)

names(est) <- c(
"district", "indicator_set", "indicator_variable",
"indicator", "estimate", "lcl", "ucl", "sd"
)

est
}

## Bootstrap probit all districts ----------------------------------------------

boot_probits_anthro <- function(.data,
w,
vars,
labs,
indicator_set,
THRESHOLD,
replicates = 399) {
# vars <- c("hfaz", "hfaz", "wfaz", "wfaz", "wfhz", "wfhz", "cmuac", "cmuac")
# labs <- c(
# "Global stunting", "Severe stunting",
# "Global underweight", "Severe underweight",
# "Global wasting by weight-for-height z-score",
# "Severe wasting by weight-for-height z-score",
# "Global wasting by MUAC", "Severe wasting by MUAC"
# )
# indicator_set <- rep("Child nutritional status", 8)

.data <- split(x = .data, f = .data$district)
w <- rep(list(w), length(.data))
vars <- rep(list(vars), length(.data))
labs <- rep(list(labs), length(.data))
indicator_set <- rep(list(indicator_set), length(.data))
THRESHOLD <- rep(list(THRESHOLD), length(.data))

parallel::mcMap(
f = boot_probit,
.data = .data,
w = w,
vars = vars,
labs = labs,
indicator_set = indicator_set,
THRESHOLD = THRESHOLD,
replicates = replicates,
mc.cores = 4
) |>
(\(x) do.call(rbind, x))() |>
(\(x) { row.names(x) <- 1:nrow(x); x })()
}


boot_probits_stunting <- function(.data, w, replicates = 399) {
boot_probits_anthro(
.data = .data,
w = w,
vars = rep("hfaz", 3),
labs = c(
"Proportion of children 6-59 months old with height-for-age z-score less than -2",
"Proportion of children 6-59 momths old with height-for-age z-score less than -2 and greater than or equal to -3",
"Proportion of children 6-59 months old with height-for-age z-score less than -3"
),
indicator_set = rep("Child nutritional status", 3),
THRESHOLD = c(-2, -3),
replicates = replicates
)
}


boot_probits_underweight <- function(.data, w, replicates = 399) {
boot_probits_anthro(
.data = .data,
w = w,
vars = rep("wfaz", 3),
labs = c(
"Proportion of children 6-59 months old with weight-for-age z-score less than -2",
"Proportion of children 6-59 momths old with weight-for-age z-score less than -2 and greater than or equal to -3",
"Proportion of children 6-59 months old with weight-for-age z-score less than -3"
),
indicator_set = rep("Child nutritional status", 3),
THRESHOLD = c(-2, -3),
replicates = replicates
)
}

boot_probits_whz <- function(.data, w, replicates = 399) {
boot_probits_anthro(
.data = .data,
w = w,
vars = rep("wfhz", 3),
labs = c(
"Proportion of children 6-59 months old with weight-for-height z-score less than -2",
"Proportion of children 6-59 momths old with weight-for-height z-score less than -2 and greater than or equal to -3",
"Proportion of children 6-59 months old with weight-for-height z-score less than -3"
),
indicator_set = rep("Child nutritional status", 3),
THRESHOLD = c(-2, -3),
replicates = replicates
)
}

boot_probits_muac <- function(.data, w, replicates = 399) {
boot_probits_anthro(
.data = .data,
w = w,
vars = rep("cmuac", 3),
labs = c(
"Proportion of children 6-59 months old with mid-upper arm circumference less than 12.5 cms",
"Proportion of children 6-59 momths old with mid-upper arm circumference less than 12.5 cms and greater than or equal to 11.5 cms",
"Proportion of children 6-59 months old with mid-upper arm circumference less than 11.5 cms"
),
indicator_set = rep("Child nutritional status", 3),
THRESHOLD = c(12.5, 11.5),
replicates = replicates
)
}
43 changes: 43 additions & 0 deletions R/calculate_province_estimates.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
################################################################################
#
#'
#' Calculate province estimates
#'
#
################################################################################

calculate_province_estimate <- function(df, pop) {
merge(df, pop) |>
dplyr::summarise(
indicator_set = unique(indicator_set),
indicator_variable = unique(indicator_variable),
indicator = unique(indicator),
estimate = sum(estimate * population, na.rm = TRUE) / sum(population),
se = sqrt(sum(sd ^ 2 * population / sum(population, na.rm = TRUE), na.rm = TRUE)),
lcl = estimate - 1.96 * se,
ucl = estimate + 1.96 * se
) |>
dplyr::relocate(se, .after = ucl)
}

## Calculate province estimates for all indicators -----------------------------

calculate_province_estimates <- function(results_table, pop) {
df_list <- split(
x = results_table,
f = factor(
x = results_table[["indicator"]],
levels = unique(results_table[["indicator"]])
)
)

pop <- rep(list(pop), length(df_list))

Map(
f = calculate_province_estimate,
df = df_list,
pop = pop
) |>
dplyr::bind_rows()
}

Loading

0 comments on commit 83a2ec9

Please sign in to comment.