Merge pull request #139 from katilingban/dev

finalise results outputs
katilingban · Dec 17, 2022 · 83a2ec9 · 83a2ec9
2 parents 33ec3ce + c93f6f8
commit 83a2ec9
Show file tree

Hide file tree

Showing 24 changed files with 2,430 additions and 121 deletions.
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,8 @@ outputs
 !outputs/.gitkeep
 !outputs/data_processing_report.html
 !outputs/s3m_recoded_data.csv
-!outputs.s3m_recoded_data.xlsx
+!outputs/s3m_recoded_data.xlsx
+!outputs/choropleth/.gitkeep
+!outputs/interpolation/.gitkeep
 _targets
 Rplots.pdf
diff --git a/R/bootstrap_indicators.R b/R/bootstrap_indicators.R
@@ -25,9 +25,11 @@
 ################################################################################
 
 boot_estimate <- function(.data,
-                          w, 
+                          w,
+                          statistic = bbw::bootClassic,
                           vars,
                           labs,
+                          indicator_set,
                           replicates = 399) {
   currentDF <- .data[c("spid", "ea_code", "district", vars)] |>
     (\(x) x[!is.na(x$spid) & x$spid != 0, ])()
@@ -47,7 +49,7 @@ boot_estimate <- function(.data,
   temp <- bbw::bootBW(
     x = currentDF, 
     w = w,
-    statistic = bbw::bootClassic,
+    statistic = statistic,
     params = params,
     outputColumns = outputColumns,
     replicates = replicates
@@ -60,35 +62,308 @@ boot_estimate <- function(.data,
     probs = c(0.5, 0.025, 0.975), na.rm = TRUE
   )
 
+  se <- apply(
+    X = temp,
+    MARGIN = 2,
+    FUN = sd
+  )
+
   est <- t(est)
 
-  est <- data.frame(unique(currentDF$district), labs, est)
+  est <- data.frame(
+    unique(currentDF$district), indicator_set, vars, labs, est, se
+  )
 
   row.names(est) <- 1:nrow(est)
 
-  names(est) <- c("district", "indicators", "estimate", "lcl", "ucl")
+  names(est) <- c(
+    "district", "indicator_set", "indicator_variable", 
+    "indicator", "estimate", "lcl", "ucl", "sd"
+  )
 
   est
 }
 
+## Bootstrap estimation by district --------------------------------------------
 
 boot_estimates <- function(.data, 
-                           w, vars, labs, 
+                           w,
+                           statistic = bbw::bootClassic,
+                           vars, labs, indicator_set,
                            replicates = 399) {
-  .data  <- split(x = .data, f = .data$district)
-  w      <- rep(list(w), length(.data)) 
-  vars   <- rep(list(vars), length(.data))
-  labs   <- rep(list(labs), length(.data))
+  .data         <- split(x = .data, f = .data$district)
+  w             <- rep(list(w), length(.data)) 
+  statistic     <- rep(list(statistic), length(.data))
+  vars          <- rep(list(vars), length(.data))
+  labs          <- rep(list(labs), length(.data))
+  indicator_set <- rep(list(indicator_set), length(.data))
 
   parallel::mcMap(
     f = boot_estimate,
     .data = .data,
     w = w,
+    statistic = statistic,
     vars = vars,
     labs = labs,
+    indicator_set = indicator_set,
     replicates = replicates,
     mc.cores = 4
   ) |>
-    (\(x) do.call(rbind, x))()
+    (\(x) do.call(rbind, x))() |>
+    (\(x) { row.names(x) <- 1:nrow(x); x })()
+}
+
+## Specific bootstrap functions for household indicators groups/sets -----------
+
+boot_estimates_household <- function(.data, w, 
+                                     replicates = 399, 
+                                     indicator_list) {
+  boot_estimates(
+    .data = .data,
+    w = w,
+    vars = indicator_list[["indicator_variable"]],
+    labs = indicator_list[["indicator"]],
+
+    indicator_set = indicator_list[["indicator_set"]],
+    replicates = replicates
+  )
+}
+
+## Specific bootstrap functions for carer indicators groups/sets ---------------
+
+boot_estimates_carer <- function(.data, w, 
+                                 replicates = 399, indicator_list) {
+  boot_estimates(
+    .data = .data,
+    w = w,
+    vars = indicator_list[["indicator_variable"]],
+    labs = indicator_list[["indicator"]],
+    indicator_set = indicator_list[["indicator_set"]],
+    replicates = replicates
+  )
 }
 
+## Specific bootstrap functions for woman indicators groups/sets ---------------
+
+boot_estimates_woman <- function(.data, w, 
+                                 replicates = 399, indicator_list) {
+  boot_estimates(
+    .data = .data,
+    w = w,
+    vars = indicator_list[["indicator_variable"]],
+    labs = indicator_list[["indicator"]],
+    indicator_set = indicator_list[["indicator_set"]],
+    replicates = replicates
+  )
+}
+
+## Specific bootstrap functions for child indicators groups/sets ---------------
+
+boot_estimates_child <- function(.data, w, 
+                                 replicates = 399, indicator_list) {
+  boot_estimates(
+    .data = .data,
+    w = w,
+    vars = indicator_list[["indicator_variable"]],
+    labs = indicator_list[["indicator"]],
+    indicator_set = indicator_list[["indicator_set"]],
+    replicates = replicates
+  )
+}
+
+## Bootstrap probit a single district ------------------------------------------
+
+boot_probit <- function(.data,
+                        w,
+                        vars,
+                        labs,
+                        indicator_set,
+                        THRESHOLD,
+                        replicates = 399) {
+  currentDF <- .data[c("spid", "ea_code", "district", vars[1:2])] |>
+    (\(x) x[!is.na(x$spid) & x$spid != 0, ])()
+
+  ##
+  params <- vars[1:2]
+
+  ## Rename "eid" to psu
+  colnames(currentDF)[1] <- "psu"
+
+  w <- w[w$psu %in% currentDF$psu, ]
+
+  ##
+  outputColumns <- params
+
+  bootPROBIT <- function(x, params, threshold = THRESHOLD) {
+    d <- x[[params[1]]]
+    m <- median(d, na.rm = TRUE)
+    s <- IQR(d, na.rm = TRUE) / 1.34898
+    x <- pnorm(q = threshold, mean = m, sd = s)
+    return(x)
+  }
+
+  ##
+  temp <- bbw::bootBW(
+    x = currentDF, 
+    w = w,
+    statistic = bootPROBIT,
+    params = params,
+    outputColumns = outputColumns,
+    replicates = replicates
+  )
+
+  temp <- data.frame(
+    global = temp[[1]],
+    moderate = temp[[1]] - temp[[2]],
+    severe = temp[[2]]
+  )
+
+  est <- apply(
+    X = temp, 
+    MARGIN = 2, 
+    FUN = quantile,
+    probs = c(0.5, 0.025, 0.975), na.rm = TRUE
+  )
+
+  se <- apply(
+    X = temp,
+    MARGIN = 2,
+    FUN = sd
+  )
+
+  est <- t(est)
+
+  vars <- ifelse(
+    unique(vars) == "hfaz", 
+    c("global_stunting", "moderate_stunting", "severe_stunting"),
+    ifelse(
+      unique(vars) == "wfaz",
+      c("global_underweight", "moderate_underweight", "severe_underweight"),
+      ifelse(
+        unique(vars) == "wfhz",
+        c("global_wasting_whz", "moderate_wasting_whz", "severe_wasting_whz"),
+        c("global_wasting_muac", "moderate_wasting_muac", "severe_wasting_muac")
+      )
+    )
+  )
+
+  est <- data.frame(
+    unique(currentDF$district), indicator_set, vars, labs, est, se
+  )
+
+  row.names(est) <- 1:nrow(est)
+
+  names(est) <- c(
+    "district", "indicator_set", "indicator_variable", 
+    "indicator", "estimate", "lcl", "ucl", "sd"
+  )
+
+  est
+}
+
+## Bootstrap probit all districts ----------------------------------------------
+
+boot_probits_anthro <- function(.data, 
+                                w,
+                                vars,
+                                labs,
+                                indicator_set,
+                                THRESHOLD,
+                                replicates = 399) {
+  # vars <- c("hfaz", "hfaz", "wfaz", "wfaz", "wfhz", "wfhz", "cmuac", "cmuac")
+  # labs <- c(
+  #   "Global stunting", "Severe stunting",
+  #   "Global underweight", "Severe underweight",
+  #   "Global wasting by weight-for-height z-score",
+  #   "Severe wasting by weight-for-height z-score",
+  #   "Global wasting by MUAC", "Severe wasting by MUAC"
+  # )
+  # indicator_set <- rep("Child nutritional status", 8)
+
+  .data         <- split(x = .data, f = .data$district)
+  w             <- rep(list(w), length(.data)) 
+  vars          <- rep(list(vars), length(.data))
+  labs          <- rep(list(labs), length(.data))
+  indicator_set <- rep(list(indicator_set), length(.data))
+  THRESHOLD     <- rep(list(THRESHOLD), length(.data))
+
+  parallel::mcMap(
+    f = boot_probit,
+    .data = .data,
+    w = w,
+    vars = vars,
+    labs = labs,
+    indicator_set = indicator_set,
+    THRESHOLD = THRESHOLD,
+    replicates = replicates,
+    mc.cores = 4
+  ) |>
+    (\(x) do.call(rbind, x))() |>
+    (\(x) { row.names(x) <- 1:nrow(x); x })()
+}
+
+
+boot_probits_stunting <- function(.data, w, replicates = 399) {
+  boot_probits_anthro(
+    .data = .data,
+    w = w,
+    vars = rep("hfaz", 3),
+    labs = c(
+      "Proportion of children 6-59 months old with height-for-age z-score less than -2",
+      "Proportion of children 6-59 momths old with height-for-age z-score less than -2 and greater than or equal to -3",
+      "Proportion of children 6-59 months old with height-for-age z-score less than -3"
+    ),
+    indicator_set = rep("Child nutritional status", 3),
+    THRESHOLD = c(-2, -3),
+    replicates = replicates
+  )
+}
+
+
+boot_probits_underweight <- function(.data, w, replicates = 399) {
+  boot_probits_anthro(
+    .data = .data,
+    w = w,
+    vars = rep("wfaz", 3),
+    labs = c(
+      "Proportion of children 6-59 months old with weight-for-age z-score less than -2",
+      "Proportion of children 6-59 momths old with weight-for-age z-score less than -2 and greater than or equal to -3",
+      "Proportion of children 6-59 months old with weight-for-age z-score less than -3"
+    ),
+    indicator_set = rep("Child nutritional status", 3),
+    THRESHOLD = c(-2, -3),
+    replicates = replicates
+  )
+}
+
+boot_probits_whz <- function(.data, w, replicates = 399) {
+  boot_probits_anthro(
+    .data = .data,
+    w = w,
+    vars = rep("wfhz", 3),
+    labs = c(
+      "Proportion of children 6-59 months old with weight-for-height z-score less than -2",
+      "Proportion of children 6-59 momths old with weight-for-height z-score less than -2 and greater than or equal to -3",
+      "Proportion of children 6-59 months old with weight-for-height z-score less than -3"
+    ),
+    indicator_set = rep("Child nutritional status", 3),
+    THRESHOLD = c(-2, -3),
+    replicates = replicates
+  )
+}
+
+boot_probits_muac <- function(.data, w, replicates = 399) {
+  boot_probits_anthro(
+    .data = .data,
+    w = w,
+    vars = rep("cmuac", 3),
+    labs = c(
+      "Proportion of children 6-59 months old with mid-upper arm circumference less than 12.5 cms",
+      "Proportion of children 6-59 momths old with mid-upper arm circumference less than 12.5 cms and greater than or equal to 11.5 cms",
+      "Proportion of children 6-59 months old with mid-upper arm circumference less than 11.5 cms"
+    ),
+    indicator_set = rep("Child nutritional status", 3),
+    THRESHOLD = c(12.5, 11.5),
+    replicates = replicates
+  )
+}
diff --git a/R/calculate_province_estimates.R b/R/calculate_province_estimates.R
@@ -0,0 +1,43 @@
+################################################################################
+#
+#'
+#' Calculate province estimates
+#'
+#
+################################################################################
+
+calculate_province_estimate <- function(df, pop) {
+  merge(df, pop) |>
+    dplyr::summarise(
+      indicator_set = unique(indicator_set),
+      indicator_variable = unique(indicator_variable),
+      indicator = unique(indicator),
+      estimate = sum(estimate * population, na.rm = TRUE) / sum(population),
+      se = sqrt(sum(sd ^ 2 * population / sum(population, na.rm = TRUE), na.rm = TRUE)),
+      lcl = estimate - 1.96 * se,
+      ucl = estimate + 1.96 * se
+    ) |>
+    dplyr::relocate(se, .after = ucl)
+}
+
+## Calculate province estimates for all indicators -----------------------------
+
+calculate_province_estimates <- function(results_table, pop) {
+  df_list <- split(
+    x = results_table,
+    f = factor(
+      x = results_table[["indicator"]],
+      levels = unique(results_table[["indicator"]])
+    )
+  )
+
+  pop <- rep(list(pop), length(df_list))
+
+  Map(
+    f = calculate_province_estimate,
+    df = df_list,
+    pop = pop
+  ) |>
+    dplyr::bind_rows()
+}
+