Skip to content

Commit

Permalink
Merge pull request #6 from openwashdata/dictionary
Browse files Browse the repository at this point in the history
Revise categorical variables
  • Loading branch information
larnsce authored Oct 23, 2024
2 parents 04663ae + 28e6ee6 commit 765f2d4
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 168 deletions.
28 changes: 26 additions & 2 deletions data-raw/data_processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ households <- household_data |>
package_type_preference),
as.factor)) |>
dplyr::mutate(dplyr::across(starts_with("business_"), as.factor)) |>
dplyr::mutate(business_water_use = as.logical(business_water_use))
dplyr::mutate(business_water_use = as.logical(business_water_use)) |>
# "no" means neither the household nor the respondent has a business, so it
# is recoded to NA. `.default` keeps every other answer unchanged; without it,
# case_when() returns NA for all rows that match no condition, which would
# blank the whole column.
dplyr::mutate(business_ownership = dplyr::case_when(
  business_ownership == "no" ~ NA,
  .default = business_ownership
)) |>
dplyr::mutate(time_of_last_struggle_to_find_water =
factor(time_of_last_struggle_to_find_water,
levels = c("over_year_ago", "last_year",
"last_30_days", "last_7_days", "last_3_days")))



Expand Down Expand Up @@ -79,12 +85,30 @@ waterpoint_data <- waterpoint_data |>

# Modify variable types
waterpoints <- waterpoint_data |>
# Harmonise the health-risk labels: "probably_*" is renamed to "possibly_*".
# `.default` passes every other value (e.g. "safe", "unsafe") through
# unchanged; without it, case_match() returns NA for any value not listed,
# which would wipe out the remaining risk categories.
dplyr::mutate(
  coli_mpn_health_risk = dplyr::case_match(
    coli_mpn_health_risk,
    "probably_safe" ~ "possibly_safe",
    "probably_unsafe" ~ "possibly_unsafe",
    .default = coli_mpn_health_risk
  ),
  tc_mpn_health_risk = dplyr::case_match(
    tc_mpn_health_risk,
    "probably_safe" ~ "possibly_safe",
    "probably_unsafe" ~ "possibly_unsafe",
    .default = tc_mpn_health_risk
  )
) |>
dplyr::mutate(across(c(community, type, available_services,
location, owner, constructor, managers,
respondent_would_use_to_prepare_rice, perception_of_quality,
tap_closure_changes, CBT_sample_source, coli_mpn_health_risk,
tc_mpn_health_risk),
as.factor))
as.factor)) |>
# set the level order of ordinal categorical variables
dplyr::mutate(perception_of_quality = factor(perception_of_quality, levels = c("low", "acceptable", "high"))) |>
## The health-risk classifications were created by the company that made the test.
## See the company's instructions for use (https://assets.ctfassets.net/vcps67yikf8u/5IbwfssqfSWqCo0U88GCAw/4ef1a9606f22cba7d79705ba3d096956/CBT_Instructions_EN.pdf)

dplyr::mutate(coli_mpn_health_risk = factor(coli_mpn_health_risk,
levels = c("safe", "possibly_safe", "possibly_unsafe", "unsafe"))) |>
dplyr::mutate(tc_mpn_health_risk = factor(tc_mpn_health_risk,
levels = c("safe", "possibly_safe", "possibly_unsafe", "unsafe")))



# Export Data ------------------------------------------------------------------
usethis::use_data(households, overwrite = TRUE)
usethis::use_data(waterpoints, overwrite = TRUE)
Expand Down
4 changes: 2 additions & 2 deletions data-raw/dictionary.csv
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ data,waterpoints.rda,price_increase,logical,price of any volume of water has inc
data,waterpoints.rda,CBT_sample_source,factor,"source of sample for compartment bag test (CBT) supplied by Aquagenx (https://www.aquagenx.com/cbt-ectc/), options including [1] indirect_from_tap_(traveled_through_hose), [2] other_storage_(traveled_through_hose_or_poured_through_container), [3] storage_tank, and [4] tap."
data,waterpoints.rda,coli_mpn,double,results of E. Coli most probable number (MPN) test per 100 mL sample
data,waterpoints.rda,coli_mpn_ci,double,results of E. Coli most probable number (MPN) test per 100 mL sample - upper 95% confidence interval (CI)
data,waterpoints.rda,coli_mpn_health_risk,factor,"results of E. Coli most probable number (MPN) test per 100 mL sample - descriptive health risk, options including [1]possibly_safe, [2]possibly_unsafe, [3]probably_saf, [4]probably_unsafe, [5] safe, [6]unsafe."
data,waterpoints.rda,coli_mpn_health_risk,factor,"results of E. Coli most probable number (MPN) test per 100 mL sample - descriptive health risk, options including [1]safe, [2]possibly_safe, [3] possibly_unsafe and [4]unsafe."
data,waterpoints.rda,tc_mpn,double,results of Total Coliforms (TC) most probable number (MPN) test per 100 mL sample
data,waterpoints.rda,tc_mpn_ci,double,results of Total Coliforms (TC) most probable number (MPN) test per 100 mL sample - upper 95% confidence interval (CI)
data,waterpoints.rda,tc_mpn_health_risk,factor,"results of Total Coliforms (TC) most probable number (MPN) test per 100 mL sample - descriptive health risk, options including [1]unsafe, [2]possibly_unsafe, and [3]probably_unsafe."
data,waterpoints.rda,tc_mpn_health_risk,factor,"results of Total Coliforms (TC) most probable number (MPN) test per 100 mL sample - descriptive health risk, options including [1]safe, [2]possibly_safe, [3] possibly_unsafe and [4]unsafe."
Binary file modified data/households.rda
Binary file not shown.
Binary file modified data/waterpoints.rda
Binary file not shown.
Loading

0 comments on commit 765f2d4

Please sign in to comment.