From 0d3c08d5ec2d49cca3dc6604bbd08ac22319be49 Mon Sep 17 00:00:00 2001
From: Alexander Rossell Hayes
Date: Mon, 1 Apr 2024 22:48:45 -0700
Subject: [PATCH] test(census_geo_api): add test that age/sex subsets sum to race totals

---
Reviewer notes (free-form text in this section is not part of the diff):

What the patch does
  * Removes the "age/sex subsets sum to race totals" TODO from the "snapshot"
    test and appends a helper, a "sums" test, and one new trailing TODO.
  * expect_subset_sums_equal_overall_total(data):
      1. drops every column whose name starts with "r_";
      2. pivots every column whose name starts with "P" into name/value rows;
      3. labels a row "overall" when its name contains "001", otherwise
         "subset", then cuts the name at its first underscore;
      4. sums `value` within each combination of all remaining columns
         (which includes `name` and `subset`);
      5. for each combination of the columns other than `subset` and `value`,
         records whether the sums take at most one distinct value;
      6. expects every such record to be TRUE.
  * The "sums" test applies the helper to census_geo_api() output for
    state = "DE", geo = "county", years "2020" and "2010", each with
    sex = TRUE, with age = TRUE, and with both.
  * The context shown in the first hunk header is
    skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY"))), so presumably these
    tests only run when that variable is set -- confirm in the target file.

NOTE(review)
  * A name with no underscore is left unchanged by sub("_.+", "", name), so
    it ends up alone in its group and the check passes trivially. Confirm
    that the "P" column names returned for both years contain an underscore.
  * all() of a zero-length vector is TRUE, so an empty `sums` passes silently.
  * Both spellings dplyr::summarise() and dplyr::summarize() are used.
  * Line breaks, blank lines and leading whitespace in this copy were
    reconstructed to fit the hunk line counts; verify the context lines
    against the target file before applying.

 tests/testthat/test-census_geo_api.R | 49 +++++++++++++++++++++++++++-
 1 file changed, 48 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-census_geo_api.R b/tests/testthat/test-census_geo_api.R
index d4d3534..8588bf6 100644
--- a/tests/testthat/test-census_geo_api.R
+++ b/tests/testthat/test-census_geo_api.R
@@ -2,7 +2,6 @@ skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
 
 test_that("snapshot", {
   # TODO: Test that sub-geographies sum to match pooled geographies (e.g. blocks sum to block groups, sum to tracts, sum to counties)
-  # TODO: Test that age/sex subsets sum to race totals
 
   # These snapshots were generated using the calculations in v2.0.0
   # and verified that the calculations resulted in the same numbers for PR #120.
@@ -23,3 +22,51 @@ test_that("snapshot", {
     style = "deparse"
   )
 })
+
+expect_subset_sums_equal_overall_total <- function(data) {
+  `%>%` <- dplyr::`%>%`
+
+  sums <- data %>%
+    dplyr::select(-dplyr::starts_with("r_")) %>%
+    tidyr::pivot_longer(dplyr::starts_with("P")) %>%
+    dplyr::mutate(
+      subset = dplyr::case_when(
+        grepl("001", name) ~ "overall",
+        .default = "subset"
+      ),
+      name = sub("_.+", "", name)
+    ) %>%
+    dplyr::summarise(
+      value = sum(value),
+      .by = -value
+    ) %>%
+    dplyr::summarize(
+      are_equal = length(unique(value)) <= 1,
+      .by = c(-subset, -value)
+    )
+
+  expect_true(all(sums$are_equal))
+}
+
+test_that("sums", {
+  expect_subset_sums_equal_overall_total(
+    census_geo_api(state = "DE", geo = "county", year = "2020", sex = TRUE)
+  )
+  expect_subset_sums_equal_overall_total(
+    census_geo_api(state = "DE", geo = "county", year = "2020", age = TRUE)
+  )
+  expect_subset_sums_equal_overall_total(
+    census_geo_api(state = "DE", geo = "county", year = "2020", age = TRUE, sex = TRUE)
+  )
+  expect_subset_sums_equal_overall_total(
+    census_geo_api(state = "DE", geo = "county", year = "2010", sex = TRUE)
+  )
+  expect_subset_sums_equal_overall_total(
+    census_geo_api(state = "DE", geo = "county", year = "2010", age = TRUE)
+  )
+  expect_subset_sums_equal_overall_total(
+    census_geo_api(state = "DE", geo = "county", year = "2010", age = TRUE, sex = TRUE)
+  )
+})
+
+# TODO: Test that all variables sum to total population of geography