Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added metadata to crowding, edited process_resp #91

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
012bd4f
added metadata to crowding, edited process_resp
realbp Mar 21, 2024
a9b7227
edited example in process resp
realbp Mar 25, 2024
36b3976
created process metadata file, currently full of bugs
realbp Mar 28, 2024
b50408b
fixed process metadata
realbp Mar 28, 2024
e59c717
add metadata to education
realbp Mar 28, 2024
cb8eb4d
Merge pull request #98 from getwilds/main
realbp Mar 29, 2024
a556f34
edited process metadata
realbp Apr 1, 2024
69a9ba1
created a parsing function
realbp Apr 3, 2024
f06abb2
edited parse metadata
realbp Apr 8, 2024
3913315
from 2024-04-08 meeting
seankross Apr 8, 2024
f8b9fe8
created get-metadata function
realbp Apr 9, 2024
ece6f11
completed custom print for metadata
realbp Apr 9, 2024
b97e835
Merge pull request #102 from getwilds/metadata-sk
realbp Apr 9, 2024
66b39c1
edited get metadata functions
realbp Apr 11, 2024
b633c7d
started url message
realbp Apr 12, 2024
52d8a1e
completed hyperlink function
realbp Apr 16, 2024
6ac68e0
started risk metadata
realbp Apr 17, 2024
d0187ed
removed new lines in metadata
realbp Apr 18, 2024
35e8f76
completed incd and mortality metadata
realbp Apr 18, 2024
03c527c
completed get-metadata-function
realbp Apr 19, 2024
65f70b3
Merge pull request #103 from getwilds/get-metadata-function
realbp Apr 19, 2024
86ebc9e
moved custom print functions and extract_values function to utils.R
realbp Apr 19, 2024
c5766d1
created pretty print functionality
realbp Apr 22, 2024
d8b576d
Merge pull request #108 from getwilds/get-metadata-function
realbp Apr 22, 2024
0d74325
Merge pull request #109 from getwilds/main
realbp Apr 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Imports:
magrittr,
rlang,
stringr,
tibble,
utils
Suggests:
knitr,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(print,cancerprof_metadata)
S3method(print,cancerprof_tbl)
export("%>%")
export(demo_crowding)
export(demo_education)
Expand Down Expand Up @@ -38,5 +40,6 @@ importFrom(rlang,sym)
importFrom(stats,setNames)
importFrom(stringr,str_pad)
importFrom(stringr,str_trim)
importFrom(tibble,as_tibble)
importFrom(utils,data)
importFrom(utils,read.csv)
1 change: 1 addition & 0 deletions R/.Rapp.history
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
load("/Users/datascience/Documents/Fred-Hutch-Internship/cancerprof/R/sysdata.rda")
42 changes: 42 additions & 0 deletions R/browse-trend.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1#results
#
# req_url_query(https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6) %>%
# req_perform()
#
# browseURL(paste0(https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1#results))
#
#
#
# req <- "https://statecancerprofiles.cancer.gov/historicaltrend/index.php"
#
# api_arguments <- "0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6"
#
# resp <- req %>%
# req_url_query(
# stateFIPS = fips_scp(area),
# areatype = tolower(areatype),
# topic = "crowd",
# demo = handle_crowding(crowding),
# race = handle_race(race),
# type = "manyareacensus",
# sortVariableName = "value",
# sortOrder = "default",
# output = 1
# ) %>%
# req_perform()
#
#
# test_ur_fail <- "https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6"
# req <- "https://statecancerprofiles.cancer.gov/historicaltrend/data.php/historicaltrend.csv?0&9953&999&7599&136&071&07&2&0&0&1&1&1&1&6"
# req <- request("https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6")
#
# resp <- req %>%
# req_perform() %>%
# resp_body_string()
#
#
# if (httr2::resp_content_type(resp) != "text/csv") {
# cli_abort("Invalid input, please check documentation for valid arguments.")
# }


9 changes: 7 additions & 2 deletions R/demo-crowding.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#' @importFrom httr2 req_url_query req_perform
#' @importFrom stats setNames
#' @importFrom dplyr mutate across
#' @importFrom tibble as_tibble
#'
#' @returns A data frame with the following columns: Area, Area Code,
#' Percent, Households, Rank.
Expand Down Expand Up @@ -68,14 +69,18 @@ demo_crowding <- function(area, areatype, crowding, race) {
) %>%
req_perform()

resp_url <- resp$url

resp <- process_resp(resp, "demographics")

resp %>%
resp$data <- resp$data %>%
setNames(c(
get_area(areatype),
"Percent",
"Households",
"Rank"
)) %>%
mutate(across(c("Percent", "Households"), \(x) as.numeric(x)))

process_metadata(resp, "demographics", resp_url)
}
9 changes: 7 additions & 2 deletions R/demo-education.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,20 @@ demo_education <- function(area, areatype, education, sex = NULL, race = NULL) {

resp <- resp %>%
req_perform()

resp_url <- resp$url

resp <- process_resp(resp, "demographics")

resp %>%
resp$data <- resp$data %>%
setNames(c(
get_area(areatype),
"Percent",
"Households",
"Rank"
)) %>%
mutate(across(c("Percent", "Households"), \(x) as.numeric(x)))

process_metadata(resp, "demographics", resp_url)

}
4 changes: 3 additions & 1 deletion R/demo-population.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,14 @@ demo_population <- function(area, areatype, population, race = NULL, sex = NULL)

resp <- process_resp(resp, "demographics")

resp %>%
resp$data %>%
setNames(c(
get_area(areatype),
"Percent",
"People",
"Rank"
)) %>%
mutate(across(c("Percent", "People"), \(x) as.numeric(x)))

process_metadata(resp)
}
4 changes: 3 additions & 1 deletion R/demo-svi.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ demo_svi <- function(area, svi) {

resp <- process_resp(resp, "demographics")

resp %>%
resp$data %>%
setNames(c("County", "FIPS", "Score")) %>%
mutate(across(c("Score"), \(x) as.numeric(x)))

process_metadata(resp)
}
101 changes: 101 additions & 0 deletions R/get-metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#' Get Metadata
#'
#' This function assigns a list of metadata components and returns a string of
#' processed metadata that is easily readable
#'
#' @param input_tbl A tibble object
#'
#' @returns a string of metadata and an invisible metadata object as a list
#' of strings
#'
#' @examples
#' \dontrun{
#' process_metadata(resp)
#' }
get_metadata <- function(input_tbl) {

resp_metadata <- attr(input_tbl, "metadata")

resp_metadata <- gsub("\\\"", "", resp_metadata)

#check data topic
data_topic <- attributes(input_tbl)$data_topic

# do some conditionals to filter data topic
if (data_topic == "demographics" || data_topic == "risks") {
data_report <- c(resp_metadata[1], resp_metadata[2], resp_metadata[3], resp_metadata[4])
sortedby <- extract_values("Sorted by", resp_metadata)
createdby <- extract_values("Created by", resp_metadata)
data_sources <- extract_values("Source", resp_metadata)
data_dictionary <- resp_metadata[grep("For more information", resp_metadata)]
data_limitations <- resp_metadata[grep("Data for", resp_metadata)]

name_change <- extract_values("Name Change:", resp_metadata)

exclude_keywords <- c("Sorted by", "Created by", "Source", "For more information", "Data for", "Name Change")

additional_notes <- resp_metadata[!grepl(paste(exclude_keywords, collapse = "|"), resp_metadata, ignore.case = TRUE)]
additional_notes <- additional_notes[!additional_notes %in% data_report]

output_metadata_list <- list(
data_report = data_report,
sortedby = sortedby,
createdby = createdby,
data_sources = data_sources,
data_dictionary = data_dictionary,
data_limitations = data_limitations,
additional_notes = additional_notes
)

} else if (data_topic == "incidence" || data_topic == "mortality") {
data_report <- c(resp_metadata[1], resp_metadata[2], resp_metadata[3])
sortedby <- extract_values("Sorted by", resp_metadata)
createdby <- extract_values("Created by", resp_metadata)
trend <- extract_values("^ ", resp_metadata)
trend_note <- extract_values("trend note", resp_metadata)
rate_note <- extract_values("rate note", resp_metadata)
stage_note <- extract_values("Stage ", resp_metadata)
rank_note <- extract_values("rank note", resp_metadata)
data_not_available <- resp_metadata[grep("Data not available", resp_metadata)]
data_sources <- extract_values("Source:", resp_metadata)
data_limitations <- resp_metadata[grep("Data for", resp_metadata)]

exclude_keywords <- c("Sorted by", "Created by", "^ ", "trend note",
"rate note", "Stage ", "rank note",
"Data not available", "Source", "Data for")

additional_notes <- resp_metadata[!grepl(paste(exclude_keywords, collapse = "|"), resp_metadata, ignore.case = TRUE)]
additional_notes <- additional_notes[!additional_notes %in% data_report]

output_metadata_list <- list(
data_report = data_report,
sortedby = sortedby,
createdby = createdby,
trend = trend,
trend_note = trend_note,
rate_note = rate_note,
stage_note = stage_note,
rank_note = rank_note,
data_not_available = data_not_available,
data_sources = data_sources,
data_limitations = data_limitations,
additional_notes = additional_notes
)

} else {
cli_abort("Incorrect data topic argument, please ensure that it is correct.")
}

#add attribute to list
attr(output_metadata_list, "data_topic") <- data_topic

class(output_metadata_list) <- c("cancerprof_metadata", class(output_metadata_list))

output_metadata_list
}

get_raw_metadata <- function(input_tbl) {
resp_metadata <- attr(input_tbl, "metadata")

return(resp_metadata)
}
2 changes: 0 additions & 2 deletions R/handle-cancer.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@
#' @noRd
#'
#' @examples
#' \dontrun{
#' handle_cancer("bladder")
#' }
handle_cancer <- function(cancer) {
cancer <- tolower(cancer)

Expand Down
2 changes: 0 additions & 2 deletions R/handle-food.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
#' @noRd
#'
#' @examples
#' \dontrun{
#' handle_food("limited access to healthy food")
#' }
handle_food <- function(food) {
food <- tolower(food)

Expand Down
8 changes: 6 additions & 2 deletions R/incidence-cancer.R
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year

resp <- resp %>%
req_perform()

resp_url <- resp$url

resp <- process_resp(resp, "incidence")

Expand All @@ -179,7 +181,7 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
)

if (stage == "all stages") {
resp %>%
resp$data <- resp$data %>%
setNames(c(
get_area(areatype),
shared_names_to_numeric,
Expand All @@ -196,7 +198,7 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
"Trend_Upper_95%_CI"
), \(x) as.numeric(x)))
} else if (stage == "late stage (regional & distant)") {
resp %>%
resp$data <- resp$data %>%
setNames(c(
get_area(areatype),
shared_names_to_numeric,
Expand All @@ -208,4 +210,6 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
"Percentage_of_Cases_with_Late_Stage"
), \(x) as.numeric(x)))
}

process_metadata(resp, "incidence", resp_url)
}
6 changes: 5 additions & 1 deletion R/mortality-cancer.R
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) {
resp <- resp %>%
req_perform()

resp_url <- resp$url

resp <- process_resp(resp, "mortality")

names_to_numeric <- c(
Expand All @@ -158,7 +160,7 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) {
"Upper_CI_Rank"
)

resp %>%
resp$data <- resp$data %>%
setNames(c(
get_area(areatype),
"Met Healthy People Objective of ***?",
Expand All @@ -180,4 +182,6 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) {
"Lower_95%_CI_Trend",
"Upper_95%_CI_Trend"
), \(x) as.numeric(x)))

process_metadata(resp, "mortality", resp_url)
}
42 changes: 42 additions & 0 deletions R/process-metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#' Custom print function
#'
#' This custom print function processes the
#' metadata output for a response object
#'
#' @param x
#'
#' @export
print.cancerprof_tbl <- function(x, ...) {
#cat("Metadata:", "\n")
# we actually need to figure out how to use pillar here
cat("\033[38;5;246m# Access metadata with `get_metadata()`\033[39m", "\n")
# for (i in seq_along(attr(x, "metadata"))) {
# cat(names(attr(x, "metadata"))[i], attr(x, "metadata")[[i]], "\n")
# }
NextMethod(x, ...)
}

#' Process Metadata
#'
#' This function sets the class of the response data
#' to use the custom print function
#'
#' @param resp A response object
#'
#' @returns A response object with Metadata and a tibble
#'
#' @examples
#' \dontrun{
#' process_metadata(resp)
#' }
process_metadata <- function(resp) {

resp_data <- resp$data
resp_metadata <- resp$metadata

class(resp_data) <- c("cancerprof_tbl", class(resp_data))
attr(resp_data, "metadata") <- resp_metadata

#print(resp_data)
return(resp_data)
}
Loading
Loading