getwilds · realbp · Mar 21, 2024 · Mar 25, 2024 · Mar 28, 2024 · Mar 28, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -19,6 +19,7 @@ Imports:
     magrittr,
     rlang,
     stringr,
+    tibble,
     utils
 Suggests: 
     knitr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(print,cancerprof_metadata)
+S3method(print,cancerprof_tbl)
 export("%>%")
 export(demo_crowding)
 export(demo_education)
@@ -38,5 +40,6 @@ importFrom(rlang,sym)
 importFrom(stats,setNames)
 importFrom(stringr,str_pad)
 importFrom(stringr,str_trim)
+importFrom(tibble,as_tibble)
 importFrom(utils,data)
 importFrom(utils,read.csv)
diff --git a/R/.Rapp.history b/R/.Rapp.history
@@ -0,0 +1 @@
+load("/Users/datascience/Documents/Fred-Hutch-Internship/cancerprof/R/sysdata.rda")
diff --git a/R/browse-trend.R b/R/browse-trend.R
@@ -0,0 +1,42 @@
+# https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1#results
+# 
+# req_url_query(https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6) %>% 
+#   req_perform()
+# 
+# browseURL(paste0(https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1#results))
+#                  
+#                  
+#                  
+# req <- "https://statecancerprofiles.cancer.gov/historicaltrend/index.php"
+#  
+# api_arguments <- "0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6"
+#  
+#  resp <- req %>%
+#    req_url_query(
+#      stateFIPS = fips_scp(area),
+#      areatype = tolower(areatype),
+#      topic = "crowd",
+#      demo = handle_crowding(crowding),
+#      race = handle_race(race),
+#      type = "manyareacensus",
+#      sortVariableName = "value",
+#      sortOrder = "default",
+#      output = 1
+#    ) %>%
+#    req_perform()
+#  
+#  
+#  test_ur_fail <- "https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6"
+#  req <- "https://statecancerprofiles.cancer.gov/historicaltrend/data.php/historicaltrend.csv?0&9953&999&7599&136&071&07&2&0&0&1&1&1&1&6"
+#  req <- request("https://statecancerprofiles.cancer.gov/historicaltrend/index.php?0&9953&999&7599&136&071&48&2&0&0&1&1&1&1&6")
+#  
+# resp <- req %>%
+#   req_perform() %>% 
+#   resp_body_string()
+# 
+# 
+# if (httr2::resp_content_type(resp) != "text/csv") {
+#   cli_abort("Invalid input, please check documentation for valid arguments.")
+# }
+
+
diff --git a/R/demo-crowding.R b/R/demo-crowding.R
@@ -20,6 +20,7 @@
 #' @importFrom httr2 req_url_query req_perform
 #' @importFrom stats setNames
 #' @importFrom dplyr mutate across
+#' @importFrom tibble as_tibble
 #'
 #' @returns A data frame with the following columns: Area, Area Code,
 #' Percent, Households, Rank.
@@ -68,14 +69,18 @@ demo_crowding <- function(area, areatype, crowding, race) {
     ) %>%
     req_perform()
 
+  resp_url <- resp$url
+
   resp <- process_resp(resp, "demographics")
-
-  resp %>%
+  
+  resp$data <- resp$data %>%
     setNames(c(
       get_area(areatype),
       "Percent",
       "Households",
       "Rank"
     )) %>%
     mutate(across(c("Percent", "Households"), \(x) as.numeric(x)))
+
+  process_metadata(resp, "demographics", resp_url)
 }
diff --git a/R/demo-education.R b/R/demo-education.R
@@ -88,15 +88,20 @@ demo_education <- function(area, areatype, education, sex = NULL, race = NULL) {
 
   resp <- resp %>%
     req_perform()
+
+  resp_url <- resp$url
 
   resp <- process_resp(resp, "demographics")
-
-  resp %>%
+  
+  resp$data <- resp$data %>%
     setNames(c(
       get_area(areatype),
       "Percent",
       "Households",
       "Rank"
     )) %>%
     mutate(across(c("Percent", "Households"), \(x) as.numeric(x)))
+
+  process_metadata(resp, "demographics", resp_url)
+
 }
diff --git a/R/demo-population.R b/R/demo-population.R
@@ -114,12 +114,14 @@ demo_population <- function(area, areatype, population, race = NULL, sex = NULL)
 
   resp <- process_resp(resp, "demographics")
 
-  resp %>%
+  resp$data %>%
     setNames(c(
       get_area(areatype),
       "Percent",
       "People",
       "Rank"
     )) %>%
     mutate(across(c("Percent", "People"), \(x) as.numeric(x)))
+
+  process_metadata(resp)
 }
diff --git a/R/demo-svi.R b/R/demo-svi.R
@@ -56,7 +56,9 @@ demo_svi <- function(area, svi) {
 
   resp <- process_resp(resp, "demographics")
 
-  resp %>%
+  resp$data %>%
     setNames(c("County", "FIPS", "Score")) %>%
     mutate(across(c("Score"), \(x) as.numeric(x)))
+
+  process_metadata(resp)
 }
diff --git a/R/get-metadata.R b/R/get-metadata.R
@@ -0,0 +1,101 @@
+#' Get Metadata
+#'
+#' This function assigns a list of metadata components and returns a string of
+#' processed metadata that is easily readable
+#'
+#' @param input_tbl A tibble object
+#' 
+#' @returns a string of metadata and an invisible metadata object as a list 
+#' of strings
+#' 
+#' @examples
+#' \dontrun{
+#' process_metadata(resp)
+#' }
+get_metadata <- function(input_tbl) {
+
+  resp_metadata <- attr(input_tbl, "metadata")
+
+  resp_metadata <- gsub("\\\"", "", resp_metadata)
+
+  #check data topic
+  data_topic <- attributes(input_tbl)$data_topic
+
+  # do some conditionals to filter data topic
+  if (data_topic == "demographics" || data_topic == "risks") {
+    data_report <- c(resp_metadata[1], resp_metadata[2], resp_metadata[3], resp_metadata[4])
+    sortedby <- extract_values("Sorted by", resp_metadata)
+    createdby <- extract_values("Created by", resp_metadata)
+    data_sources <- extract_values("Source", resp_metadata)
+    data_dictionary <- resp_metadata[grep("For more information", resp_metadata)]
+    data_limitations <- resp_metadata[grep("Data for", resp_metadata)]
+
+    name_change <- extract_values("Name Change:", resp_metadata)
+
+    exclude_keywords <- c("Sorted by", "Created by", "Source", "For more information", "Data for", "Name Change")
+
+    additional_notes <- resp_metadata[!grepl(paste(exclude_keywords, collapse = "|"), resp_metadata, ignore.case = TRUE)]
+    additional_notes <- additional_notes[!additional_notes %in% data_report]
+
+    output_metadata_list <- list(
+      data_report = data_report,
+      sortedby = sortedby,
+      createdby = createdby,
+      data_sources = data_sources,
+      data_dictionary = data_dictionary,
+      data_limitations = data_limitations,
+      additional_notes = additional_notes
+    )
+
+  } else if (data_topic == "incidence" || data_topic == "mortality") {
+    data_report <- c(resp_metadata[1], resp_metadata[2], resp_metadata[3])
+    sortedby <- extract_values("Sorted by", resp_metadata)
+    createdby <- extract_values("Created by", resp_metadata)
+    trend <- extract_values("^ ", resp_metadata)
+    trend_note <- extract_values("trend note", resp_metadata)
+    rate_note <- extract_values("rate note", resp_metadata)
+    stage_note <- extract_values("Stage ", resp_metadata)
+    rank_note <- extract_values("rank note", resp_metadata)
+    data_not_available <- resp_metadata[grep("Data not available", resp_metadata)]
+    data_sources <- extract_values("Source:", resp_metadata)
+    data_limitations <- resp_metadata[grep("Data for", resp_metadata)]
+
+    exclude_keywords <- c("Sorted by", "Created by", "^ ", "trend note", 
+                          "rate note", "Stage ", "rank note", 
+                          "Data not available",  "Source", "Data for")
+
+    additional_notes <- resp_metadata[!grepl(paste(exclude_keywords, collapse = "|"), resp_metadata, ignore.case = TRUE)]
+    additional_notes <- additional_notes[!additional_notes %in% data_report]
+
+    output_metadata_list <- list(
+      data_report = data_report,
+      sortedby = sortedby,
+      createdby = createdby,
+      trend = trend,
+      trend_note = trend_note,
+      rate_note = rate_note,
+      stage_note = stage_note,
+      rank_note = rank_note,
+      data_not_available = data_not_available,
+      data_sources = data_sources,
+      data_limitations = data_limitations,
+      additional_notes = additional_notes
+    )
+
+  } else {
+    cli_abort("Incorrect data topic argument, please ensure that it is correct.")
+  }
+
+  #add attribute to list
+  attr(output_metadata_list, "data_topic") <- data_topic
+
+  class(output_metadata_list) <- c("cancerprof_metadata", class(output_metadata_list))
+
+  output_metadata_list
+}
+
+get_raw_metadata <- function(input_tbl) {
+  resp_metadata <- attr(input_tbl, "metadata")
+
+  return(resp_metadata)
+}
diff --git a/R/handle-cancer.R b/R/handle-cancer.R
@@ -35,9 +35,7 @@
 #' @noRd
 #'
 #' @examples
-#' \dontrun{
 #' handle_cancer("bladder")
-#' }
 handle_cancer <- function(cancer) {
   cancer <- tolower(cancer)
 

diff --git a/R/handle-food.R b/R/handle-food.R
@@ -12,9 +12,7 @@
 #' @noRd
 #'
 #' @examples
-#' \dontrun{
 #' handle_food("limited access to healthy food")
-#' }
 handle_food <- function(food) {
   food <- tolower(food)
 

diff --git a/R/incidence-cancer.R b/R/incidence-cancer.R
@@ -166,6 +166,8 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
 
   resp <- resp %>%
     req_perform()
+
+  resp_url <- resp$url
 
   resp <- process_resp(resp, "incidence")
 
@@ -179,7 +181,7 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
   )
 
   if (stage == "all stages") {
-    resp %>%
+    resp$data <- resp$data %>%
       setNames(c(
         get_area(areatype),
         shared_names_to_numeric,
@@ -196,7 +198,7 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
         "Trend_Upper_95%_CI"
       ), \(x) as.numeric(x)))
   } else if (stage == "late stage (regional & distant)") {
-    resp %>%
+    resp$data <- resp$data %>%
       setNames(c(
         get_area(areatype),
         shared_names_to_numeric,
@@ -208,4 +210,6 @@ incidence_cancer <- function(area, areatype, cancer, race, sex, age, stage, year
         "Percentage_of_Cases_with_Late_Stage"
       ), \(x) as.numeric(x)))
   }
+
+  process_metadata(resp, "incidence", resp_url)
 }
diff --git a/R/mortality-cancer.R b/R/mortality-cancer.R
@@ -147,6 +147,8 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) {
   resp <- resp %>%
     req_perform()
 
+  resp_url <- resp$url
+
   resp <- process_resp(resp, "mortality")
 
   names_to_numeric <- c(
@@ -158,7 +160,7 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) {
     "Upper_CI_Rank"
   )
 
-  resp %>%
+  resp$data <- resp$data %>%
     setNames(c(
       get_area(areatype),
       "Met Healthy People Objective of ***?",
@@ -180,4 +182,6 @@ mortality_cancer <- function(area, areatype, cancer, race, sex, age, year) {
       "Lower_95%_CI_Trend",
       "Upper_95%_CI_Trend"
     ), \(x) as.numeric(x)))
+
+  process_metadata(resp, "mortality", resp_url)
 }
diff --git a/R/process-metadata.R b/R/process-metadata.R
@@ -0,0 +1,42 @@
+#' Custom print function
+#'
+#' This custom print function processes the
+#' metadata output for a response object
+#'
+#' @param x
+#'
+#' @export
+print.cancerprof_tbl <- function(x, ...) {
+  #cat("Metadata:", "\n")
+  # we actually need to figure out how to use pillar here
+  cat("\033[38;5;246m# Access metadata with `get_metadata()`\033[39m", "\n")
+  # for (i in seq_along(attr(x, "metadata"))) {
+  #   cat(names(attr(x, "metadata"))[i], attr(x, "metadata")[[i]], "\n")
+  # }
+  NextMethod(x, ...)
+}
+
+#' Process Metadata
+#'
+#' This function sets the class of the response data 
+#' to use the custom print function
+#'
+#' @param resp A response object
+#' 
+#' @returns A response object with Metadata and a tibble
+#' 
+#' @examples
+#' \dontrun{
+#' process_metadata(resp)
+#' }
+process_metadata <- function(resp) {
+
+  resp_data <- resp$data
+  resp_metadata <- resp$metadata
+
+  class(resp_data) <- c("cancerprof_tbl", class(resp_data))
+  attr(resp_data, "metadata") <- resp_metadata
+
+  #print(resp_data)
+  return(resp_data)
+}