From 09e3e9f72a4ce8f746d2f9b154dc33a69f1baf28 Mon Sep 17 00:00:00 2001 From: carriewright11 Date: Sun, 29 Sep 2024 23:14:29 -0400 Subject: [PATCH] tiny updates to subsetting lab --- .../lab/Subsetting_Data_in_R_Lab_Key.Rmd | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/Subsetting_Data_in_R/lab/Subsetting_Data_in_R_Lab_Key.Rmd b/modules/Subsetting_Data_in_R/lab/Subsetting_Data_in_R_Lab_Key.Rmd index 1341bfc8..b430488b 100644 --- a/modules/Subsetting_Data_in_R/lab/Subsetting_Data_in_R_Lab_Key.Rmd +++ b/modules/Subsetting_Data_in_R/lab/Subsetting_Data_in_R_Lab_Key.Rmd @@ -217,8 +217,9 @@ dim(ces_Alameda) What happens if you do the steps in a different order? Why does this not work? ```{r, error = TRUE, label = '3.2aresponse'} -#ces_Alameda <- ces %>% -# select(CA_county == "Alameda") +ces_Alameda <- ces %>% + select(Traffic, Asthma) %>% + filter(CA_county == "Alameda") # you get an error because there is no CA_county variable to filter from ``` @@ -236,7 +237,7 @@ arrange(ces_Alameda, Traffic) %>% ### 1.4 -Create a new variable in `ces_Alameda` called `Asthma100`, which is equal to `Asthma` divided by 100, using `mutate()`(don't forget to reassign `ces_Alameda`). Use pipes `%>%`. +Create a new variable in `ces_Alameda` called `Asthma100`, which is equal to `Asthma` divided by 100, using `mutate()` (don't forget to reassign `ces_Alameda`). Use pipes `%>%`. Take a look at the data now! ``` # General format @@ -245,6 +246,7 @@ NEWDATA <- OLD_DATA %>% mutate(NEW_COLUMN = OLD_COLUMN) ```{r 3.4response} ces_Alameda <- ces_Alameda %>% mutate(Asthma100 = Asthma/100) +ces_Alameda ``` @@ -261,12 +263,12 @@ ces_Alameda ### P.5 -How can you find the values of `Traffic` for areas within zip code 90745 (in Los Angeles county) that also have a CES4.0 score in the second highest percentile range (90-95%) - using the initial ces data - without just looking at the data manually and instead use functions we learned today? 
+Using the original `ces` data, how can you find the values of `ApproxLocation` for areas within zip code 90745 (in Los Angeles county) that also have a CES4.0 score in the 90-95% percentile range - using functions we learned today rather than just looking through the data manually? `CES4.0PercRange`: Percentile of the CalEnviroScreen score, grouped by 5% increments. The CalEnviroScreen score is a measure of the negative environmental effects seen in a given region. Those zip codes that have a percentile range of 90-95% are those regions that experience the highest effects of pollution in California. ```{r P.5response} ces %>% filter(ZIP == 90745 & CES4.0PercRange == "90-95%") %>% - select(Traffic) + select(ApproxLocation) ```