Feature/outbreak layer #84

Open · wants to merge 19 commits into base: main
Binary file not shown.
1 change: 1 addition & 0 deletions .gitignore
@@ -18,3 +18,4 @@ data/**/*.parquet
data/**/*.zip
data/**/*.nc
data/**/*.tif
+ data/*
18 changes: 13 additions & 5 deletions R/aggregate_augmented_data_by_adm.R
@@ -9,19 +9,27 @@
#' @author Emma Mendelsohn
#' @export
aggregate_augmented_data_by_adm <- function(augmented_data,
                                            rsa_polygon,
                                            model_dates_selected) {

# Read augmented data, convert to raster
r <- arrow::read_parquet(glue::glue("{augmented_data}/date={model_dates_selected}/part-0.parquet")) |>
rast()
  crs(r) <- crs(rast())  # empty rast() carries the default lon/lat WGS84 CRS

# Mask raster to polygon
r <- mask(r, rsa_polygon)

- p <- terra::extract(r, rsa_polygon, mean, na.rm = TRUE, weights = TRUE)
+ # Get the mean value by polygons
+ p <- terra::extract(r, rsa_polygon, mean, na.rm = TRUE, weights = FALSE, ID = FALSE)


  # Note: spatial weighting is handled at model fitting via district area;
  # the probability of an outbreak in a district scales with its size.

bind_cols(rsa_polygon, p) |>
as_tibble() |>
mutate(date = model_dates_selected) |>
-   select(date, shapeName, ID, names(p))
+   select(date, shapeName, names(p))
}
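A hedged usage sketch for the updated function; the paths and the districts object below are illustrative, not from this PR:

```r
# Minimal sketch, assuming an augmented-data directory partitioned by date
# and an sf polygon layer with a shapeName column (names are hypothetical).
library(sf)

districts <- st_read("data/rsa_districts.gpkg")  # hypothetical path

adm_means <- aggregate_augmented_data_by_adm(
  augmented_data       = "data/augmented",       # hypothetical path
  rsa_polygon          = districts,
  model_dates_selected = as.Date("2018-04-22")
)
# Result: one row per district with date, shapeName, and the unweighted
# mean of each raster layer (weights = FALSE, ID column dropped).
```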
45 changes: 45 additions & 0 deletions R/build_workflow.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#' Build the outbreak classification workflow
#'
#' Combines the 30-day outbreak recipe with a tunable xgboost specification.
#'
#' @param model_data_train training split used to define the recipe
#' @return a tidymodels workflow
#' @author Emma Mendelsohn
#' @export
build_workflow <- function(model_data_train) {

rec <- recipe(formula = as.formula(outbreak_30 ~
anomaly_relative_humidity_30 +
anomaly_temperature_30 +
anomaly_precipitation_30 +
anomaly_relative_humidity_60 +
anomaly_temperature_60 +
anomaly_precipitation_60 +
anomaly_relative_humidity_90 +
anomaly_temperature_90 +
anomaly_precipitation_90 +
anomaly_temperature_forecast_29 +
                                  anomaly_precipitation_forecast_29 +
                                  anomaly_relative_humidity_forecast_29 +
anomaly_ndvi_30 +
anomaly_ndvi_60 +
                                  anomaly_ndvi_90),
data = model_data_train) |>
step_mutate(outbreak_30 = as.factor(outbreak_30), skip = TRUE)

spec <-
boost_tree(trees = 1000, min_n = tune(), tree_depth = tune(), learn_rate = tune(),
loss_reduction = tune()) |>
    set_mode("classification") |>
    set_engine("xgboost", objective = "binary:logistic")

  model_workflow <-
    workflow() |>
    add_recipe(rec) |>
    add_model(spec)

  model_workflow
}
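A sketch of exercising the workflow once with arbitrary hyperparameter values (illustrative; in the pipeline these would come from tuning):

```r
# Resolve the tune() placeholders with arbitrary values and fit once,
# just to confirm the workflow assembles; this is not a tuned model.
library(tidymodels)

wf <- build_workflow(model_data_train)

wf_fit <- wf |>
  finalize_workflow(tibble::tibble(min_n = 10, tree_depth = 6,
                                   learn_rate = 0.05, loss_reduction = 0)) |>
  fit(data = model_data_train)
```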
36 changes: 36 additions & 0 deletions R/create_outbreak_layer.R
@@ -0,0 +1,36 @@
#' Build the outbreak outcome layer
#'
#' For each selected model date, flags the districts with an RVF outbreak
#' starting in the following 30 days.
#'
#' @param wahis_rvf_outbreaks_preprocessed preprocessed WAHIS RVF outbreak table
#' @param rsa_polygon polygon layer of administrative districts
#' @param model_dates_selected dates on which the model makes predictions
#' @return tibble of date, shapeName, and outbreak_30 = TRUE
#' @author Emma Mendelsohn
#' @export
create_outbreak_layer <- function(wahis_rvf_outbreaks_preprocessed,
rsa_polygon,
model_dates_selected) {

# Get polygons for outbreaks
rvf_points <- wahis_rvf_outbreaks_preprocessed |>
distinct(country_name, outbreak_id, outbreak_start_date, latitude, longitude) |>
st_as_sf(coords = c("longitude", "latitude"))
st_crs(rvf_points) <- st_crs(rsa_polygon) # Set the CRS (Coordinate Reference System)

rvf_points_polygon <- st_join(rvf_points, rsa_polygon, join = st_within) |>
drop_na(shapeGroup) |>
mutate(outbreak_start_date = ymd(outbreak_start_date))

# For each model selected date, determine which polygons had an outbreak in the following 30 days
rvf_points_polygon_dates <- map_dfr(model_dates_selected, function(model_date){
day_diff <- rvf_points_polygon$outbreak_start_date - model_date
rvf_points_polygon[which(day_diff >= 1 & day_diff <= 30),] |>
mutate(date = model_date)
})

# Return the unique combination of model select dates and districts with outbreaks
rvf_points_polygon_dates |>
distinct(date, shapeName) |>
mutate(outbreak_30 = TRUE)

}
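The 30-day forward window above can be checked with toy dates:

```r
# Toy illustration of the windowing: keep outbreaks starting 1-30 days
# after the model date.
library(lubridate)

model_date <- ymd("2018-01-01")
starts     <- ymd(c("2018-01-02", "2018-01-31", "2018-02-15"))
day_diff   <- starts - model_date
starts[day_diff >= 1 & day_diff <= 30]
#> "2018-01-02" "2018-01-31"
```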
19 changes: 10 additions & 9 deletions R/download_sentinel_ndvi.R
@@ -30,18 +30,19 @@ download_sentinel_ndvi <- function(sentinel_ndvi_api_parameters, download_direct

# auth
  auth <- POST("https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
               body = list(
                 grant_type = "password",
                 username = Sys.getenv("COPERNICUS_USERNAME"),
                 password = Sys.getenv("COPERNICUS_PASSWORD"),
                 client_id = "cdse-public"
               ),
               encode = "form")

- url <- glue::glue("http://catalogue.dataspace.copernicus.eu/odata/v1/Products({id})/$value")
+ url <- glue::glue("https://download.dataspace.copernicus.eu/odata/v1/Products({id})/$value")
file <- here::here(download_directory, glue::glue("{download_filename}.zip"))
response <- GET(url, add_headers(Authorization = paste("Bearer", content(auth)$access_token)),
write_disk(file, overwrite = TRUE))

  unzip(file, files = paste0(download_filename, ".SEN3/NDVI.nc"), exdir = download_directory)
file.remove(file)
file.rename(here::here(download_directory, paste0(download_filename, ".SEN3/NDVI.nc")),
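A guard like the following could sit between the token request and the download GET; it is a hedged addition, not part of this diff:

```r
# Hypothetical addition: fail fast if the Copernicus token request failed,
# instead of sending an unauthorized download request.
if (httr::http_error(auth)) {
  stop("Copernicus auth failed with status ", httr::status_code(auth))
}
```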
41 changes: 41 additions & 0 deletions R/get_holdout_masks.R
@@ -0,0 +1,41 @@
#' Get holdout masks
#'
#' For each holdout row, returns the (date, shapeName) combinations to exclude
#' from training: the same district for the 90 days after the holdout date,
#' and adjacent districts on the holdout date.
#'
#' @param holdout_data holdout rows with date and shapeName columns
#' @param mask_lookup list of lookups from make_mask_lookup()
#' @return distinct tibble of masked date/shapeName pairs
#' @author Emma Mendelsohn
#' @export
get_holdout_masks <- function(holdout_data, mask_lookup) {

date_masks <- holdout_data |>
select(date, shapeName) |>
left_join(mask_lookup$masked_dates_90_days_lookup, by = join_by(date)) |>
unnest(mask) |>
select(date = mask, shapeName)

shape_masks <- holdout_data |>
select(date, shapeName) |>
left_join(mask_lookup$masked_shapes_adjacent_lookup, by = join_by("shapeName" == "shape")) |>
unnest(mask) |>
select(date, shapeName = mask)

masks <- bind_rows(date_masks, shape_masks) |>
distinct()

return(masks)

  # take 1: mask shape and date jointly (incorrect: it misses masking the district itself for the following 3 months)
# holdout_data |>
# select(date, shapeName) |>
# left_join(mask_lookup$masked_dates_90_days_lookup, by = join_by(date)) |>
# rename(date_mask = mask) |>
# left_join(mask_lookup$masked_shapes_adjacent_lookup, by = join_by("shapeName" == "shape")) |>
# rename(shape_mask = mask) |>
# select(-date, -shapeName) |>
# unnest(shape_mask) |>
# unnest(date_mask)

}
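A hedged sketch of how the returned masks might be applied; the model_data object is a hypothetical stand-in:

```r
# Drop any training row whose (date, shapeName) pair is masked for this
# holdout; model_data is a hypothetical stand-in for the full data frame.
library(dplyr)

masks      <- get_holdout_masks(holdout_data, mask_lookup)
train_data <- anti_join(model_data, masks, by = c("date", "shapeName"))
```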
60 changes: 60 additions & 0 deletions R/get_mermaid.R
@@ -0,0 +1,60 @@
# Adapted from targets::tar_mermaid(): produces mermaid.js text for the
# pipeline graph. Relies on unexported targets internals (tar_assert_*,
# callr_outer, tar_mermaid_inner), so it assumes access to the targets
# namespace.
get_mermaid <- function(
targets_only = FALSE,
names = NULL,
shortcut = FALSE,
allow = NULL,
exclude = ".Random.seed",
outdated = TRUE,
label = NULL,
legend = TRUE,
color = TRUE,
reporter = targets::tar_config_get("reporter_outdated"),
seconds_reporter = targets::tar_config_get("seconds_reporter"),
callr_function = callr::r,
callr_arguments = targets::tar_callr_args_default(callr_function),
envir = parent.frame(),
script = targets::tar_config_get("script"),
store = targets::tar_config_get("store")
) {
tar_assert_allow_meta("tar_mermaid", store = store)
force(envir)
tar_assert_lgl(targets_only, "targets_only must be logical.")
tar_assert_lgl(outdated, "outdated in tar_mermaid() must be logical.")
tar_assert_in(label, c("time", "size", "branches"))
tar_assert_lgl(legend)
tar_assert_lgl(color)
tar_assert_scalar(legend)
tar_assert_scalar(color)
tar_config_assert_reporter_outdated(reporter)
tar_assert_callr_function(callr_function)
  tar_assert_list(callr_arguments, "callr_arguments must be a list.")
tar_assert_dbl(seconds_reporter)
tar_assert_scalar(seconds_reporter)
tar_assert_none_na(seconds_reporter)
tar_assert_ge(seconds_reporter, 0)
targets_arguments <- list(
path_store = store,
targets_only = targets_only,
names_quosure = rlang::enquo(names),
shortcut = shortcut,
allow_quosure = rlang::enquo(allow),
exclude_quosure = rlang::enquo(exclude),
outdated = outdated,
label = label,
legend = legend,
color = color,
reporter = reporter,
seconds_reporter = seconds_reporter
)
callr_outer(
targets_function = tar_mermaid_inner,
targets_arguments = targets_arguments,
callr_function = callr_function,
callr_arguments = callr_arguments,
envir = envir,
script = script,
store = store,
fun = "tar_mermaid"
)
}
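Assumed usage, mirroring targets::tar_mermaid(); because the function leans on unexported targets internals, this sketch presumes it runs where those helpers are visible:

```r
# Render the pipeline graph as mermaid.js text and save it; the output
# path is hypothetical.
lines <- get_mermaid(targets_only = TRUE)
writeLines(lines, "pipeline.mmd")
```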

29 changes: 29 additions & 0 deletions R/make_mask_lookup.R
@@ -0,0 +1,29 @@
#' Build date and adjacency mask lookups
#'
#' @param model_dates_selected dates on which the model makes predictions
#' @param rsa_polygon polygon layer of administrative districts
#' @return list with masked_dates_90_days_lookup and masked_shapes_adjacent_lookup
#' @author Emma Mendelsohn
#' @export
make_mask_lookup <- function(model_dates_selected, rsa_polygon) {

masked_dates_90_days_lookup <- map_dfr(model_dates_selected, function(date){
diffs <- model_dates_selected - date
tibble(date = date, mask = list(model_dates_selected[diffs > 0 & diffs <= 90]))
})

masked_shapes_adjacent_lookup <- map_dfr(1:nrow(rsa_polygon), function(i){
select_shape <- rsa_polygon[i,]
touches <- st_touches(select_shape, rsa_polygon)
touches_shapes <- rsa_polygon[unlist(touches),]
tibble(shape = select_shape$shapeName,
mask = list(touches_shapes$shapeName))
})

list(masked_dates_90_days_lookup = masked_dates_90_days_lookup,
masked_shapes_adjacent_lookup = masked_shapes_adjacent_lookup
)

}
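The date-mask logic can be checked with toy dates:

```r
# Toy check of the 90-day lookup: each date masks later dates within 90 days.
library(purrr)
library(tibble)

dates <- as.Date(c("2018-01-01", "2018-02-01", "2018-05-01"))
map_dfr(dates, function(date) {
  diffs <- dates - date
  tibble(date = date, mask = list(dates[diffs > 0 & diffs <= 90]))
})
# 2018-01-01 masks 2018-02-01 (31 days ahead); 2018-02-01 masks
# 2018-05-01 (89 days ahead); 2018-05-01 masks nothing.
```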
22 changes: 22 additions & 0 deletions R/model_grid.R
@@ -0,0 +1,22 @@
#' Latin hypercube grid for xgboost tuning
#'
#' @param training_data training data, used to finalize the mtry range
#' @return a 10-row tibble of candidate hyperparameter values
#' @author Emma Mendelsohn
#' @export
model_grid <- function(training_data) {

dials::grid_latin_hypercube(
dials::tree_depth(),
dials::min_n(),
dials::loss_reduction(),
sample_size = dials::sample_prop(),
dials::finalize(dials::mtry(), training_data),
dials::learn_rate(),
size = 10
)

}
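The grid itself is just a tibble of candidate values; a quick sanity check (illustrative):

```r
# One column per tunable parameter, 10 rows of space-filling candidates;
# training_data only informs the upper bound of mtry.
grid <- model_grid(training_data)
nrow(grid)   # 10
names(grid)  # tree_depth, min_n, loss_reduction, sample_size, mtry, learn_rate
```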
35 changes: 35 additions & 0 deletions R/model_recipe.R
@@ -0,0 +1,35 @@
#' Recipe for the 30-day outbreak model
#'
#' @param training_data training split used to define the recipe
#' @return an unprepped recipe
#' @author Emma Mendelsohn
#' @export
model_recipe <- function(training_data) {

recipe(formula = as.formula(outbreak_30 ~
# TODO add day of year
# TODO add static
# TODO add immunity layer
# TODO add recent outbreak layer - has there been an outbreak this season
anomaly_relative_humidity_30 +
anomaly_temperature_30 +
anomaly_precipitation_30 +
anomaly_relative_humidity_60 +
anomaly_temperature_60 +
anomaly_precipitation_60 +
anomaly_relative_humidity_90 +
anomaly_temperature_90 +
anomaly_precipitation_90 +
anomaly_temperature_forecast_29 +
                                anomaly_precipitation_forecast_29 +
                                anomaly_relative_humidity_forecast_29 +
anomaly_ndvi_30 +
anomaly_ndvi_60 +
anomaly_ndvi_90 +
                                area),
                   data = training_data)

}
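A hedged check of what the recipe produces (no steps beyond variable selection, so baking returns the selected columns):

```r
# Prep on the training split and bake it back to inspect the exact
# outcome/predictor columns the model will receive.
library(recipes)

rec   <- model_recipe(training_data)
baked <- bake(prep(rec, training = training_data), new_data = NULL)
names(baked)
```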
30 changes: 30 additions & 0 deletions R/model_specs.R
@@ -0,0 +1,30 @@
#' xgboost model specification
#'
#' @param base_score background outbreak rate used as the xgboost intercept
#' @param interaction_constraints xgboost interaction groups (keeps area from interacting)
#' @param monotone_constraints per-feature monotonicity (positive for area)
#' @return a tunable parsnip boost_tree specification
#' @author Emma Mendelsohn
#' @export
model_specs <- function(base_score, interaction_constraints, monotone_constraints) {

parsnip::boost_tree(
trees = 1000,
tree_depth = hardhat::tune(),
min_n = hardhat::tune(),
loss_reduction = hardhat::tune(),
sample_size = hardhat::tune(),
mtry = hardhat::tune(),
learn_rate = hardhat::tune()
) |>
parsnip::set_engine("xgboost",
objective = "binary:logistic",
base_score = base_score, # set the background/intercept rate - this allows the tree to split even when the training is all negatives
interaction_constraints = interaction_constraints, # do not interact on area
monotone_constraints = monotone_constraints # enforce positive relationship for area
) |>
parsnip::set_mode("classification")

}
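A hedged sketch of the constraint arguments this spec expects; the feature count and ordering (15 anomaly predictors followed by area) are assumptions, not taken from the PR:

```r
# xgboost feature indices are 0-based. Let indices 0-14 be the anomaly
# predictors and 15 be area (assumed ordering).
interaction_constraints <- list(0:14)        # anomaly features interact; area does not
monotone_constraints    <- c(rep(0, 15), 1)  # unconstrained except increasing in area

spec <- model_specs(
  base_score              = mean(training_data$outbreak_30),  # assumed background rate
  interaction_constraints = interaction_constraints,
  monotone_constraints    = monotone_constraints
)
```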