Skip to content

Commit

Permalink
Merge pull request #293 from bcgov/timeseries_backend
Browse files Browse the repository at this point in the history
Timeseries backend - this addresses #247
  • Loading branch information
kdaust authored Aug 15, 2024
2 parents a3e57bf + b8418c9 commit 07749b0
Show file tree
Hide file tree
Showing 7 changed files with 477 additions and 41 deletions.
3 changes: 2 additions & 1 deletion R/downscale.R
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ downscale <- function(xyz, which_refmap = "auto",
ssps = ssps,
years = gcm_ssp_years,
max_run = max_run,
cache = cache
cache = cache,
fast = TRUE
)
} else {
gcm_ssp_ts <- NULL
Expand Down
205 changes: 191 additions & 14 deletions R/gcm.R
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
#' pool::poolClose(dbCon)
#' @rdname gcms-input-data
#' @export
input_gcms <- function(dbCon, bbox = NULL, gcms = list_gcms(), ssps = list_ssps(), period = list_gcm_periods(), max_run = 0L, cache = TRUE) {
input_gcms <- function(dbCon, bbox = NULL, gcms = list_gcms(), ssps = list_ssps(), period = list_gcm_periods(), max_run = 0L, cache = TRUE, run_nm = NULL) {
## checks
if (!is.null(bbox)) {
.check_bb(bbox)
Expand Down Expand Up @@ -144,6 +144,7 @@ input_gcm_hist <- function(dbCon, bbox = NULL, gcms = list_gcms(),
#' See [`list_gcm_ssp_years()`] for available years.
#' @template max_run
#' @template cache
#' @param fast Logical. Should we use the faster method of downloading data from the database using arrays instead of Postgis rasters?
#'
#' @return A `list` of `SpatRasters`, each with possibly multiple layers, that can
#' be used with [`downscale_core()`].
Expand All @@ -161,20 +162,31 @@ input_gcm_hist <- function(dbCon, bbox = NULL, gcms = list_gcms(),
#' @rdname gcms-input-data
#' @export
input_gcm_ssp <- function(dbCon, bbox = NULL, gcms = list_gcms(), ssps = list_ssps(),
years = 2020:2030, max_run = 0L, cache = TRUE) {
years = 2020:2030, max_run = 0L, cache = TRUE, fast = TRUE) {

## checks
if (!is.null(bbox)) {
.check_bb(bbox)
}

if (nrow(dbnames_ts) < 1) stop("That isn't a valid GCM")
# Load each file individually + select layers
res <- sapply(gcms, process_one_gcm4,
ssps = ssps, period = years,
dbnames = dbnames_ts, bbox = bbox, dbCon = dbCon,
max_run = max_run, cache = cache, USE.NAMES = TRUE, simplify = FALSE
)
res <- res[!sapply(res, is.null)] ## remove NULL
if(fast){
res <- sapply(gcms, process_one_gcmts_fast,
ssps = ssps, period = years,
dbnames = dbnames_ts_fast, bbox = bbox, dbCon = dbCon,
max_run = max_run, cache = cache, USE.NAMES = TRUE, simplify = FALSE
)
}else{
res <- sapply(gcms, process_one_gcm4,
ssps = ssps, period = years,
dbnames = dbnames_ts, bbox = bbox, dbCon = dbCon,
max_run = max_run, cache = cache, USE.NAMES = TRUE, simplify = FALSE
)
}

res <- res[!sapply(res, is.null)] ##remove NULL

attr(res, "builder") <- "climr"

# Return a list of SpatRasters, one element for each model
Expand Down Expand Up @@ -229,16 +241,21 @@ list_unique <- function(files, col_num) {
#'
#' @return `SpatRaster`
#' @noRd
process_one_gcm2 <- function(gcm_nm, ssps, bbox, period, max_run, dbnames = dbnames, dbCon, cache) { ## need to update to all GCMs
process_one_gcm2 <- function(gcm_nm, ssps, bbox, period, max_run, dbnames = dbnames, dbCon, cache, run_nm = NULL) { ## need to update to all GCMs
gcmcode <- dbnames$dbname[dbnames$GCM == gcm_nm]
# gcm_nm <- gsub("-", ".", gcm_nm)

rInfoPath <- file.path(R_user_dir("climr", "data"), "run_info")

runs <- fread(file.path(rInfoPath, "gcm_periods.csv"))
runs <- sort(unique(runs[mod == gcm_nm & scenario %in% ssps, run]))
sel_runs <- runs[1:(max_run + 1L)]


if(is.null(run_nm)){
runs <- fread(file.path(rInfoPath, "gcm_periods.csv"))
runs <- sort(unique(runs[mod == gcm_nm & scenario %in% ssps, run]))
sel_runs <- runs[1:(max_run + 1L)]
}else{
sel_runs <- run_nm
}


## check cached
needDownload <- TRUE

Expand Down Expand Up @@ -584,3 +601,163 @@ process_one_gcm4 <- function(gcm_nm, ssps, period, max_run, dbnames = dbnames_ts
}
}
}


#' Process one GCM time series at a time (faster version)
#'
#' @template gcm_nm
#' @template ssps
#' @template period
#' @template max_run
#' @param dbnames `data.frame` with the list of available GCMs (time series projections)
#' and their corresponding names in the PostGIS data base. See climr:::dbnames_ts
#' @template bbox
#' @template dbCon
#' @template cache
#'
#' @importFrom tools R_user_dir
#' @import data.table
#' @import dplyr
#' @import abind
#' @import terra
#'
#' @return a `SpatRaster`
#' @noRd
process_one_gcmts_fast <- function(gcm_nm, ssps, period, max_run, dbnames = dbnames_ts, bbox, dbCon, cache) { ## need to update to all GCMs
if(gcm_nm %in% dbnames$GCM){
gcmcode <- dbnames$dbname[dbnames$GCM == gcm_nm]
gcmarray <- dbnames$dbarray[dbnames$GCM == gcm_nm]
rInfoPath <- file.path(R_user_dir("climr", "data"), "run_info")

runs <- fread(file.path(rInfoPath, "gcm_ts.csv"))
runs <- sort(unique(runs[mod == gcm_nm & scenario %in% ssps, run]))
if (length(runs) < 1) {
warning("That GCM isn't in our database yet.")
}else{
sel_runs <- runs[1:(max_run + 1L)]

## check cached
needDownload <- TRUE

cPath <- file.path(cache_path(), "gcmts", gcmcode)

if (dir.exists(cPath)) {
bnds <- try(fread(file.path(cPath, "meta_area.csv")), silent = TRUE)

if (is(bnds, "try-error")) {
## try to get the data again
message(
"Metadata file no longer exists or is unreadable.",
" Downloading the data again"
)
} else {
needDownload <- FALSE
}
}


if (!needDownload) {
setorder(bnds, -numlay)

spat_match <- lapply(1:nrow(bnds), FUN = \(x){
if (is_in_bbox(bbox, matrix(bnds[x, 2:5]))) bnds$uid[x]
})
spat_match <- spat_match[!sapply(spat_match, is.null)]

if (length(spat_match) > 0) {
periods <- fread(file.path(cPath, "meta_period.csv"))
ssps_cache <- fread(file.path(cPath, "meta_ssp.csv"))
isin <- FALSE
for (oldid in spat_match) { ## see if any have all required variables
if (all(period %in% periods[uid == oldid, period]) &
all(ssps %in% ssps_cache[uid == oldid, ssps]) &
max_run <= bnds[uid == oldid, max_run]) {
isin <- TRUE
break
}
}

if (isin) {
message("Retrieving from cache...")
gcm_rast <- tryCatch(
suppressWarnings(rast(file.path(cPath, paste0(oldid, ".tif")))),
error = function(e) {
rast(file.path(cPath, paste0(oldid, ".grd")))
}
)
layinfo <- data.table(fullnm = names(gcm_rast))
layinfo[, c("Mod", "Var", "Month", "Scenario", "Run", "Year") := tstrsplit(fullnm, "_")]
layinfo[, laynum := seq_along(fullnm)]
sel <- layinfo[Scenario %in% ssps & Year %in% period & Run %in% sel_runs, laynum]
gcm_rast <- gcm_rast[[sel]]
} else {
message("Not fully cached :( Will download more")
needDownload <- TRUE
}
} else {
message("Not fully cached :( Will download more")
needDownload <- TRUE
}
}

if (needDownload) {
template <- pgGetTerra(dbCon, name = gcmcode, tile = F, bands = 1, boundary = bbox)

if(length(period) >= 79){ ##faster if almost all years are selected
results <- tbl(dbCon, sql(paste0("select cellid, ssp, year, run, vals from ",gcmarray," where cellid in (",paste0(values(template)[,1], collapse = ','),")
and ssp in ('",paste(ssps, collapse = "','"),"') and run in ('", paste(sel_runs, collapse = "','"), "')")))
}else{
results <- tbl(dbCon, sql(paste0("select cellid, ssp, year, run, vals from ",gcmarray," where cellid in (",paste0(values(template)[,1], collapse = ','),")
and year in ('",paste(period, collapse = "','"),"') and ssp in ('",paste(ssps, collapse = "','"),"') and run in ('", paste(sel_runs, collapse = "','"), "')")))
}

dat <-
results %>%
mutate(vals = unnest(vals)) %>%
collect()
setDT(dat)
setorder(dat, cellid, year, ssp, run)
dat[, month := rep(sort(sprintf(c("PPT_%02d", "Tmax_%02d", "Tmin_%02d"), sort(rep(1:12, 3)))),
nrow(dat)/(12*3))]

dat[, fullnm := paste(month, ssp, run, year, sep = "_")]
cell_nums = cells(template)
coords <- rowColFromCell(template, cell_nums)
cellcoords <- data.table(coords, values(template))
setnames(cellcoords, c("row","col","cellid"))
dat[cellcoords, `:=`(row = i.row, col = i.col), on = "cellid"]
temp <- dat[,.(row,col,fullnm,vals)]
t2 <- dcast(temp, fullnm + row ~ col, value.var = "vals")

t_array = split(as.data.frame(t2[,!c("fullnm","row")]), t2$fullnm)
t3 <- abind(t_array, along = 3)
gcm_rast <- rast(t3)
ext(gcm_rast) <- ext(template)
names(gcm_rast) <- paste0(gcm_nm,"_", names(t_array))

if (cache) {
message("Caching data...")
uid <- UUIDgenerate()
dir.create(cPath, recursive = TRUE, showWarnings = FALSE)
if(nlyr(gcm_rast) > 65500){ ##geotifs are limited to 65535 layers
writeRaster(gcm_rast, file.path(cPath, paste0(uid, ".grd")), filetype = "ENVI")
}else{
writeRaster(gcm_rast, file.path(cPath, paste0(uid, ".tif")))
}
rastext <- ext(gcm_rast)
t1 <- data.table(
uid = uid, ymax = rastext[4], ymin = rastext[3], xmax = rastext[2], xmin = rastext[1],
numlay = nlyr(gcm_rast), max_run = max_run
)
t2 <- data.table(uid = rep(uid, length(period)), period = period)
t3 <- data.table(uid = rep(uid, length(ssps)), ssps = ssps)
fwrite(t1, file = file.path(cPath, "meta_area.csv"), append = TRUE)
fwrite(t2, file = file.path(cPath, "meta_period.csv"), append = TRUE)
fwrite(t3, file = file.path(cPath, "meta_ssp.csv"), append = TRUE)
}
}

return(gcm_rast)
}
}
}
Binary file modified R/sysdata.rda
Binary file not shown.
14 changes: 13 additions & 1 deletion data-raw/internal_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ dbnames_ts <- data.table(
)
)

usethis::use_data(param, dbnames, dbnames_hist, dbnames_ts, dbnames_hist_obs,
dbnames_ts_fast <- data.table(
GCM = c(
"ACCESS-ESM1-5","BCC-CSM2-MR", "CanESM5",
"CNRM-ESM2-1", "EC-Earth3", "GISS-E2-1-G", "INM-CM5-0", "GFDL-ESM4",
"IPSL-CM6A-LR", "MIROC6", "MPI-ESM1-2-HR", "MRI-ESM2-0", "UKESM1-0-LL"
),
dbname = c("access_template", "bcc_template","canesm_template","cnrm_template", "ecearth3_template","giss_template","inm_template",
"gfdl_template", "ipsl_template","miroc6_template","mpi_template","mri_template","ukesm_template"),
dbarray = c("access_array", "bcc_array","canesm_array","cnrm_array", "ecearth3_array","giss_array","inm_array",
"gfdl_array", "ipsl_array","miroc6_array","mpi_array","mri_array","ukesm_array")
)

usethis::use_data(param, dbnames, dbnames_hist, dbnames_ts, dbnames_hist_obs, dbnames_ts_fast,
overwrite = TRUE, internal = TRUE
)
34 changes: 17 additions & 17 deletions data_processing/CreateDB.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,31 @@ library(analogsea)
library(ssh)


bc_rast <- rast("../Common_Files/composite_wna_wlrdem.tif")[[32]]
plot(bc_rast)
bc_rast[!is.na(bc_rast)] <- 1L
plot(bc_rast)
bc_rast2 <- terra::aggregate(bc_rast, fact = 4, fun = "max")
plot(bc_rast2)

writeRaster(bc_rast2, filename = "inst/extdata/wna_outline.tif", datatype = "INT1U")
# bc_rast <- rast("../Common_Files/composite_wna_wlrdem.tif")[[32]]
# plot(bc_rast)
# bc_rast[!is.na(bc_rast)] <- 1L
# plot(bc_rast)
# bc_rast2 <- terra::aggregate(bc_rast, fact = 4, fun = "max")
# plot(bc_rast2)
#
# writeRaster(bc_rast2, filename = "inst/extdata/wna_outline.tif", datatype = "INT1U")

##process NA normals
# Load normal files
dir_normal <- "../Common_Files/colin_climatology/"
dir_normal <- "../Common_Files/climatena_normals/Normal_1961_1990MP/"
files <- list.files(dir_normal, full.names = T)
fnames <- list.files(dir_normal)
files <- files[grep("Tmin|Tmax|PPT",files)]
d <- rast(files)
names(d) <- c("PPT01", "PPT02", "PPT03", "PPT04", "PPT05", "PPT06", "PPT07", "PPT08", "PPT09", "PPT10",
"PPT11", "PPT12", "Tmax01", "Tmax02", "Tmax03", "Tmax04", "Tmax05", "Tmax06", "Tmax07",
"Tmax08", "Tmax09", "Tmax10", "Tmax11", "Tmax12", "Tmin01", "Tmin02", "Tmin03", "Tmin04",
"Tmin05", "Tmin06", "Tmin07", "Tmin08", "Tmin09", "Tmin10", "Tmin11", "Tmin12")
# names(d) <- c("PPT01", "PPT02", "PPT03", "PPT04", "PPT05", "PPT06", "PPT07", "PPT08", "PPT09", "PPT10",
# "PPT11", "PPT12", "Tmax01", "Tmax02", "Tmax03", "Tmax04", "Tmax05", "Tmax06", "Tmax07",
# "Tmax08", "Tmax09", "Tmax10", "Tmax11", "Tmax12", "Tmin01", "Tmin02", "Tmin03", "Tmin04",
# "Tmin05", "Tmin06", "Tmin07", "Tmin08", "Tmin09", "Tmin10", "Tmin11", "Tmin12")

dem <- rast("../Common_Files/composite_WNA_dem.tif")
dem <- rast("../climatena/InputFiles/na4000.asc")
#r <- r[[grep("PPT|Tmin|Tmax", names(r))]]

lr <- lapse_rate(
normal = d,
reference = d,
dem = dem,
NA_replace = TRUE,
nthread = 4,
Expand All @@ -42,7 +42,7 @@ names(lr) <- paste0("lr_",names(lr))
# Actual writing
terra::writeRaster(
c(d, lr, dem),
file.path("../Common_Files/composite_wna_wlrdem.tif"),
file.path("../Common_Files/climatena_wlrdem.tif"),
overwrite = TRUE,
gdal="COMPRESS=NONE"
)
Expand Down
38 changes: 30 additions & 8 deletions data_processing/Test_Script.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,23 @@ library(data.table)
library(terra)
library(climr)


wlr <- rast("../Common_Files/composite_WNA_dem.tif")
plot(wlr)

dem <- rast("../Common_Files/dem_noram_lowres.tif")
test <- rast("../Common_Files/climatena_normals/Normal_1961_1990MP/Tmin07.asc")
plot(test)

my_grid <- as.data.frame(dem, cells = TRUE, xy = TRUE)
colnames(my_grid) <- c("id", "lon", "lat", "elev") # rename column names to what climr expects
db <- data_connect()
bbox <- get_bb(my_grid)
bbox2 <- c(20,14.83,-80,-120)
refmap <- input_refmap(db, bbox)

plot(refmap$Tmax_07)

pts <- data.frame(lon = c(-124.11, -125.11), lat = rep(48.82, 2), elev = rep(25,2), id = 1:2)

bbox <- get_bb(pts[2,])
Expand Down Expand Up @@ -31,11 +48,11 @@ refmap <- input_refmap(db, bbox)


projected <- climr_downscale(pts[2,],
gcm_models = list_gcm()[c(4)],
ssp = list_ssp()[c(1,2)],
max_run = 3,
gcm_hist_years = 1851:2014,
gcm_ts_years = 2015:2100
gcm_models = list_gcm()[c(4)],
ssp = list_ssp()[c(1,2)],
max_run = 3,
gcm_hist_years = 1851:2014,
gcm_ts_years = 2015:2100
)

dat <- fread("../climatena/Perioddat/Year_1905.ann")
Expand Down Expand Up @@ -80,11 +97,16 @@ data <- downscale(xyz = pt,
)

plot_timeSeries(data, var1 = "Tmax_08")
data <- plot_timeSeries_input(pt, gcms = list_gcms()[4], vars = "MAP")
plot_timeSeries(data, var1 = "MAP")
library(abind)
library(dplyr)
library(terra)
data <- plot_timeSeries_input(pt, gcms = list_gcms()[1], max_run = 5)
data <- downscale(pt, gcms = list_gcms(), gcm_ssp_years = list_gcm_ssp_years(),
ssps = list_ssps()[1:3], max_run = 5L)
plot_timeSeries(data, var1 = "MAT")

data <- downscale(xyz = pt,
gcm_models = list_gcms()[2],
gcm_models = list_gcms()[5],
ssp = list_ssps(),
max_run = 10,
historic_ts_dataset = c("cru.gpcc", "climatena"),
Expand Down
Loading

0 comments on commit 07749b0

Please sign in to comment.