diff --git a/.Rbuildignore b/.Rbuildignore index fe4e27f..9351ebd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -17,4 +17,5 @@ ^vignettes/articles$ ^\.dodsrc$ \.urs_cookies$ -^stac_testing$ \ No newline at end of file +^stac_testing$ +^LICENSE\.md$ diff --git a/DESCRIPTION b/DESCRIPTION index 6fd681c..166dfa6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,7 @@ Imports: gifski, ncmeta, RNetCDF, + rnz, terra, utils, methods, @@ -30,7 +31,7 @@ Imports: License: MIT + file LICENSE Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Suggests: httr, AOI, @@ -39,8 +40,16 @@ Suggests: rmarkdown, knitr, kableExtra, - distill + distill, + ggplot2, + patchwork, + sf, + tidyr, + tidyterra, + reticulate Remotes: - mikejohnson51/AOI + mikejohnson51/AOI, + dblodgett-usgs/ncmeta, + usgs-doi/rnz Config/testthat/edition: 3 VignetteBuilder: knitr diff --git a/LICENSE b/LICENSE index e006fc6..a631da1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,21 +1,2 @@ -MIT License - -Copyright (c) 2018 mikejohnson51 - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +YEAR: 2018 +COPYRIGHT HOLDER: Mike Johnson diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..7c8192e --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2024 Mike Johnson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE index 9644df4..d5912f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,9 @@ export("%>%") export(.resource_grid) +export(.resource_grid_zarr) export(.resource_time) +export(.resource_time_zarr) export(animation) export(checkDodsrc) export(checkNetrc) @@ -42,6 +44,7 @@ export(getVIC) export(getWorldClim) export(get_data) export(go_get_dap_data) +export(go_get_zarr) export(grid_meta) export(make_ext) export(make_vect) @@ -51,12 +54,17 @@ export(plot) export(read_dap_file) export(read_ftp) export(read_live_catalog) +export(read_zarr_file) export(time_meta) export(var_to_terra) export(variable_meta) export(vrt_crop_get) export(writeDodsrc) export(writeNetrc) +export(zarr_crop) +export(zarr_get) +export(zarr_to_terra) +export(zarr_xyzv) importFrom(RNetCDF,att.get.nc) importFrom(RNetCDF,close.nc) importFrom(RNetCDF,dim.inq.nc) @@ -65,6 +73,7 @@ importFrom(RNetCDF,utcal.nc) importFrom(RNetCDF,var.get.nc) importFrom(RNetCDF,var.inq.nc) importFrom(arrow,read_parquet) +importFrom(dplyr,"%>%") importFrom(dplyr,`%>%`) importFrom(dplyr,bind_rows) importFrom(dplyr,distinct) @@ -90,6 +99,12 @@ importFrom(ncmeta,nc_gm_to_prj) importFrom(ncmeta,nc_grid_mapping_atts) importFrom(ncmeta,nc_var) importFrom(ncmeta,nc_vars) +importFrom(rnz,close_nz) +importFrom(rnz,get_att) +importFrom(rnz,get_var) +importFrom(rnz,inq_dim) +importFrom(rnz,inq_var) +importFrom(rnz,open_nz) importFrom(stats,complete.cases) importFrom(terra,`crs<-`) importFrom(terra,`ext<-`) diff --git a/R/catalog.R b/R/catalog.R index e838314..cfd3a71 100644 --- a/R/catalog.R +++ b/R/catalog.R @@ -17,6 +17,7 @@ NULL #' @importFrom methods formalArgs #' @importFrom stats complete.cases #' @importFrom grDevices blues9 +#' @importFrom rnz open_nz close_nz inq_var get_att get_var inq_dim #' @export diff --git a/R/dap.R b/R/dap.R index 39adc0e..f63ddbd 100644 --- a/R/dap.R +++ b/R/dap.R @@ -87,6 +87,23 @@ dap <- function(URL = NULL, toptobottom = toptobottom, verbose = verbose ) + + } else if(any(getExtension(urls) %in% 'zarr')) { + + zarr <- zarr_crop( + URL = URL, + catalog = catalog, + AOI = aoi, + startDate = startDate, + endDate = endDate, + start = start, + end = end, + varname = varname, + verbose = verbose + ) + + x = zarr_get(zarr) + } else { dap <- dap_crop( URL = URL, @@ -278,7 +295,7 @@ dap_crop <- function(URL = NULL, if (grepl("hour", catalog$interval[1])) { startDate <- paste(startDate, "00:00:00") - endDate <- paste(endDate, "23:00:00") + endDate <- paste(endDate, "23:00:00") } startDate <- as.POSIXct(as.character(startDate), tz = "UTC") diff --git a/R/utils.R b/R/utils.R index bc2f85c..a833da2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -416,7 +416,7 @@ go_get_dap_data <- function(dap) { } #' Convert catalog entry to extent -#' @param cat catalog entry (data.frame with an {Xn, X1, Yn, Y1, crs}) +#' @param cat catalog entry (data.frame with an (Xn, X1, Yn, Y1, crs) #' @return SpatExtent #' @family dap #' @export @@ -432,7 +432,7 @@ make_ext <- } #' Make Vector -#' @param cat catalog entry (data.frame with an {Xn, X1, Yn, Y1, crs}) +#' @param cat catalog entry (data.frame with an c(Xn, X1, Yn, Y1, crs)) #' @return SpatVect #' @family dap #' @export diff --git a/R/zarr.R b/R/zarr.R new file mode 100644 index 0000000..78e20ac --- /dev/null +++ b/R/zarr.R @@ -0,0 +1,793 @@ +#' Read Zarr File +#' +#' Reads a Zarr file from a specified URL and extracts metadata and variable information. +#' +#' @param URL Character. The URL of the Zarr file. +#' @param varname Character. Variable name to extract. Defaults to NULL. +#' @param id Character. An identifier for the dataset. +#' @param varmeta Logical. Whether to include variable metadata. Defaults to TRUE. +#' @return A data frame with merged metadata and variable information. +#' @family zarr +#' @export + +read_zarr_file <- function(URL, varname = NULL, id, varmeta = TRUE) { + + nz <- rnz::open_nz(URL) + on.exit(rnz::close_nz(nz)) + + raw <- zarr_xyzv(obj = nz, varname, varmeta = varmeta) + raw$URL <- URL + raw$id <- id + + raw <- merge(raw, data.frame(.resource_time_zarr(URL, T_name = raw$T_name[1]), id = id), by = "id") + + raw <- merge(raw, .resource_grid_zarr(URL, X_name = raw$X_name[1], Y_name = raw$Y_name[1])) + + raw +} + +#' Extract Variable Information from Zarr Object +#' +#' Extracts variable and coordinate information from a Zarr object. +#' +#' @param obj Zarr object or file path. +#' @param varname Character. Specific variable name to extract. Defaults to NULL. +#' @param varmeta Logical. Whether to include variable metadata. Defaults to FALSE. +#' @return A data frame containing variable metadata and coordinate information. +#' @family zarr +#' @export + +zarr_xyzv <- function(obj, varname = NULL, varmeta = FALSE) { + + if (!inherits(obj, "ZarrGroup")) { + obj <- rnz::open_nz(obj) + on.exit(rnz::close_nz(obj)) + } + + + if(is.null(varname)){ + raw = suppressWarnings({ + tryCatch({ + nc_coord_var(obj, variable = NULL)[, c("variable", "X", "Y", "T", "Z")] + }, error = function(e){ + vars = nc_vars(obj)$name + lapply(1:length(vars), FUN = function(j){ + tryCatch({ + nc_coord_var(obj, variable = vars[j])[, c("variable", "X", "Y", "T", "Z")] + }, + error = function(x){ + NULL + }) + }) + }) %>% + bind_rows() + }) + + } else { + + raw = suppressWarnings({ + tryCatch({ + nc_coord_var(obj, variable = varname)[, c("variable", "X", "Y", "T", "Z")] + }, error = function(e){ + NULL + }) + }) + } + + + raw <- raw[!apply(raw, 1, function(x) { + sum(!is.na(x)) <= 3 + }), ] + + + raw$dim_order = NA + raw$nX = NA + raw$nY = NA + raw$nZ = NA + + + for(i in 1:nrow(raw)){ + + o = rnz::inq_var(obj, raw$variable[1])$dimids + dims = nc_dims(obj, varname)$name[o + 1] + length = nc_dims(obj, varname)$length[o + 1] + + if(is.na(raw$Z[i]) & length(dims) == 4){ + raw$Z[i] = dims[!dims %in% as.vector(raw[i,-1])] + } + + o = names(raw)[match(dims, raw[i,])] + raw$dim_order[i] = paste(o, collapse = "") + raw$nX[i] = length[which(dims == raw$X[i])] + raw$nY[i] = length[which(dims == raw$Y[i])] + #raw$nT[i] = length[which(dims == raw$T[i])] + if(is.na(raw$Z)[i]){ + raw$nZ[i] = NA + } else { + raw$nZ[i] = length[which(dims == raw$Z[i])] + } + } + + names(raw) <- c("varname", "X_name", "Y_name", "T_name", "Z_name", "dim_order", + "nX", "nY", "nZ") + + ll <- list() + + if (varmeta) { + for (i in 1:nrow(raw)) { + if (unique(nc_var(obj, raw$varname[i])$ndims) > 4) { + ll[[i]] <- NULL + warning("We do not support 5D datasets:", raw$varname[i]) + } else { + ll[[i]] <- data.frame( + varname = raw$varname[i], + units = try_att(obj, raw$varname[i], "units"), + description = try_att(obj, raw$varname[i], "long_name") + ) + + if(is.na(ll[[i]]$description)){ + ll[[i]]$description = try_att(obj, raw$varname[i], "LongName") + } + + ll[[i]]$description = gsub("\\s+"," ", ll[[i]]$description) + } + } + + merge(raw, do.call(rbind, ll), by = "varname") + } else { + raw + } +} + +#' Extract Time Information from Zarr Resource +#' +#' Retrieves time dimension metadata and calculates time intervals. +#' +#' @param URL Character. The URL of the Zarr file. +#' @param T_name Character. Name of the time variable. Defaults to NULL. +#' @return A list with time duration, interval, and count information. +#' @family zarr +#' @export + +.resource_time_zarr <- function(URL, T_name = NULL) { + + if (is.null(T_name)) { + nz = rnz::open_nz(URL) + atts <- zarr_xyzv(nz) + T_name <- omit.na(unique(atts$T_name)) + rnz::close_nz(nz) + } + + nz = rnz::open_nz(URL) + + time_steps <- utcal.nc( + unitstring = rnz::get_att(nz, T_name, "units"), + value = rnz::get_var(nz, T_name, unpack = TRUE), + type = "c") + + dT <- diff(time_steps) + + g <- data.frame(expand.grid(unique(dT), units(dT))) + g <- g[order(g$Var1), ] + g$n <- as.numeric(table(dT)) + + names(g) <- c("value", "interval", "n") + + if (nrow(g) > 1 & all(g$value %in% c(28, 29, 30, 31))) { + g <- data.frame(value = 1, interval = "months") + } else { + g <- g[which.max(g$n), ] + } + + # If time is within 5 days of today then we call the range Open + maxDate <- ifelse(max(time_steps) >= Sys.time() - (5 * 86400) & max(time_steps) <= Sys.time() + 1, + "..", + as.character(max(time_steps)) + ) + + nT <- ifelse(maxDate == "..", NA, length(time_steps)) + + int <- paste(g$value, g$interval) + + if (length(int) == 0) { + int <- "0" + } + + list( + duration = paste0(min(time_steps), "/", maxDate), + interval = int, + nT = nT + ) +} + +#' Extract Grid Information from Zarr Resource +#' +#' Retrieves grid dimension metadata and calculates grid properties. +#' +#' @param URL Character. The URL of the Zarr file. +#' @param X_name Character. Name of the X-coordinate variable. Defaults to NULL. +#' @param Y_name Character. Name of the Y-coordinate variable. Defaults to NULL. +#' @param stopIfNotEqualSpaced Logical. Whether to stop if grid cells are not equally spaced. Defaults to TRUE. +#' @return A data frame with grid properties. +#' @family zarr +#' @export + +.resource_grid_zarr <- function(URL, X_name = NULL, Y_name = NULL, stopIfNotEqualSpaced = TRUE) { + + nz <- rnz::open_nz(URL) + + if (is.null(X_name) | is.null(Y_name)) { + atts <- zarr_xyzv(nz) + X_name <- omit.na(unique(atts$X_name)) + Y_name <- omit.na(unique(atts$Y_name)) + } + + nc_grid_mapping <- suppressWarnings(nc_grid_mapping_atts(nz)) + + degree <- grepl("degree", try_att(nz, X_name, "units"), ignore.case = TRUE) + + if (nrow(nc_grid_mapping) == 0) { + if (degree) { + message(paste( + "No projection information found. \n", + "Coordinate variable units are degrees so, \n", + "assuming EPSG:4326" + )) + crs <- "EPSG:4326" + } else { + warning("No projection information found in nc file.") + crs <- NA + } + } else { + crs <- try(nc_gm_to_prj(nc_grid_mapping)) + if (inherits(crs, "try-error")) { + crs <- NA + } else { + crs + } + } + + ncols <- rnz::inq_dim(nz, X_name)$len + nrows <- rnz::inq_dim(nz, Y_name)$len + + xx <- try(rnz::get_var(nz, X_name)) + + if (inherits(xx, "try-error")) { xx <- seq_len(ncols) } + + rs <- xx[-length(xx)] - xx[-1] + + if (!isTRUE(all.equal(min(rs), max(rs), tolerance = 0.025, scale = abs(min(rs))))) { + if (is.na(stopIfNotEqualSpaced)) { + warning("cells are not equally spaced; you should extract values as points") + } else if (stopIfNotEqualSpaced) { + stop("cells are not equally spaced; you should extract values as points") + } + } + + if (any(xx > 180) & degree) { xx <- xx - 360 } + + xrange <- c(min(xx), max(xx)) + resx <- (xrange[2] - xrange[1]) / (ncols - 1) + X1 <- xx[1] + Xn <- xx[length(xx)] + + yy <- try(rnz::get_var(nz, Y_name)) + + if (inherits(yy, "try-error")) { yy <- seq_len(nrows) } + + Y1 <- yy[1] + Yn <- yy[length(yy)] + + rs <- yy[-length(yy)] - yy[-1] + + if (!isTRUE(all.equal(min(rs), max(rs), tolerance = 0.025, scale = abs(min(rs))))) { + if (is.na(stopIfNotEqualSpaced)) { + warning("cells are not equally spaced; you should extract values as points") + } else if (stopIfNotEqualSpaced) { + stop("cells are not equally spaced; you should extract values as points") + } + } + + yrange <- c(min(yy), max(yy)) + resy <- (yrange[2] - yrange[1]) / (nrows - 1) + + if (yy[1] > yy[length(yy)]) { + toptobottom <- FALSE + } else { + toptobottom <- TRUE + } + + data.frame( + crs = crs, + # xmin, xmax, ymin, ymax + X1 = X1, + Xn = Xn, + Y1 = Y1, + Yn = Yn, + resX = resx, + resY = resy, + ncols = ncols, + nrows = nrows, + toptobottom = toptobottom + ) +} + + +#' Crop Zarr Data to a Spatial and Temporal Subset +#' +#' Crops data in a Zarr file based on spatial (AOI) and temporal (start/end) filters. +#' +#' @param URL Character. The URL of the Zarr file. Defaults to NULL. +#' @param catalog Data frame. Metadata catalog for the Zarr file. Defaults to NULL. +#' @param AOI Spatial object. Area of interest for cropping. Defaults to NULL. +#' @param startDate Character. Start date for cropping. Defaults to NULL. +#' @param endDate Character. End date for cropping. Defaults to NULL. +#' @param start Numeric. Start index for cropping. Defaults to NULL. +#' @param end Numeric. End index for cropping. Defaults to NULL. +#' @param varname Character. Variable name to crop. Defaults to NULL. +#' @param verbose Logical. Whether to print verbose output. Defaults to TRUE. +#' @return A cropped dataset matching the specified criteria. +#' @family zarr +#' @export + +zarr_crop <- function(URL = NULL, + catalog = NULL, + AOI = NULL, + startDate = NULL, + endDate = NULL, + start = NULL, + end = NULL, + varname = NULL, + verbose = TRUE) { + + interval <- NULL + + if (!is.null(URL)) { + catalog <- read_zarr_file(URL, varname = varname, id = "local") + catalog$tiled <- "" + } + + ## TIME + + for(i in 1:nrow(catalog)){ + if(grepl("[..]", catalog$duration[i])){ + tmp = .resource_time(URL = catalog$URL[i], T_name = catalog$T_name[i]) + catalog$duration[i] = tmp$duration + catalog$interval[i] = tmp$interval + catalog$interval[i] = tmp$interval + } + } + + if (is.null(startDate) & is.null(endDate)) { + + if(is.null(start) & is.null(end)){ + catalog$T <- paste0("[0:1:", catalog$nT - 1, "]") + catalog$Tdim <- catalog$nT + tmp <- do.call(rbind, strsplit(catalog$duration, "/")) + catalog <- cbind(catalog, data.frame(startDate = tmp[, 1], endDate = tmp[, 2])) + } else { + if(is.null(end)){ end = start} + catalog$T <- paste0("[", start - 1, ":1:", end - 1, "]") + catalog$Tdim <- max(end - start, 1) + + for(i in 1:nrow(catalog)){ + tmp <- strsplit(catalog$duration[i], "/")[[1]] + d = seq.Date(as.Date(tmp[1]), as.Date(tmp[2]), by = catalog$interval[1]) + catalog$startDate = d[start] + catalog$endDate = d[end] + } + } + + } else { + + if (is.null(endDate)) { endDate <- startDate } + + if (grepl("hour", catalog$interval[1])) { + startDate <- paste(startDate, "00:00:00") + endDate <- paste(endDate, "23:00:00") + } + + startDate <- as.POSIXct(as.character(startDate), tz = "UTC") + endDate <- as.POSIXct(as.character(endDate), tz = "UTC") + + out <- list() + + catalog = catalog %>% + mutate(interval = ifelse(interval == "monthly normal", "month", interval)) + + for (i in 1:nrow(catalog)) { + + time_steps <- parse_date(duration = catalog$duration[i], interval = catalog$interval[i]) + + int_unit = strsplit(catalog$interval[i], " ")[[1]][2] + + time_steps = trunc(time_steps, int_unit) + + if(catalog$nT[i] == 1 & !is.na(catalog$nT[i])){ + out[[i]] <- cbind( + catalog[i,], + data.frame( + T = "[0:1:0]", + Tdim = 1, + startDate = time_steps[1], + endDate = time_steps[1] + ) + ) + } else if (startDate > max(time_steps) | endDate < min(time_steps)) { + out[[i]] <- NULL + } else { + tmp = abs(time_steps - startDate) + # upper limit on tied lower threshold + T1 <- max(which(tmp == min(tmp))) + # + if(startDate == endDate){ + Tn = T1 + } else { + tmp = abs(time_steps - endDate) + # lower limit on tied upper threshold + Tn <- min(which(tmp == min(tmp))) + } + + out[[i]] <- cbind( + catalog[i,], + data.frame( + T = paste0("[", T1 - 1, ":1:", Tn - 1, "]"), + Tdim = (Tn - T1) + 1, + startDate = time_steps[T1], + endDate = time_steps[Tn] + ) + ) + } + } + + catalog <- do.call(rbind, out) + + if (length(catalog) == 0) { + stop("Requested Time not found in ", + unique(catalog$duration), + call. = FALSE) + } + } + + + ## SPACE (XY) + if (is.null(AOI)) { + catalog$X <- paste0("[0:1:", catalog$ncols - 1, "]") + catalog$Y <- paste0("[0:1:", catalog$nrows - 1, "]") + } else { + + out <- lapply(1:nrow(catalog), function(i) { + if(is.na(catalog$crs[i])){ + warning("No assigned CRS. Trying WGS84") + catalog$crs[i] = "EPSG:4326" + } + tryCatch({ + ext(terra::intersect(terra::project(terra::ext(AOI), crs(AOI), catalog$crs[i]), + make_vect(catalog[i,]))) + }, + error = function(e) { + NULL + }) + }) + + drops <- which(sapply(out, is.null)) + + if (length(drops) != 0) { + catalog <- catalog[-drops,] + out <- catalog[-drops] + } + + if (nrow(catalog) < 1) { + stop("No resources intersect with provided AOI", call. = FALSE) + } + + for (i in 1:nrow(catalog)) { + X_coords <- + seq(catalog$X1[i], catalog$Xn[i], length.out = catalog$ncols[i]) + + Y_coords <- + seq(catalog$Y1[i], catalog$Yn[i], length.out = catalog$nrows[i]) + + ys <- + c(which.min(abs(Y_coords - out[[i]]$ymin)), which.min(abs(Y_coords - out[[i]]$ymax))) - 1 + xs <- + c(which.min(abs(X_coords - out[[i]]$xmin)), which.min(abs(X_coords - out[[i]]$xmax))) - 1 + + catalog$Y[i] <- paste0("[", paste(sort(ys), collapse = ":1:"), "]") + catalog$X[i] <- paste0("[", paste(sort(xs), collapse = ":1:"), "]") + catalog$X1[i] <- min(X_coords[xs + 1]) + catalog$Xn[i] <- max(X_coords[xs + 1]) + catalog$Y1[i] <- min(Y_coords[ys + 1]) + catalog$Yn[i] <- max(Y_coords[ys + 1]) + catalog$ncols[i] <- abs(diff(xs)) + 1 + catalog$nrows[i] <- abs(diff(ys)) + 1 + catalog + } + + } + + first = substr(catalog$dim_order, 1, 1) + second = substr(catalog$dim_order, 2, 2) + third = substr(catalog$dim_order, 3, 3) + + first = ifelse(length(first) == 0, "T", first) + second = ifelse(length(second) == 0, "Y", second) + third = ifelse(length(third) == 0, "X", third) + + if (any(grepl("XY", catalog$tiled))) { + catalog$URL <- + glue( + "{catalog$URL}?{catalog$varname}{catalog[[first]]}{catalog[[second]]}{catalog[[third]]}" + ) + } else { + catalog$URL <- + glue( + "{catalog$URL}?{catalog$varname}{catalog[[first]]}{catalog[[second]]}{catalog[[third]]}" + ) + } + + if (!is.null(varname)) { + if (all(!varname %in% catalog$varname, !varname %in% catalog$variable)) { + stop( + "variable(s) in resource include:\n\t> ", + paste(unique(catalog$varname), collapse = "\t >"), + call. = FALSE + ) + } + + catalog <- catalog[catalog$varname %in% varname,] + } + + catalog$X <- NULL + catalog$Y <- NULL + catalog$T <- NULL + + catalog$variable = catalog$varname + + if (verbose) { + dap_summary(catalog) + } + + catalog +} + +#' Retrieve data from a Zarr resource +#' +#' This function retrieves or prepares metadata for data stored in a Zarr format. +#' +#' @param zarr A data frame containing details of the Zarr resource to process. +#' Each row should correspond to a single Zarr resource. +#' @param get Logical. If `TRUE`, retrieves data; if `FALSE`, returns metadata information. +#' +#' @return If `get = TRUE`, returns the requested data as a matrix. If `get = FALSE`, +#' returns a data frame containing metadata information for the resource. +#' @examples +#' \dontrun{ +#' # Example usage (assuming `zarr` is a properly formatted data frame): +#' # result <- go_get_zarr(zarr, get = TRUE) +#' } +#' @family zarr +#' @export + +go_get_zarr = function(zarr, get = TRUE) { + + if (nrow(zarr) != 1) { + stop("This function processes only 1 zarr row at a time ... currently there are ", nrow(zarr)) + } + + nz <- rnz::open_nz(sub("\\?.*", "", zarr$URL)) + on.exit(rnz::close_nz(nz)) + # + k <- regmatches(zarr$URL, gregexpr("\\[.*?\\]", zarr$URL))[[1]] + k <- gsub("[", "", k, fixed = TRUE) + k <- gsub("]", "", k, fixed = TRUE) + + nz_var_info <- rnz::inq_var(nz, zarr$varname) + X_var_info <- rnz::inq_var(nz, zarr$X_name)$dimids + Y_var_info <- rnz::inq_var(nz, zarr$Y_name)$dimids + T_var_info <- rnz::inq_var(nz, zarr$T_name)$dimids + + o = if(zarr$dim_order == "XYT") { + c(zarr$X1, zarr$Y1, zarr$T) + c(X_var_info, Y_var_info, T_var_info) + } else if ( zarr$dim_order == "XTY" ){ + c(X_var_info, T_var_info, Y_var_info) + } else if ( zarr$dim_order == "YXT" ){ + c(Y_var_info, X_var_info, T_var_info) + } else if ( zarr$dim_order == "YTX" ){ + c(Y_var_info, T_var_info, X_var_info) + } else if ( zarr$dim_order == "TXY" ){ + c(T_var_info, X_var_info, Y_var_info) + } else if ( zarr$dim_order == "TYX" ){ + c(T_var_info, Y_var_info, X_var_info) + } + + dimid_order <- match( + nz_var_info$dimids, + o + ) + + start <- (as.numeric(sapply(strsplit(k, ":"), "[[", 1)) + 1)[dimid_order] + + count <- (c(zarr$Tdim, zarr$nrows, zarr$ncols))[dimid_order] + + if (get) { + rnz::get_var(nz, + zarr$varname, + start = start, + count = count, + unpack = TRUE) + } else { + data.frame( + file = sub("\\?.*", "", zarr$URL), + variable = zarr$varname, + start = I(list(start)), + count = I(list(count)), + unpack = TRUE + ) + } +} + + +#' Process and retrieve Zarr data as terra objects +#' +#' This function processes Zarr resources and converts them into terra spatial objects. +#' +#' @param zarr A data frame containing details of the Zarr resources to process. +#' Each row should correspond to a single Zarr resource. +#' @param varname Character vector specifying the variable names to retrieve. +#' If `NULL`, retrieves all available variables. +#' +#' @return Returns processed data as terra SpatRaster objects or merged data frames, +#' depending on the input and Zarr properties. +#' @examples +#' \dontrun{ +#' # Example usage (assuming `zarr` is a properly formatted data frame): +#' # result <- zarr_get(zarr, varname = "temperature") +#' } +#' @family zarr +#' @export + +zarr_get <- function(zarr, varname = NULL) { + + if (!is.null(varname)) { + if (all(!varname %in% zarr$varname, !varname %in% zarr$variable)) { + stop( + "variable(s) in resource include:\n\t> ", + paste(unique(zarr$varname), + collapse = "\t >"), + call. = FALSE + ) + } + + zarr <- zarr[zarr$varname %in% varname | zarr$variable %in% varname,] + } + + out <- future_lapply( + 1:nrow(zarr), + FUN = function(x) { + zarr_to_terra(go_get_zarr(zarr = zarr[x,]), zarr = zarr[x,]) + } + ) + + names(out) <- sub("_$", "", paste0(zarr$varname)) + + if(!inherits(out[[1]], "SpatRaster")){ + Reduce(function(dtf1, dtf2) { merge(dtf1, dtf2, by = "date", all.x = TRUE) }, out) + + } else if (any(grepl("XY", zarr$tiled))) { + ll = list() + u <- unique(unlist(lapply(out, units))) + if (length(u) == 1) { + out <- suppressWarnings({ + merge(sprc(out)) + }) + units(out) <- rep(u, nlyr(out)) + out + } else { + out + } + + ll[[zarr$varname[1]]] = out + ll + } else if (any(zarr$tiled == "T")) { + merge_across_time(out) + } else { + out + } +} + +#' Convert data to terra SpatRaster or data frame format +#' +#' Converts extracted Zarr data into terra SpatRaster objects or data frames based on +#' spatial and temporal dimensions. +#' +#' @param var A variable containing extracted Zarr data. +#' @param zarr A data frame containing details of the Zarr resource, including metadata. +#' +#' @return Returns a terra SpatRaster object or data frame containing the processed data. +#' @examples +#' \dontrun{ +#' # Example usage (assuming `var` and `zarr` are properly formatted): +#' # result <- zarr_to_terra(var, zarr) +#' } +#' @family zarr +#' @export + +zarr_to_terra = function(var, zarr) { + + if(zarr$startDate == zarr$endDate){ + dates = as.POSIXct(zarr$startDate, tz = "UTC") + } else { + dates <- seq.POSIXt(as.POSIXct(zarr$startDate, tz = "UTC"), + as.POSIXct(zarr$endDate, tz = "UTC"), + by = zarr$interval) + + } + + + name <- gsub("_NA", "",paste(zarr$variable, + dates, + zarr$model, + zarr$ensemble, + zarr$scenario, + sep = "_")) + + vars = zarr$variable + + if(length(vars) == 0){ vars = zarr$varname } + + names_ts = sub("_$", "", + gsub("__", "", gsub("_NA", "", + paste( + vars, + zarr$model, + zarr$ensemble, + zarr$scenario, + sep = "_"))) + ) + + if (zarr$X1 == zarr$Xn & zarr$Y1 == zarr$Yn) { + df = data.frame(date = dates, var) + names(df) = c("date", names_ts) + return(df) + } + + resx <- zarr$resX#(dap$Xn - dap$X1) / (dap$ncols - 1) + resy <- zarr$resY#(dap$Yn - dap$Y1) / (dap$nrows - 1) + + xmin <- zarr$X1 - 0.5 * resx + xmax <- zarr$Xn + 0.5 * resx + ymin <- zarr$Y1 - 0.5 * resy + ymax <- zarr$Yn + 0.5 * resy + + + if (length(dim(var)) == 2) { + dim(var) <- c(dim(var), 1) + } + + r = rast(nrows = zarr$nrows, + ncols = zarr$ncols, + crs = zarr$crs, + nlyrs = zarr$Tdim, + extent = c( + xmin = min(xmin, xmax), + xmax = max(xmin, xmax), + ymin = min(ymin, ymax), + ymax = max(ymin, ymax) + )) + + + terra::values(r) = var + + if (zarr$toptobottom) { r <- flip(r) } + + units(r) <- zarr$units + time(r) <- dates + names(r) <- name + + r +} + + + diff --git a/README.Rmd b/README.Rmd index 15449a7..e070246 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,7 +16,6 @@ knitr::opts_chunk$set( ) library(AOI) -devtools::load_all() library(dplyr) library(ggplot2) library(tidyterra) @@ -53,6 +52,9 @@ remotes::install_github("mikejohnson51/AOI") # suggested! remotes::install_github("mikejohnson51/climateR") ``` +```{r} +library(climateR) +``` # Basic Usage The examples used here call upon the following shortcuts: @@ -129,21 +131,16 @@ ggplot() + ```{r} # Request data using cities (POINTs) - -checkNetrc() -writeDodsrc() - -modis_pet = getMODIS( +pr = getGridMET( AOI = cities, - asset = 'MOD16A3GF.061', - varname = "PET_500m", + varname = "pr", startDate = "2020-10-29") ``` ```{r, echo = FALSE} ggplot() + - geom_spatraster(data = modis_pet$PET_500m) + + geom_spatraster(data = pr$precipitation_amount) + geom_spatvector(data = cities, color = "black", fill = NA, size = .01) + facet_wrap(~lyr) + scale_fill_hypso_c( @@ -186,7 +183,6 @@ All `climateR` functions treat the extent of the AOI and the default extraction ```{r} pipes = aoi_ext("Fort Collins", wh = c(10, 20), units = "km", bbox = TRUE)|> getNLCD() |> - get3DEP() %>% getTerraClimNormals(varname = c("tmax", "ppt")) lapply(pipes, dim) diff --git a/README.md b/README.md index 25986ad..c2371d6 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ # Welcome! + [![DOI](https://zenodo.org/badge/158620263.svg)](https://zenodo.org/badge/latestdoi/158620263) @@ -18,7 +19,7 @@ Active](https://www.repostatus.org/badges/latest/active.svg)](https://www.repost `climateR` simplifies the steps needed to get gridded geospatial data into R. At its core, it provides three main things: -1. A catalog of 108106 geospatial climate, land cover, and soils +1. A catalog of 112398 geospatial climate, land cover, and soils resources from 3477 collections. See (`climateR::catalog`) This catalog is an [evolving, federated collection of @@ -49,6 +50,10 @@ remotes::install_github("mikejohnson51/AOI") # suggested! remotes::install_github("mikejohnson51/climateR") ``` +``` r +library(climateR) +``` + # Basic Usage The examples used here call upon the following shortcuts: @@ -79,7 +84,7 @@ colorado = aoi_get(state = "CO", county = "all") cities = readRDS(system.file("co/cities_colorado.rds", package = "climateR")) ``` - + ## Extent extraction @@ -100,29 +105,22 @@ system.time({ endDate = "1991-11-06") }) #> user system elapsed -#> 0.221 0.038 1.416 +#> 0.195 0.021 1.266 ``` - + #### POINTS(s) act as a single extent ``` r # Request data using cities (POINTs) - -checkNetrc() -#> [1] TRUE -writeDodsrc() -#> [1] ".dodsrc" - -modis_pet = getMODIS( +pr = getGridMET( AOI = cities, - asset = 'MOD16A3GF.061', - varname = "PET_500m", + varname = "pr", startDate = "2020-10-29") ``` - + #### Single POINT(s) act as an extent @@ -139,7 +137,7 @@ system.time({ endDate = "2050-11-06") }) #> user system elapsed -#> 0.130 0.012 1.229 +#> 0.203 0.005 0.547 ``` ``` r @@ -165,21 +163,17 @@ together using either the base R or dplyr piping syntax. ``` r pipes = aoi_ext("Fort Collins", wh = c(10, 20), units = "km", bbox = TRUE)|> getNLCD() |> - get3DEP() %>% getTerraClimNormals(varname = c("tmax", "ppt")) lapply(pipes, dim) #> $`2019 Land Cover L48` -#> [1] 1401 786 1 -#> -#> $`30m CONUS DEM` -#> [1] 1417 1179 1 +#> [1] 1402 786 1 #> #> $tmax -#> [1] 10 9 12 +#> [1] 10 8 12 #> #> $ppt -#> [1] 10 9 12 +#> [1] 10 8 12 ``` ### Extract timeseries from exisitng objects: @@ -230,7 +224,7 @@ names(chirps_pts)[1:5] #> [1] "date" "ADAMSCITY" "AGATE" "AGUILAR" "AKRON" ``` - + ### Integration with `zonal` @@ -253,10 +247,10 @@ system.time({ ID = "fip_code") }) #> user system elapsed -#> 0.216 0.021 1.546 +#> 0.174 0.012 1.245 ``` - + ## Basic Animation diff --git a/_pkgdown.yml b/_pkgdown.yml index 8283b7e..29dd0b2 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -19,18 +19,18 @@ articles: - title: Quick Start desc: Data access in R and Python contents: - - 01-intro - - 03-intro-climatepy + - intro + - intro-climatepy - title: Data Catalogs contents: - schema - - 02-catalogs + - catalogs - title: Use Cases contents: - - 05-mros-climateR - - 04-stream-morph + - mros-climateR + - stream-morph reference: - title: @@ -42,6 +42,10 @@ reference: desc: General tools for data access contents: - has_concept("dap") +- subtitle: Zarr Access + desc: General tools for zarr access + contents: + - has_concept("zarr") - subtitle: EarthData Access desc: General tools for configuring NASA EarthData Access contents: diff --git a/docs/404.html b/docs/404.html index 71ea438..def87e0 100644 --- a/docs/404.html +++ b/docs/404.html @@ -6,61 +6,55 @@ Page not found (404) • climateR + + + + + + - - - - + + + + Skip to contents - -