Merge pull request #9 from bolotinl/nwm3.0
Add access to and info for NWM 3.0 netcdf outputs
mikejohnson51 authored Feb 12, 2024
2 parents 8969e23 + ec0632d commit e61bc3a
Showing 5 changed files with 790 additions and 6 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -9,6 +9,7 @@ Authors@R:
role = c("aut", "cre"),
email = "[email protected]",
comment = c(ORCID = "0000-0002-5288-8350")),
person(given = "Lauren", family = "Bolotin", role = c("ctb")),
person(given = "CUAHSI", role = "fnd"),
person(given = "NSF", role = "fnd"),
person(given = "Lynker", role = "fnd"))
6 changes: 5 additions & 1 deletion R/get_filelists.R
@@ -2,6 +2,7 @@ validate = function(complete, field, value){

if(field %in% names(complete) & !is.null(value)){
opts = unique(complete[[field]])


if(any(grepl(value, opts))){
return(filter(complete, grepl(!!value, get(field))))
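For context, this internal helper filters the complete metadata table by a regex match on one field. A minimal sketch of the behavior, with hypothetical data, assuming the truncated remainder of the body handles the no-match and NULL cases:

library(dplyr)

# Hypothetical metadata table: validate() keeps rows whose `domain`
# field matches the requested value via grepl().
complete = tibble(domain = c("conus", "Alaska"),
                  output = c("CHRTOUT", "CHRTOUT"))
validate(complete, field = "domain", value = "Alaska")
# returns only the Alaska row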
@@ -119,6 +120,7 @@ get_aws_urls = function(version = 2.1,
output = "CHRTOUT",
config = NULL,
ensemble = NULL,
domain = 'conus',
date = "2010-10-29",
hour = "00",
minute = "00",
@@ -130,15 +132,17 @@
config = config,
output = output,
ensemble = NULL,
domain = NULL)
domain = domain)

dates = seq.POSIXt(as.POSIXlt(paste(date, hour, minute), tz = 'UTC'),
length.out = num,
by = paste(meta$timestep, ifelse(meta$timestep > 10, "minutes", "hours")))

urls = glue(meta$http_pattern,
bucket = meta$bucket,
domain = meta$domain,
config = meta$config,
format = 'netcdf',
output = meta$output,
YYYY = format(dates, "%Y"),
YYYYMMDDHHMM = format(dates, "%Y%m%d%H%M"))
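Together, these changes let callers target the non-CONUS NWM 3.0 domains. A hedged sketch of the resulting call; only the arguments visible in this diff are confirmed, and `num` is inferred from `length.out = num` above:

# Sketch: URLs for six hourly CHRTOUT files from the NWM 3.0
# retrospective over Alaska. `num` is assumed from the hidden part
# of the signature.
urls = get_aws_urls(version = 3.0,
                    output  = "CHRTOUT",
                    domain  = "Alaska",
                    date    = "2010-10-29",
                    hour    = "00",
                    minute  = "00",
                    num     = 6)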
155 changes: 150 additions & 5 deletions data-raw/nwm_data.R
@@ -2,7 +2,7 @@ pacman::p_load(dplyr, glue, tidyr, readr, xml2, rvest)

# AWS Scraping Function ------------------------------

scrape_aws = function(version, bucket, config, year, startDate, endDate){
scrape_aws = function(version, bucket, config, year, startDate, endDate, domain, format){

unlink('data/aws_scrape.csv')

@@ -15,7 +15,17 @@ scrape_aws = function(version, bucket, config, year, startDate, endDate){
http_pattern = "{gsub('s3', 'http', {bucket})}.s3.amazonaws.com/{YYYY}/{YYYYMMDDHHMM}.{output}.comp"
system(glue('aws s3 ls {pattern} >> data/aws_scrape.csv'))
}

if(version == '3.0' | version == '3'){
pattern = glue('{bucket}/{domain}/{format}/{config}/{year}/{year}')
system(glue('aws s3 ls {pattern} >> data/aws_scrape.csv'))
if(domain != 'CONUS' & config == 'FORCING'){
http_pattern = "{gsub('s3', 'http', {bucket})}.s3.amazonaws.com/{domain}/{format}/{config}/{YYYY}/{YYYYMMDDHH}.{output}"
system(glue('aws s3 ls {pattern} >> data/aws_scrape.csv'))
}
else{
http_pattern = "{gsub('s3', 'http', {bucket})}.s3.amazonaws.com/{domain}/{format}/{config}/{YYYY}/{YYYYMMDDHHMM}.{output}"
}
}
yy = data.table::fread("data/aws_scrape.csv") %>%
select(path = V4) %>%
filter(path != "") %>%
@@ -24,18 +24,34 @@
separate(path, sep = "\\.", c("date", "output", "ext")) %>%
mutate(http_pattern = http_pattern,
hour = as.numeric(substr(date,9,10)),
minute = ifelse(nchar(date) <= 10, 0, as.numeric(substr(date, 11, 12))),
date = NULL,
ext = NULL) %>%
distinct() %>%
group_by(output) %>%
arrange(hour) %>%
mutate(timestep = median(hour - lag(hour), na.rm = TRUE),
domain = "conus",
mutate(timestep = ifelse(median(minute - lag(minute), na.rm = TRUE) == 15,
0.25,
median(hour - lag(hour), na.rm = TRUE)),
domain = domain,
hour = NULL,
horizion = 1,
prefix = "",
version = !!version,
startDate = startDate, endDate = endDate) %>%
select(-c(minute)) %>%
distinct() %>%
ungroup() %>%
distinct()

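For reference, the non-FORCING v3.0 http_pattern above expands through glue() roughly as follows. Every value in this sketch is an illustrative input, not scraped metadata, and the CHRTOUT_DOMAIN1 stem is an assumed example filename:

library(glue)

# Illustrative expansion of the v3.0 URL template defined above.
glue(
  "{gsub('s3', 'http', {bucket})}.s3.amazonaws.com/{domain}/{format}/{config}/{YYYY}/{YYYYMMDDHHMM}.{output}",
  bucket = "s3://noaa-nwm-retrospective-3-0-pds",
  domain = "CONUS", format = "netcdf", config = "CHRTOUT",
  YYYY = "2010", YYYYMMDDHHMM = "201010290000", output = "CHRTOUT_DOMAIN1"
)
# http://noaa-nwm-retrospective-3-0-pds.s3.amazonaws.com/CONUS/netcdf/CHRTOUT/2010/201010290000.CHRTOUT_DOMAIN1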
@@ -172,6 +187,131 @@ g = scrape_aws(version = "1.2", bucket = "s3://nwm-archive",
startDate = as.character(get_nwm_meta(version = "1.2")$startDate),
endDate = as.character(get_nwm_meta(version = "1.2")$endDate))

n1 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'FORCING', year = 2010, domain = 'Alaska', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("12/31/19 23:00"))
n2 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'CHRTOUT', year = 2010, domain = 'Alaska', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("12/31/19 23:00"))
n3 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'GWOUT', year = 2010, domain = 'Alaska', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("12/31/19 23:00"))
n4 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LAKEOUT', year = 2010, domain = 'Alaska', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("12/31/19 23:00"))
n5 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LDASOUT', year = 2010, domain = 'Alaska', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("12/31/19 23:00"))
n6 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'RTOUT', year = 2010, domain = 'Alaska', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("12/31/19 23:00"))

o1 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'FORCING', year = 2010, domain = 'CONUS', format = 'netcdf',
startDate = as.character("2/1/79 1:00"),
endDate = as.character("12/31/23 23:00"))
o2 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'CHRTOUT', year = 2010, domain = 'CONUS', format = 'netcdf',
startDate = as.character("2/1/79 1:00"),
endDate = as.character("12/31/23 23:00"))
o3 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'GWOUT', year = 2010, domain = 'CONUS', format = 'netcdf',
startDate = as.character("2/1/79 1:00"),
endDate = as.character("12/31/23 23:00"))
o4 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LAKEOUT', year = 2010, domain = 'CONUS', format = 'netcdf',
startDate = as.character("2/1/79 1:00"),
endDate = as.character("12/31/23 23:00"))
o5 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LDASOUT', year = 2010, domain = 'CONUS', format = 'netcdf',
startDate = as.character("2/1/79 1:00"),
endDate = as.character("12/31/23 23:00"))
o6 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'RTOUT', year = 2010, domain = 'CONUS', format = 'netcdf',
startDate = as.character("2/1/79 1:00"),
endDate = as.character("12/31/23 23:00"))
o7 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'CHANOBS', year = 2010, domain = 'CONUS', format = 'netcdf',
startDate = as.character("2/1/79 1:00"),
endDate = as.character("12/31/23 23:00"))

p1 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'FORCING', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p2 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'CHRTOUT', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p3 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'GWOUT', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p4 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LAKEOUT', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p5 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LDASOUT', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p6 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'RTOUT', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p7 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'CHANOBS', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p8 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'OUTPUT_RTOUT_COMPRESSED', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p9 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'OUTPUT_LDASOUT_COMPRESSED', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))
p10 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'OUTPUT_LAKEOUT', year = 2010, domain = 'Hawaii', format = 'netcdf',
startDate = as.character("1/1/94 1:00"),
endDate = as.character("1/1/14 23:00"))

q1 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'FORCING', year = 2010, domain = 'PR', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("5/1/23 23:00"))
q2 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'CHRTOUT', year = 2010, domain = 'PR', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("5/1/23 23:00"))
q3 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'GWOUT', year = 2010, domain = 'PR', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("5/1/23 23:00"))
q4 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LAKEOUT', year = 2010, domain = 'PR', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("5/1/23 23:00"))
q5 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'LDASOUT', year = 2010, domain = 'PR', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("5/1/23 23:00"))
q6 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'RTOUT', year = 2010, domain = 'PR', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("5/1/23 23:00"))
q7 = scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
config = 'CHANOBS', year = 2010, domain = 'PR', format = 'netcdf',
startDate = as.character("1/1/81 1:00"),
endDate = as.character("5/1/23 23:00"))

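The thirty assignments above could also be generated from a small lookup table. A sketch that mirrors the same domains, configs, and date ranges; the combined result is equivalent to binding n1..q7 above:

# Sketch: drive scrape_aws() from a per-domain spec instead of one
# assignment per config. Values copied from the explicit calls above.
v3_specs = list(
  Alaska = list(configs = c("FORCING","CHRTOUT","GWOUT","LAKEOUT","LDASOUT","RTOUT"),
                start = "1/1/81 1:00", end = "12/31/19 23:00"),
  CONUS  = list(configs = c("FORCING","CHRTOUT","GWOUT","LAKEOUT","LDASOUT","RTOUT","CHANOBS"),
                start = "2/1/79 1:00", end = "12/31/23 23:00"),
  Hawaii = list(configs = c("FORCING","CHRTOUT","GWOUT","LAKEOUT","LDASOUT","RTOUT","CHANOBS",
                            "OUTPUT_RTOUT_COMPRESSED","OUTPUT_LDASOUT_COMPRESSED","OUTPUT_LAKEOUT"),
                start = "1/1/94 1:00", end = "1/1/14 23:00"),
  PR     = list(configs = c("FORCING","CHRTOUT","GWOUT","LAKEOUT","LDASOUT","RTOUT","CHANOBS"),
                start = "1/1/81 1:00", end = "5/1/23 23:00")
)

v3 = bind_rows(lapply(names(v3_specs), function(d) {
  spec = v3_specs[[d]]
  bind_rows(lapply(spec$configs, function(cfg) {
    scrape_aws(version = "3.0", bucket = "s3://noaa-nwm-retrospective-3-0-pds",
               config = cfg, year = 2010, domain = d, format = "netcdf",
               startDate = spec$start, endDate = spec$end)
  }))
}))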

#'http://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/'
h = scrape_nomads('http://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/', "prod")
i = scrape_nomads('http://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/', "v2.2")
@@ -196,7 +336,12 @@ nwm_data = bind_rows(a,
j,
k,
l,
m)
m,
n1, n2, n3, n4, n5, n6,
o1, o2, o3, o4, o5, o6, o7,
p1, p2, p3, p4, p5, p6, p7, p8, p9, p10,
q1, q2, q3, q4, q5, q6, q7
)

usethis::use_data(nwm_data, overwrite = TRUE)

