diff --git a/Dockerfile b/Dockerfile
index ce1dcc3..5870f55 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,7 +5,7 @@ ENV PYTHONUNBUFFERED=1
 ENV PYTEST_ADDOPTS="--color=yes"
 
 # env var for test data version to use, which should always be the most up to date
-ENV TEST_DATA_TAG=2023-09-19
+ENV TEST_DATA_TAG=2023-11-08
 
 # Note: The apt and python pip below should MOSTLY match the
 # cumulus-api/async-geoproc/Dockerfile to ensure the
diff --git a/src/cumulus_geoproc/processors/abrfc-qpf-06h.py b/src/cumulus_geoproc/processors/abrfc-qpf-06h.py
index 04fd9db..1faaa4e 100644
--- a/src/cumulus_geoproc/processors/abrfc-qpf-06h.py
+++ b/src/cumulus_geoproc/processors/abrfc-qpf-06h.py
@@ -51,7 +51,7 @@ def process(*, src: str, dst: str = None, acquirable: str = None):
 
         ds = cgdal.findsubset(ds, [SUBSET_NAME, SUBSET_DATATYPE])
 
-        version_datetime = cgdal.getVersionDate(
+        version_datetime = cgdal.getDate(
             ds, src_path, "NC_GLOBAL#creationTime", "%Y%m%d%H", "\\d{10}", False
         )
 
diff --git a/src/cumulus_geoproc/processors/aprfc-qpe-06h.py b/src/cumulus_geoproc/processors/aprfc-qpe-06h.py
new file mode 100644
index 0000000..b370f63
--- /dev/null
+++ b/src/cumulus_geoproc/processors/aprfc-qpe-06h.py
@@ -0,0 +1,98 @@
+"""
+# Alaska Pacific River Forecast Center
+
+## QPE 06 hour total precipitation
+"""
+
+
+import os
+import sys
+import pyplugs
+
+from cumulus_geoproc import logger
+from cumulus_geoproc.utils import cgdal
+
+
+@pyplugs.register
+def process(*, src: str, dst: str = None, acquirable: str = None):
+    """
+    # Grid processor
+
+    __Requires keyword only arguments (*)__
+
+    Parameters
+    ----------
+    src : str
+        path to input file for processing
+    dst : str, optional
+        path to temporary directory
+    acquirable: str, optional
+        acquirable slug
+
+    Returns
+    -------
+    List[dict]
+    ```
+    {
+        "filetype": str,         Matching database acquirable
+        "file": str,             Converted file
+        "datetime": str,         Valid Time, ISO format with timezone
+        "version": str           Reference Time (forecast), ISO format with timezone
+    }
+    ```
+    """
+
+    outfile_list = []  # returned empty when processing fails and is only logged
+    try:
+        attr = {"GRIB_ELEMENT": "APCP"}
+        # determine the path and open the file in gdal
+        ds, src_path, dst_path = cgdal.openfileGDAL(src, dst, GDALAccess="read_only")
+
+        # Grab the grid from the band
+        if (band_number := cgdal.find_band(ds, attr)) is None:
+            raise Exception(f"Band number not found for attributes: {attr}")
+
+        logger.debug(f"Band number '{band_number}' found for attributes {attr}")
+
+        raster = ds.GetRasterBand(band_number)
+
+        # Get Datetime from String Like "1599008400 sec UTC"
+        dt_valid = cgdal.getDate(raster, src_path, "GRIB_VALID_TIME", None, None)
+
+        cgdal.gdal_translate_w_options(
+            tif := os.path.join(
+                dst, f'{acquirable}.{dt_valid.strftime("%Y%m%d_%H%M")}.tif'
+            ),
+            ds,
+            bandList=[band_number],
+        )
+
+        # validate COG
+        if (validate := cgdal.validate_cog("-q", tif)) == 0:
+            logger.debug(f"Validate COG = {validate}\t{tif} is a COG")
+
+        outfile_list = [
+            {
+                "filetype": acquirable,
+                "file": tif,
+                "datetime": dt_valid.isoformat(),
+                "version": None,
+            },
+        ]
+
+    except (RuntimeError, KeyError, Exception) as ex:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        traceback_details = {
+            "filename": os.path.basename(exc_traceback.tb_frame.f_code.co_filename),
+            "line number": exc_traceback.tb_lineno,
+            "method": exc_traceback.tb_frame.f_code.co_name,
+            "type": exc_type.__name__,
+            "message": exc_value,
+        }
+        for k, v in traceback_details.items():
+            logger.error(f"{k}: {v}")
+
+    finally:
+        ds = raster = None  # drop the GDAL references (dataset first, then band)
+
+    return outfile_list
diff --git a/src/cumulus_geoproc/processors/aprfc-qpf-06h.py b/src/cumulus_geoproc/processors/aprfc-qpf-06h.py
new file mode 100644
index 0000000..7de3ca2
--- /dev/null
+++ b/src/cumulus_geoproc/processors/aprfc-qpf-06h.py
@@ -0,0 +1,100 @@
+"""
+# Alaska Pacific River Forecast Center
+
+## QPF 06 hour total precipitation
+"""
+
+
+import os
+import sys
+import pyplugs
+
+from cumulus_geoproc import logger
+from cumulus_geoproc.utils import cgdal
+
+
+@pyplugs.register
+def process(*, src: str, dst: str = None, acquirable: str = None):
+    """
+    # Grid processor
+
+    __Requires keyword only arguments (*)__
+
+    Parameters
+    ----------
+    src : str
+        path to input file for processing
+    dst : str, optional
+        path to temporary directory
+    acquirable: str, optional
+        acquirable slug
+
+    Returns
+    -------
+    List[dict]
+    ```
+    {
+        "filetype": str,         Matching database acquirable
+        "file": str,             Converted file
+        "datetime": str,         Valid Time, ISO format with timezone
+        "version": str           Reference Time (forecast), ISO format with timezone
+    }
+    ```
+    """
+
+    outfile_list = []  # returned empty when processing fails and is only logged
+    try:
+        attr = {"GRIB_ELEMENT": "APCP"}
+        # determine the path and open the file in gdal
+        ds, src_path, dst_path = cgdal.openfileGDAL(src, dst, GDALAccess="read_only")
+
+        # Grab the grid from the band
+        if (band_number := cgdal.find_band(ds, attr)) is None:
+            raise Exception(f"Band number not found for attributes: {attr}")
+
+        logger.debug(f"Band number '{band_number}' found for attributes {attr}")
+
+        raster = ds.GetRasterBand(band_number)
+
+        # Get Datetime from String Like "1599008400 sec UTC"
+        dt_valid = cgdal.getDate(raster, src_path, "GRIB_VALID_TIME", None, None)
+        dt_version = cgdal.getDate(raster, src_path, "GRIB_REF_TIME", None, None)
+
+        cgdal.gdal_translate_w_options(
+            tif := os.path.join(
+                dst,
+                f'{acquirable}.{dt_version.strftime("%Y%m%d_%H%M")}.{dt_valid.strftime("%Y%m%d_%H%M")}.tif',
+            ),
+            ds,
+            bandList=[band_number],
+        )
+
+        # validate COG
+        if (validate := cgdal.validate_cog("-q", tif)) == 0:
+            logger.debug(f"Validate COG = {validate}\t{tif} is a COG")
+
+        outfile_list = [
+            {
+                "filetype": acquirable,
+                "file": tif,
+                "datetime": dt_valid.isoformat(),
+                "version": dt_version.isoformat(),
+            },
+        ]
+
+    except (RuntimeError, KeyError, Exception) as ex:
+        exc_type, exc_value, exc_traceback = sys.exc_info()
+        traceback_details = {
+            "filename": os.path.basename(exc_traceback.tb_frame.f_code.co_filename),
+            "line number": exc_traceback.tb_lineno,
+            "method": exc_traceback.tb_frame.f_code.co_name,
+            "type": exc_type.__name__,
+            "message": exc_value,
+        }
+        for k, v in traceback_details.items():
+            logger.error(f"{k}: {v}")
+
+    finally:
+        ds = raster = None  # drop the GDAL references (dataset first, then band)
+
+    return outfile_list
diff --git a/src/cumulus_geoproc/utils/cgdal.py b/src/cumulus_geoproc/utils/cgdal.py
index 7c43565..8a491c5 100644
--- a/src/cumulus_geoproc/utils/cgdal.py
+++ b/src/cumulus_geoproc/utils/cgdal.py
@@ -302,7 +302,7 @@ def validate_cog(*args):
     return validate_cloud_optimized_geotiff.main(argv)
 
 
-def openfileGDAL(src, dst):
+def openfileGDAL(src, dst, GDALAccess="Update"):
     """Set Source and Destination paths and open file in GDAL
 
     Parameters
@@ -310,6 +310,8 @@ def openfileGDAL(src, dst):
         path to input file for processing
     dst : str, optional
         path to temporary directory
+    GDALAccess: str default 'Update'
+        read_only or update access to object.
 
     Returns
     -------
@@ -344,18 +346,22 @@ def openfileGDAL(src, dst):
         ".zip",
         ".tar.gz",
     )
+    if GDALAccess == "read_only":
+        GDALAccess = gdal.GA_ReadOnly
+    else:
+        GDALAccess = gdal.GA_Update
 
     try:
         if any([x in src for x in exts]):
             try:
-                ds = gdal.Open("/vsigzip/" + src, gdal.GA_Update)
+                ds = gdal.Open("/vsigzip/" + src, GDALAccess)
             except RuntimeError as err:
                 logger.warning(err)
                 logger.warning(f'gunzip "{src}" and use as source file')
                 src_unzip = utils.decompress(src, str(dst_path))
-                ds = gdal.Open(src_unzip, gdal.GA_Update)
+                ds = gdal.Open(src_unzip, GDALAccess)
         else:
-            ds = gdal.Open(src, gdal.GA_Update)
+            ds = gdal.Open(src, GDALAccess)
     except RuntimeError as err:
         logger.warning(err)
         logger.warning(f"could not open file {src}")
@@ -397,7 +403,7 @@ def findsubset(ds: gdal.Dataset, subset_params):
     return ds
 
 
-def getVersionDate(
+def getDate(
     ds: gdal.Dataset,
     src_path,
     metaVar: str,
@@ -405,7 +411,7 @@ def getVersionDate(
     filedateSearch,
     MetaDate=True,
 ):
-    """Get the Version date of the grid
+    """Get date from the grid or filename
     Parameters
     ds: osgeo.gdal.Dataset Object
         open GDAL object
@@ -422,25 +428,31 @@ def getVersionDate(
 
     Returns
     -------
-    version_datetime: datatime
+    date_datetime: datetime
         Reference Time (forecast), ISO format with timezone
     """
 
+    date_datetime = None
     # get the version
     date_created = ds.GetMetadataItem(metaVar)
     date_created_match = re.search("\\d+", date_created)
 
     if date_created_match and MetaDate:
-        version_datetime = datetime.fromtimestamp(int(date_created_match[0])).replace(
+        date_datetime = datetime.fromtimestamp(int(date_created_match[0])).replace(
             tzinfo=timezone.utc
         )
     else:
         filename = src_path.name
         date_str = re.search(filedateSearch, filename)[0]
-        version_datetime = datetime.strptime(date_str, fileDateFormat).replace(
+        date_datetime = datetime.strptime(date_str, fileDateFormat).replace(
             tzinfo=timezone.utc
         )
-    return version_datetime
+
+    if date_datetime is None:
+        raise Exception(
+            f"Did not find the date we were looking for in grid, {src_path.name}"
+        )
+    return date_datetime
 
 
 def geoTransform_ds(ds: gdal.Dataset, SUBSET_NAME: str, dstSRS: str = "EPSG:4326"):
diff --git a/src/tests/integration/fixtures/test_products.json b/src/tests/integration/fixtures/test_products.json
index def84e5..873432b 100644
--- a/src/tests/integration/fixtures/test_products.json
+++ b/src/tests/integration/fixtures/test_products.json
@@ -9,10 +9,24 @@
     {
         "plugin": "abrfc-qpf-06h",
         "url": "",
-        "local_source": "fixtures/abrfc-qpf-06h/QPF6_2023091912f060.cdf",
+        "local_source": "cumulus-geoproc-test-data/fixtures/abrfc-qpf-06h/QPF6_2023091912f060.cdf",
         "versioned": true,
         "name_pattern": "QPF6_%Y%m%d%Hf060.nc.gz"
     },
+    {
+        "plugin": "aprfc-qpe-06h",
+        "url": "",
+        "local_source": "cumulus-geoproc-test-data/fixtures/aprfc-qpe-06h/precip_acr_grid_06_12_20231104.grb.gz",
+        "versioned": false,
+        "name_pattern": "precip_acr_grid_06_12_%Y%m%d.grb.gz"
+    },
+    {
+        "plugin": "aprfc-qpf-06h",
+        "url": "",
+        "local_source": "cumulus-geoproc-test-data/fixtures/aprfc-qpf-06h/qpf06f_has_96f_20231108_12_awips_202311040704.grb.gz",
+        "versioned": true,
+        "name_pattern": "qpf06f_has_96f_%Y%m%d_%H_awips_%Y%m%d%H%M.grb.gz"
+    },
     {
         "plugin": "cbrfc-mpe",
         "url": "",