From 213636cf956077486f52af1abd36896d682e90e3 Mon Sep 17 00:00:00 2001 From: Per Dahlgren Date: Fri, 12 Jan 2024 09:37:30 +0000 Subject: [PATCH 1/6] Added features in bufr.py for carra2 to be able to read local observations --- pysurfex/bufr.py | 97 +++++++++++++++++++++++------------------------- 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/pysurfex/bufr.py b/pysurfex/bufr.py index 3f14a09..cb19bc2 100644 --- a/pysurfex/bufr.py +++ b/pysurfex/bufr.py @@ -1,5 +1,6 @@ """bufr treatment.""" import logging +import sys from math import exp import numpy as np @@ -36,7 +37,6 @@ def __init__( latrange=None, label="bufr", use_first=False, - sigmao=None, ): """Initialize a bufr observation set. @@ -49,7 +49,6 @@ def __init__( latrange (list): Allowed range of latitides [min, max] label (str): A label for the resulting observations set use_first (bool): Use only the first valid observation for a point if more are found - sigmao (float, optional): Observation error relative to normal background error. Defaults to None. Raises: RuntimeError: ECCODES not found. Needed for bufr reading @@ -67,9 +66,6 @@ def __init__( # open bufr file file_handler = open(bufrfile, mode="rb") - number_of_bytes = file_handler.seek(0, 2) - logging.info("File size: %s", number_of_bytes) - file_handler.seek(0) # define the keys to be printed keys = [ @@ -86,8 +82,8 @@ def __init__( "heightOfStation", "stationNumber", "blockNumber", + "stationOrSiteName", ] - processed_threshold = 0 nerror = {} ntime = {} nundef = {} @@ -113,6 +109,10 @@ def __init__( "/heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform=1.5" "/dewpointTemperature" ) + keys.append( + "/heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform=2" + "/relativeHumidity" + ) elif var == "airTemperatureAt2M": keys.append("airTemperatureAt2M") keys.append( @@ -171,7 +171,9 @@ def __init__( stid = "NA" station_number = -1 block_number = -1 + site_name = "NA" t2m = np.nan + rh2m = np.nan td2m = np.nan s_d = np.nan temp = np.nan @@ -223,6 +225,8 @@ def __init__( station_number = val if key == "blockNumber": block_number = val + if key == "stationOrSiteName": + site_name = val if key == "airTemperatureAt2M": t2m = val if ( @@ -234,6 +238,12 @@ def __init__( "/airTemperature" ): temp = val + if ( + key + == "/heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform=2" + "/relativeHumidity" + ): + rh2m = val if key == "dewpointTemperatureAt2M": td2m = val if ( @@ -279,24 +289,29 @@ def __init__( if not exists: logging.debug("Pos does not exist %s %s", pos, var) if var == "relativeHumidityAt2M": - if not np.isnan(t2m) and not np.isnan(td2m): + if not np.isnan(t2m) and not np.isnan(td2m) and np.isnan(rh2m): try: value = self.td2rh(td2m, t2m) - value = value * 0.01 + value = value * 0.01 except Exception: logging.debug("Got exception for %s:", var) value = np.nan + elif not np.isnan(temp) and not np.isnan(t_d) and np.isnan(rh2m): + try: + value = self.td2rh(t_d, temp) + value = value * 0.01 + except Exception: + logging.debug( + "Got exception for %s", + var, + ) + value = np.nan else: - if not np.isnan(temp) and not np.isnan(t_d): - try: - value = self.td2rh(t_d, temp) - value = value * 0.01 - except Exception: - logging.debug( - "Got exception for %s", - var, - ) - value = np.nan + value = np.nan + + if np.isnan(value) and not np.isnan(rh2m): + value = 0.01 * rh2m + elif var == "airTemperatureAt2M": if np.isnan(t2m): if not np.isnan(temp): @@ -349,28 +364,10 @@ def __init__( latrange[0] <= lat <= latrange[1] and lonrange[0] <= lon <= lonrange[1] ): - obs_dtg = None - try: - obs_dtg = as_datetime_args( - year=year, - month=month, - day=day, - hour=hour, - minute=minute, - ) - except ValueError: - logging.warning( - "Bad observations time: year=%s month=%s day=%s hour=%s minute=%s Position is lon=%s, lat=%s", - year, - month, - day, - hour, - minute, - lon, - lat, - ) - obs_dtg = None - if not np.isnan(value) and obs_dtg is not None: + obs_dtg = as_datetime_args( + year=year, month=month, day=day, hour=hour, minute=minute + ) + if not np.isnan(value): if self.inside_window(obs_dtg, valid_dtg, valid_range): logging.debug( "Valid DTG for station %s %s %s %s %s %s %s %s", @@ -385,6 +382,9 @@ def __init__( ) if station_number > 0 and block_number > 0: stid = str((block_number * 1000) + station_number) + if stid == "NA" and site_name != "NA" and site_name.isnumeric(): + stid = site_name + observations.append( Observation( obs_dtg, @@ -407,23 +407,18 @@ def __init__( ndomain.update({var: ndomain[var] + 1}) cnt += 1 - try: - nbytes = file_handler.tell() - except ValueError: - nbytes = number_of_bytes - processed = int(round(float(nbytes) * 100.0 / float(number_of_bytes))) - if processed > processed_threshold and processed % 5 == 0: - processed_threshold = processed - logging.info("Read: %s%%", processed) + if (cnt % 1000) == 0: + print(".", end="") + sys.stdout.flush() # delete handle eccodes.codes_release(bufr) - logging.info("Found %s/%s", str(len(observations)), str(cnt)) + logging.info("\nFound %s/%s", str(len(observations)), str(cnt)) logging.info("Not decoded: %s", str(not_decoded)) for var in variables: - logging.info("Observations for var=%s: %s", var, str(nobs[var])) + logging.info("\nObservations for var=%s: %s", var, str(nobs[var])) logging.info( "Observations removed because of domain check: %s", str(ndomain[var]) ) @@ -440,7 +435,7 @@ def __init__( # close the file file_handler.close() - ObservationSet.__init__(self, observations, label=label, sigmao=sigmao) + ObservationSet.__init__(self, observations, label=label) @staticmethod def td2rh(t_d, temp, kelvin=True): From 3af9e2062af6aec48615c7b28354ee326a310b83 Mon Sep 17 00:00:00 2001 From: Per Dahlgren Date: Fri, 12 Jan 2024 14:55:22 +0000 Subject: [PATCH 2/6] Again, added features in bufr.py for carra2 to be able to read local observations --- pysurfex/bufr.py | 49 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/pysurfex/bufr.py b/pysurfex/bufr.py index cb19bc2..1b6ebaa 100644 --- a/pysurfex/bufr.py +++ b/pysurfex/bufr.py @@ -37,6 +37,7 @@ def __init__( latrange=None, label="bufr", use_first=False, + sigmao=None, ): """Initialize a bufr observation set. @@ -49,6 +50,7 @@ def __init__( latrange (list): Allowed range of latitides [min, max] label (str): A label for the resulting observations set use_first (bool): Use only the first valid observation for a point if more are found + sigmao (float, optional): Observation error relative to normal background error. Defaults to None. Raises: RuntimeError: ECCODES not found. Needed for bufr reading @@ -66,6 +68,9 @@ def __init__( # open bufr file file_handler = open(bufrfile, mode="rb") + number_of_bytes = file_handler.seek(0, 2) + logging.info("File size: %s", number_of_bytes) + file_handler.seek(0) # define the keys to be printed keys = [ @@ -84,6 +89,7 @@ def __init__( "blockNumber", "stationOrSiteName", ] + processed_threshold = 0 nerror = {} ntime = {} nundef = {} @@ -364,10 +370,28 @@ def __init__( latrange[0] <= lat <= latrange[1] and lonrange[0] <= lon <= lonrange[1] ): - obs_dtg = as_datetime_args( - year=year, month=month, day=day, hour=hour, minute=minute - ) - if not np.isnan(value): + obs_dtg = None + try: + obs_dtg = as_datetime_args( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + ) + except ValueError: + logging.warning( + "Bad observations time: year=%s month=%s day=%s hour=%s minute=%s Position is lon=%s, lat=%s", + year, + month, + day, + hour, + minute, + lon, + lat, + ) + obs_dtg = None + if not np.isnan(value) and obs_dtg is not None: if self.inside_window(obs_dtg, valid_dtg, valid_range): logging.debug( "Valid DTG for station %s %s %s %s %s %s %s %s", @@ -407,18 +431,23 @@ def __init__( ndomain.update({var: ndomain[var] + 1}) cnt += 1 + try: + nbytes = file_handler.tell() + except ValueError: + nbytes = number_of_bytes - if (cnt % 1000) == 0: - print(".", end="") - sys.stdout.flush() + processed = int(round(float(nbytes) * 100.0 / float(number_of_bytes))) + if processed > processed_threshold and processed % 5 == 0: + processed_threshold = processed + logging.info("Read: %s%%", processed) # delete handle eccodes.codes_release(bufr) - logging.info("\nFound %s/%s", str(len(observations)), str(cnt)) + logging.info("Found %s/%s", str(len(observations)), str(cnt)) logging.info("Not decoded: %s", str(not_decoded)) for var in variables: - logging.info("\nObservations for var=%s: %s", var, str(nobs[var])) + logging.info("Observations for var=%s: %s", var, str(nobs[var])) logging.info( "Observations removed because of domain check: %s", str(ndomain[var]) ) @@ -435,7 +464,7 @@ def __init__( # close the file file_handler.close() - ObservationSet.__init__(self, observations, label=label) + ObservationSet.__init__(self, observations, label=label, sigmao=sigmao) @staticmethod def td2rh(t_d, temp, kelvin=True): From 5c77dfc0a508b114238d993ea4b8be9e8afcd5e5 Mon Sep 17 00:00:00 2001 From: Trygve Aspelien Date: Tue, 16 Jan 2024 10:02:54 +0100 Subject: [PATCH 3/6] Do black and remove unused import --- pysurfex/bufr.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/pysurfex/bufr.py b/pysurfex/bufr.py index 1b6ebaa..0a6eac1 100644 --- a/pysurfex/bufr.py +++ b/pysurfex/bufr.py @@ -1,6 +1,5 @@ """bufr treatment.""" import logging -import sys from math import exp import numpy as np @@ -295,14 +294,22 @@ def __init__( if not exists: logging.debug("Pos does not exist %s %s", pos, var) if var == "relativeHumidityAt2M": - if not np.isnan(t2m) and not np.isnan(td2m) and np.isnan(rh2m): + if ( + not np.isnan(t2m) + and not np.isnan(td2m) + and np.isnan(rh2m) + ): try: value = self.td2rh(td2m, t2m) - value = value * 0.01 + value = value * 0.01 except Exception: logging.debug("Got exception for %s:", var) value = np.nan - elif not np.isnan(temp) and not np.isnan(t_d) and np.isnan(rh2m): + elif ( + not np.isnan(temp) + and not np.isnan(t_d) + and np.isnan(rh2m) + ): try: value = self.td2rh(t_d, temp) value = value * 0.01 @@ -317,7 +324,7 @@ def __init__( if np.isnan(value) and not np.isnan(rh2m): value = 0.01 * rh2m - + elif var == "airTemperatureAt2M": if np.isnan(t2m): if not np.isnan(temp): @@ -406,9 +413,13 @@ def __init__( ) if station_number > 0 and block_number > 0: stid = str((block_number * 1000) + station_number) - if stid == "NA" and site_name != "NA" and site_name.isnumeric(): + if ( + stid == "NA" + and site_name != "NA" + and site_name.isnumeric() + ): stid = site_name - + observations.append( Observation( obs_dtg, From a6d2f5095ade3222e6cffe84e0449784e0e162a3 Mon Sep 17 00:00:00 2001 From: Per Dahlgren Date: Tue, 23 Jan 2024 12:36:52 +0000 Subject: [PATCH 4/6] Added features in bufr.py for carra2 to be able to read local observations --- pysurfex/bufr.py | 60 +++++++++++++++--------------------------------- 1 file changed, 18 insertions(+), 42 deletions(-) diff --git a/pysurfex/bufr.py b/pysurfex/bufr.py index 1b6ebaa..60908c3 100644 --- a/pysurfex/bufr.py +++ b/pysurfex/bufr.py @@ -37,7 +37,6 @@ def __init__( latrange=None, label="bufr", use_first=False, - sigmao=None, ): """Initialize a bufr observation set. @@ -50,7 +49,6 @@ def __init__( latrange (list): Allowed range of latitides [min, max] label (str): A label for the resulting observations set use_first (bool): Use only the first valid observation for a point if more are found - sigmao (float, optional): Observation error relative to normal background error. Defaults to None. Raises: RuntimeError: ECCODES not found. Needed for bufr reading @@ -68,9 +66,6 @@ def __init__( # open bufr file file_handler = open(bufrfile, mode="rb") - number_of_bytes = file_handler.seek(0, 2) - logging.info("File size: %s", number_of_bytes) - file_handler.seek(0) # define the keys to be printed keys = [ @@ -86,10 +81,8 @@ def __init__( "heightOfStationGroundAboveMeanSeaLevel", "heightOfStation", "stationNumber", - "blockNumber", - "stationOrSiteName", + "blockNumber" ] - processed_threshold = 0 nerror = {} ntime = {} nundef = {} @@ -232,7 +225,7 @@ def __init__( if key == "blockNumber": block_number = val if key == "stationOrSiteName": - site_name = val + site_name = str(val) if key == "airTemperatureAt2M": t2m = val if ( @@ -328,6 +321,8 @@ def __init__( value = s_d elif var == "heightOfBaseOfCloud": value = c_b + elif var == "stationOrSiteName": + site_name = site_name else: raise NotImplementedError( f"Var {var} is not coded! Please do it!" @@ -370,28 +365,10 @@ def __init__( latrange[0] <= lat <= latrange[1] and lonrange[0] <= lon <= lonrange[1] ): - obs_dtg = None - try: - obs_dtg = as_datetime_args( - year=year, - month=month, - day=day, - hour=hour, - minute=minute, - ) - except ValueError: - logging.warning( - "Bad observations time: year=%s month=%s day=%s hour=%s minute=%s Position is lon=%s, lat=%s", - year, - month, - day, - hour, - minute, - lon, - lat, - ) - obs_dtg = None - if not np.isnan(value) and obs_dtg is not None: + obs_dtg = as_datetime_args( + year=year, month=month, day=day, hour=hour, minute=minute + ) + if not np.isnan(value): if self.inside_window(obs_dtg, valid_dtg, valid_range): logging.debug( "Valid DTG for station %s %s %s %s %s %s %s %s", @@ -404,11 +381,15 @@ def __init__( elev, stid, ) + if station_number > 0 and block_number > 0: stid = str((block_number * 1000) + station_number) + + if stid == "NA" and site_name != "NA" and site_name.isnumeric(): stid = site_name + observations.append( Observation( obs_dtg, @@ -431,23 +412,18 @@ def __init__( ndomain.update({var: ndomain[var] + 1}) cnt += 1 - try: - nbytes = file_handler.tell() - except ValueError: - nbytes = number_of_bytes - processed = int(round(float(nbytes) * 100.0 / float(number_of_bytes))) - if processed > processed_threshold and processed % 5 == 0: - processed_threshold = processed - logging.info("Read: %s%%", processed) + if (cnt % 1000) == 0: + print(".", end="") + sys.stdout.flush() # delete handle eccodes.codes_release(bufr) - logging.info("Found %s/%s", str(len(observations)), str(cnt)) + logging.info("\nFound %s/%s", str(len(observations)), str(cnt)) logging.info("Not decoded: %s", str(not_decoded)) for var in variables: - logging.info("Observations for var=%s: %s", var, str(nobs[var])) + logging.info("\nObservations for var=%s: %s", var, str(nobs[var])) logging.info( "Observations removed because of domain check: %s", str(ndomain[var]) ) @@ -464,7 +440,7 @@ def __init__( # close the file file_handler.close() - ObservationSet.__init__(self, observations, label=label, sigmao=sigmao) + ObservationSet.__init__(self, observations, label=label) @staticmethod def td2rh(t_d, temp, kelvin=True): From b26664ea9486ef54c790f77187e0bcfd8314a9c8 Mon Sep 17 00:00:00 2001 From: Per Dahlgren Date: Tue, 23 Jan 2024 15:17:04 +0000 Subject: [PATCH 5/6] Further updates to bufr.py --- pysurfex/bufr.py | 62 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/pysurfex/bufr.py b/pysurfex/bufr.py index 115d887..9207488 100644 --- a/pysurfex/bufr.py +++ b/pysurfex/bufr.py @@ -36,6 +36,7 @@ def __init__( latrange=None, label="bufr", use_first=False, + sigmao=None, ): """Initialize a bufr observation set. @@ -48,6 +49,7 @@ def __init__( latrange (list): Allowed range of latitides [min, max] label (str): A label for the resulting observations set use_first (bool): Use only the first valid observation for a point if more are found + sigmao (float, optional): Observation error relative to normal background error. Defaults to None. Raises: RuntimeError: ECCODES not found. Needed for bufr reading @@ -65,6 +67,9 @@ def __init__( # open bufr file file_handler = open(bufrfile, mode="rb") + number_of_bytes = file_handler.seek(0, 2) + logging.info("File size: %s", number_of_bytes) + file_handler.seek(0) # define the keys to be printed keys = [ @@ -80,8 +85,9 @@ def __init__( "heightOfStationGroundAboveMeanSeaLevel", "heightOfStation", "stationNumber", - "blockNumber" + "blockNumber", ] + processed_threshold = 0 nerror = {} ntime = {} nundef = {} @@ -302,7 +308,7 @@ def __init__( not np.isnan(temp) and not np.isnan(t_d) and np.isnan(rh2m) - ): + ): try: value = self.td2rh(t_d, temp) value = value * 0.01 @@ -317,7 +323,7 @@ def __init__( if np.isnan(value) and not np.isnan(rh2m): value = 0.01 * rh2m - + elif var == "airTemperatureAt2M": if np.isnan(t2m): if not np.isnan(temp): @@ -372,10 +378,28 @@ def __init__( latrange[0] <= lat <= latrange[1] and lonrange[0] <= lon <= lonrange[1] ): - obs_dtg = as_datetime_args( - year=year, month=month, day=day, hour=hour, minute=minute - ) - if not np.isnan(value): + obs_dtg = None + try: + obs_dtg = as_datetime_args( + year=year, + month=month, + day=day, + hour=hour, + minute=minute, + ) + except ValueError: + logging.warning( + "Bad observations time: year=%s month=%s day=%s hour=%s minute=%s Position is lon=%s, lat=%s", + year, + month, + day, + hour, + minute, + lon, + lat, + ) + obs_dtg = None + if not np.isnan(value) and obs_dtg is not None: if self.inside_window(obs_dtg, valid_dtg, valid_range): logging.debug( "Valid DTG for station %s %s %s %s %s %s %s %s", @@ -388,17 +412,18 @@ def __init__( elev, stid, ) - if station_number > 0 and block_number > 0: stid = str((block_number * 1000) + station_number) + + if ( stid == "NA" and site_name != "NA" and site_name.isnumeric() ): stid = site_name - - + + observations.append( Observation( obs_dtg, @@ -421,18 +446,23 @@ def __init__( ndomain.update({var: ndomain[var] + 1}) cnt += 1 + try: + nbytes = file_handler.tell() + except ValueError: + nbytes = number_of_bytes - if (cnt % 1000) == 0: - print(".", end="") - sys.stdout.flush() + processed = int(round(float(nbytes) * 100.0 / float(number_of_bytes))) + if processed > processed_threshold and processed % 5 == 0: + processed_threshold = processed + logging.info("Read: %s%%", processed) # delete handle eccodes.codes_release(bufr) - logging.info("\nFound %s/%s", str(len(observations)), str(cnt)) + logging.info("Found %s/%s", str(len(observations)), str(cnt)) logging.info("Not decoded: %s", str(not_decoded)) for var in variables: - logging.info("\nObservations for var=%s: %s", var, str(nobs[var])) + logging.info("Observations for var=%s: %s", var, str(nobs[var])) logging.info( "Observations removed because of domain check: %s", str(ndomain[var]) ) @@ -449,7 +479,7 @@ def __init__( # close the file file_handler.close() - ObservationSet.__init__(self, observations, label=label) + ObservationSet.__init__(self, observations, label=label, sigmao=sigmao) @staticmethod def td2rh(t_d, temp, kelvin=True): From e185bc86a0beeb66155f0bd29f7d2bfc7a35018b Mon Sep 17 00:00:00 2001 From: Per Dahlgren Date: Wed, 24 Jan 2024 11:58:55 +0000 Subject: [PATCH 6/6] Reformatted pysurfex/bufr.py --- pysurfex/bufr.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pysurfex/bufr.py b/pysurfex/bufr.py index 9207488..9c07979 100644 --- a/pysurfex/bufr.py +++ b/pysurfex/bufr.py @@ -308,7 +308,7 @@ def __init__( not np.isnan(temp) and not np.isnan(t_d) and np.isnan(rh2m) - ): + ): try: value = self.td2rh(t_d, temp) value = value * 0.01 @@ -323,7 +323,7 @@ def __init__( if np.isnan(value) and not np.isnan(rh2m): value = 0.01 * rh2m - + elif var == "airTemperatureAt2M": if np.isnan(t2m): if not np.isnan(temp): @@ -414,7 +414,6 @@ def __init__( ) if station_number > 0 and block_number > 0: stid = str((block_number * 1000) + station_number) - if ( stid == "NA" @@ -422,8 +421,7 @@ def __init__( and site_name.isnumeric() ): stid = site_name - - + observations.append( Observation( obs_dtg,