From 52a873b663967ced0eb4e28be583cd2dad4c82e1 Mon Sep 17 00:00:00 2001 From: Jan Griesfeller Date: Mon, 9 Sep 2024 16:41:26 +0200 Subject: [PATCH] WIP --- .../actrisebas/ActrisEbasReader.py | 49 +++++++++++++------ src/pyaro_readers/actrisebas/definitions.toml | 20 +++++++- tests/test_ActrisEbasReader.py | 38 +++++++------- 3 files changed, 70 insertions(+), 37 deletions(-) diff --git a/src/pyaro_readers/actrisebas/ActrisEbasReader.py b/src/pyaro_readers/actrisebas/ActrisEbasReader.py index 1b195cc..75a9e52 100644 --- a/src/pyaro_readers/actrisebas/ActrisEbasReader.py +++ b/src/pyaro_readers/actrisebas/ActrisEbasReader.py @@ -89,25 +89,25 @@ def __init__( ) try: - vars_to_read = filters["variables"]["include"] + self.vars_to_read = filters["variables"]["include"] except KeyError: raise ValueError( f"As of now, you have to give the species you want to read in filter.variables.include" ) try: - sites_to_read = filters["stations"]["include"] + self.sites_to_read = filters["stations"]["include"] except KeyError: - sites_to_read = [] + self.sites_to_read = [] try: - sites_to_exclude = filters["stations"]["exclude"] + self.sites_to_exclude = filters["stations"]["exclude"] except KeyError: - sites_to_exclude = [] + self.sites_to_exclude = [] # read config file self._def_data = self._read_definitions(file=DEFINITION_FILE) - for var in vars_to_read: + for var in self.vars_to_read: self._metadata[var] = {} # for testing since the API is error-prone and slow at the time of this writing test_file = os.path.join( @@ -129,8 +129,8 @@ def __init__( self._metadata[var] = json_resp self._urls_to_dl[var] = self.extract_urls( json_resp, - sites_to_read=sites_to_read, - sites_to_exclude=sites_to_exclude, + sites_to_read=self.sites_to_read, + sites_to_exclude=self.sites_to_exclude, ) self._data[var] = self.read_data(self._urls_to_dl[var]) @@ -141,7 +141,6 @@ def read_data( self, urls_to_dl: dict, tqdm_desc="reading stations", - sites_to_read: list[str] = None, ): """ read the data from 
EBAS thredds server @@ -150,7 +149,7 @@ def read_data( for s_idx, site_name in enumerate(urls_to_dl): for f_idx, url in enumerate(urls_to_dl[site_name]): tmp_data = xr.open_dataset(url) - # create times... + # create variables valid for all measured variables... start_time = np.asarray(tmp_data["time_bnds"][:, 0]) stop_time = np.asarray(tmp_data["time_bnds"][:, 1]) ts_no = len(start_time) @@ -161,7 +160,16 @@ def read_data( standard_deviation = np.full(ts_no, np.nan) # put all data variables in the data struct for the moment - for _data_var in self._get_ebas_data_vars(tmp_data): + for d_idx, _data_var in enumerate( + self._get_ebas_data_vars( + tmp_data, + ) + ): + # the naming of the variable in the file does not reflect the vocabulary naming or pyaerocom's + # naming + ret_data_var = _data_var.copy() + # if ret_data_var not in self.vars_to_read and : + # # we need vals = tmp_data[_data_var].values flags = np.full(ts_no, Flag.VALID) if _data_var not in self._data: @@ -181,6 +189,12 @@ def read_data( flag=flags, standard_deviation=standard_deviation, ) + # make sure to return something in the user given variable name for now + try: + if _data_var != self.vars_to_read[d_idx]: + self._data[self.vars_to_read[d_idx]] = self._data[_data_var] + except IndexError: + pass if not site_name in self._stations: self._stations[site_name] = Station( { @@ -219,7 +233,16 @@ def _get_ebas_data_vars(self, tmp_data, actris_var: str = None, units: str = Non if len(tmp_data[data_var].dims) != 1: continue elif tmp_data[data_var].dims[0] in TIME_VAR_NAME: - data_vars.append(data_var) + # check for standard unit + try: + # if defined, return only names that match + if ( + tmp_data[data_var].attrs["units"] + == self._def_data["actris_std_units"][data_var] + ): + data_vars.append(data_var) + except KeyError: + data_vars.append(data_var) return data_vars @@ -292,9 +315,7 @@ class ActrisEbasTimeSeriesEngine(AutoFilterReaderEngine.AutoFilterEngine): def reader_class(self): return 
ActrisEbasTimeSeriesReader - # def open(self, filename, *args, **kwargs) -> ActrisEbasTimeSeriesReader: def open(self, *args, **kwargs) -> ActrisEbasTimeSeriesReader: - # return self.reader_class()(filename, *args, **kwargs) return self.reader_class()(*args, **kwargs) def description(self): diff --git a/src/pyaro_readers/actrisebas/definitions.toml b/src/pyaro_readers/actrisebas/definitions.toml index 36fe1d0..cf8383d 100644 --- a/src/pyaro_readers/actrisebas/definitions.toml +++ b/src/pyaro_readers/actrisebas/definitions.toml @@ -8,16 +8,32 @@ #ebas_component = [] #ebas_matrix = [] -[variables.vmro3] +[variables.conco3] actris_variable = ["ozone mass concentration", "ozone amount fraction"] actris_matrix = ["gas phase"] ebas_component = ["ozone"] ebas_matrix = ["air"] ebas_unit = "ug/m3" +[variables.vmro3] +actris_variable = ["ozone mass concentration", "ozone amount fraction"] +actris_matrix = ["gas phase"] +ebas_component = ["ozone"] +ebas_matrix = ["air"] +ebas_unit = "nmol/mol" + [variables.concso4] actris_variable = ["aerosol particle sulphate mass concentration"] actris_matrix = ["aerosol particle phase", "PM10", "PM2.5"] ebas_component = ["sulphate_corrected", "sulphate_total"] ebas_matrix = ["aerosol", "pm10", "pm25"] -ebas_unit = "ug/m3" \ No newline at end of file +ebas_unit = "ug/m3" + +[actris_std_units] +# tell the reader which unit shall be returned in case the same property is available in several units +# e.g. 
ozone mass concentration is available in the files in [ug/m3] and [nmol/mol] +# make sure to use ACTRIS-EBAS unit notation since this is a simple string match +# For variables not noted here, we will return the first unit found in the first data file +"ozone mass concentration" = "nmol/mol" +"ozone amount fraction" = "nmol/mol" +"aerosol particle sulphate mass concentration" = "ug/m3" diff --git a/tests/test_ActrisEbasReader.py b/tests/test_ActrisEbasReader.py index 5c0ad89..a63e426 100644 --- a/tests/test_ActrisEbasReader.py +++ b/tests/test_ActrisEbasReader.py @@ -64,17 +64,23 @@ def test_api_reading_small_data_set(self): ) as ts: self.assertGreaterEqual(len(ts.variables()), 1) - # def test_stationfilter(self): - # engine = pyaro.list_timeseries_engines()["aeronetsunreader"] - # sfilter = pyaro.timeseries.filters.get("stations", exclude=["Cuiaba"]) - # with engine.open( - # self.file, filters=[sfilter], tqdm_desc="test_stationfilter" - # ) as ts: - # count = 0 - # for var in ts.variables(): - # count += len(ts.data(var)) - # self.assertEqual(count, 48775) - # self.assertEqual(len(ts.stations()), 3) + def test_api_reading_pyaerocom_naming(self): + # test access to the EBAS API + filters = { + "variables": { + "include": [ + "vmro3", + ] + }, + "stations": {"include": ["Birkenes II", "Jungfraujoch"]}, + } + engine = pyaro.list_timeseries_engines()[self.engine] + # + with engine.open( + filters=filters, + ) as ts: + self.assertGreaterEqual(len(ts.variables()), 1) + # # def test_wrappers(self): # engine = pyaro.list_timeseries_engines()["aeronetsunreader"] @@ -85,16 +91,6 @@ def test_api_reading_small_data_set(self): # self.assertEqual(ts.data(new_var_name).variable, new_var_name) # pass # - # def test_variables_filter(self): - # engine = pyaro.list_timeseries_engines()["aeronetsunreader"] - # new_var_name = "od550aer" - # vfilter = pyaro.timeseries.filters.get( - # "variables", reader_to_new={"AOD_550nm": new_var_name} - # ) - # with engine.open( - # self.file, 
filters=[vfilter], tqdm_desc="test_variables_filter" - # ) as ts: - # self.assertEqual(ts.data(new_var_name).variable, new_var_name) if __name__ == "__main__":