Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Griesfeller committed Sep 9, 2024
1 parent 2dd3ff3 commit 52a873b
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 37 deletions.
49 changes: 35 additions & 14 deletions src/pyaro_readers/actrisebas/ActrisEbasReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,25 +89,25 @@ def __init__(
)

try:
vars_to_read = filters["variables"]["include"]
self.vars_to_read = filters["variables"]["include"]
except KeyError:
raise ValueError(
f"As of now, you have to give the species you want to read in filter.variables.include"
)

try:
sites_to_read = filters["stations"]["include"]
self.sites_to_read = filters["stations"]["include"]
except KeyError:
sites_to_read = []
self.sites_to_read = []

try:
sites_to_exclude = filters["stations"]["exclude"]
self.sites_to_exclude = filters["stations"]["exclude"]
except KeyError:
sites_to_exclude = []
self.sites_to_exclude = []

# read config file
self._def_data = self._read_definitions(file=DEFINITION_FILE)
for var in vars_to_read:
for var in self.vars_to_read:
self._metadata[var] = {}
# for testing since the API is error-prone and slow at the time of this writing
test_file = os.path.join(
Expand All @@ -129,8 +129,8 @@ def __init__(
self._metadata[var] = json_resp
self._urls_to_dl[var] = self.extract_urls(
json_resp,
sites_to_read=sites_to_read,
sites_to_exclude=sites_to_exclude,
sites_to_read=self.sites_to_read,
sites_to_exclude=self.sites_to_exclude,
)
self._data[var] = self.read_data(self._urls_to_dl[var])

Expand All @@ -141,7 +141,6 @@ def read_data(
self,
urls_to_dl: dict,
tqdm_desc="reading stations",
sites_to_read: list[str] = None,
):
"""
read the data from EBAS thredds server
Expand All @@ -150,7 +149,7 @@ def read_data(
for s_idx, site_name in enumerate(urls_to_dl):
for f_idx, url in enumerate(urls_to_dl[site_name]):
tmp_data = xr.open_dataset(url)
# create times...
# create variables valid for all measured variables...
start_time = np.asarray(tmp_data["time_bnds"][:, 0])
stop_time = np.asarray(tmp_data["time_bnds"][:, 1])
ts_no = len(start_time)
Expand All @@ -161,7 +160,16 @@ def read_data(
standard_deviation = np.full(ts_no, np.nan)

# put all data variables in the data struct for the moment
for _data_var in self._get_ebas_data_vars(tmp_data):
for d_idx, _data_var in enumerate(
self._get_ebas_data_vars(
tmp_data,
)
):
# the naming of the variable in the file does not reflect the vocabulary naming or pyaerocom's
# naming
ret_data_var = _data_var.copy()
# if ret_data_var not in self.vars_to_read and :
# # we need
vals = tmp_data[_data_var].values
flags = np.full(ts_no, Flag.VALID)
if _data_var not in self._data:
Expand All @@ -181,6 +189,12 @@ def read_data(
flag=flags,
standard_deviation=standard_deviation,
)
# make sure to return something in the user given variable name for now
try:
if _data_var != self.vars_to_read[d_idx]:
self._data[self.vars_to_read[d_idx]] = self._data[_data_var]
except IndexError:
pass
if not site_name in self._stations:
self._stations[site_name] = Station(
{
Expand Down Expand Up @@ -219,7 +233,16 @@ def _get_ebas_data_vars(self, tmp_data, actris_var: str = None, units: str = Non
if len(tmp_data[data_var].dims) != 1:
continue
elif tmp_data[data_var].dims[0] in TIME_VAR_NAME:
data_vars.append(data_var)
# check for standard unit
try:
# if defined, return only names that match
if (
tmp_data[data_var].attrs["units"]
== self._def_data["actris_std_units"][data_var]
):
data_vars.append(data_var)
except KeyError:
data_vars.append(data_var)

return data_vars

Expand Down Expand Up @@ -292,9 +315,7 @@ class ActrisEbasTimeSeriesEngine(AutoFilterReaderEngine.AutoFilterEngine):
def reader_class(self):
return ActrisEbasTimeSeriesReader

# def open(self, filename, *args, **kwargs) -> ActrisEbasTimeSeriesReader:
def open(self, *args, **kwargs) -> ActrisEbasTimeSeriesReader:
# return self.reader_class()(filename, *args, **kwargs)
return self.reader_class()(*args, **kwargs)

def description(self):
Expand Down
20 changes: 18 additions & 2 deletions src/pyaro_readers/actrisebas/definitions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,32 @@
#ebas_component = [<list of ebas component names>]
#ebas_matrix = [<list of ebas matrix names>]

[variables.vmro3]
[variables.conco3]
actris_variable = ["ozone mass concentration", "ozone amount fraction"]
actris_matrix = ["gas phase"]
ebas_component = ["ozone"]
ebas_matrix = ["air"]
ebas_unit = "ug/m3"

[variables.vmro3]
actris_variable = ["ozone mass concentration", "ozone amount fraction"]
actris_matrix = ["gas phase"]
ebas_component = ["ozone"]
ebas_matrix = ["air"]
ebas_unit = "nmol/mol"

[variables.concso4]
actris_variable = ["aerosol particle sulphate mass concentration"]
actris_matrix = ["aerosol particle phase", "PM10", "PM2.5"]
ebas_component = ["sulphate_corrected", "sulphate_total"]
ebas_matrix = ["aerosol", "pm10", "pm25"]
ebas_unit = "ug/m3"
ebas_unit = "ug/m3"

[actris_std_units]
# tell the reader which unit shall be returned in case the same property is available in several units
# e.g. ozone mass concentration is available in the files in [ug/m3] and [nmol/mol]
# make sure to use ACTRIS-EBAS unit notation since this is a simple string match
# For variables not noted here, we will return the first unit found in the first data file
"ozone mass concentration" = "nmol/mol"
"ozone amount fraction" = "nmol/mol"
"aerosol particle sulphate mass concentration" = "ug/m3"
38 changes: 17 additions & 21 deletions tests/test_ActrisEbasReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,23 @@ def test_api_reading_small_data_set(self):
) as ts:
self.assertGreaterEqual(len(ts.variables()), 1)

# def test_stationfilter(self):
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# sfilter = pyaro.timeseries.filters.get("stations", exclude=["Cuiaba"])
# with engine.open(
# self.file, filters=[sfilter], tqdm_desc="test_stationfilter"
# ) as ts:
# count = 0
# for var in ts.variables():
# count += len(ts.data(var))
# self.assertEqual(count, 48775)
# self.assertEqual(len(ts.stations()), 3)
def test_api_reading_pyaerocom_naming(self):
# test access to the EBAS API
filters = {
"variables": {
"include": [
"vmro3",
]
},
"stations": {"include": ["Birkenes II", "Jungfraujoch"]},
}
engine = pyaro.list_timeseries_engines()[self.engine]
#
with engine.open(
filters=filters,
) as ts:
self.assertGreaterEqual(len(ts.variables()), 1)

#
# def test_wrappers(self):
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
Expand All @@ -85,16 +91,6 @@ def test_api_reading_small_data_set(self):
# self.assertEqual(ts.data(new_var_name).variable, new_var_name)
# pass
#
# def test_variables_filter(self):
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# new_var_name = "od550aer"
# vfilter = pyaro.timeseries.filters.get(
# "variables", reader_to_new={"AOD_550nm": new_var_name}
# )
# with engine.open(
# self.file, filters=[vfilter], tqdm_desc="test_variables_filter"
# ) as ts:
# self.assertEqual(ts.data(new_var_name).variable, new_var_name)


if __name__ == "__main__":
Expand Down

0 comments on commit 52a873b

Please sign in to comment.