Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/32-add-pyaro-ebas-reader-for-nil…
Browse files Browse the repository at this point in the history
…u-pmf-data-as-well-as-general-use' into 32-add-pyaro-ebas-reader-for-nilu-pmf-data-as-well-as-general-use

# Conflicts:
#	setup.cfg
  • Loading branch information
Jan Jurgen Griesfeller committed May 8, 2024
2 parents 7f1f0e3 + 5719526 commit 937cb98
Show file tree
Hide file tree
Showing 10 changed files with 346 additions and 454 deletions.
18 changes: 6 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ conventions.
### nilupmfebas: EBAS format (Nasa-Ames)
Reader for arbitrary EBAS data in NASA-AMES format. This reader is tested only with PMF data provided by
NILU, but should in principle be able to read any text file in EBAS NASA-AMES format.
The variables provided contain in EBAS terms a combination of matrix and component with a number sign (#)
as seperator (e.g. `pm10_pm25#total_carbon` or `pm10#organic_carbon` or `pm10#galactosan`)
The variable names provided combine, in EBAS terms, matrix, component and unit, with a number sign (#)
as separator (e.g. `pm10_pm25#total_carbon#ug C m-3` or `pm10#organic_carbon#ug C m-3` or `pm10#galactosan#ng m-3`)

## Usage
### aeronetsunreader
Expand Down Expand Up @@ -143,32 +143,24 @@ with pyaro.open_timeseries(
```


### geocoder_reverse_natural_earth
geocoder_reverse_natural_earth is a small helper to identify country codes for observation networks that don't provide the
country code of a station in their location data.

### nilupmfebas
```python
import pyaro
TEST_URL = "/home/jang/data/Python3/pyaro-readers/tests/testdata/PMF_EBAS/NO0042G.20171109070000.20220406124026.high_vol_sampler..pm10.4mo.1w.NO01L_hvs_week_no42_pm10.NO01L_NILU_sunset_002.lev2.nas"
TEST_URL = "/home/jang/data/Python3/pyaro-readers/tests/testdata/PMF_EBAS/NO0042G.20171109070000.20220406124026.high_vol_sampler..pm10.4mo.1w.NO01L_hvs_week_no42_pm10.NO01L_NILU_sunset_002.lev2.nas"

TEST_URL = "testdata/PMF_EBAS/NO0042G.20171109070000.20220406124026.high_vol_sampler..pm10.4mo.1w.NO01L_hvs_week_no42_pm10.NO01L_NILU_sunset_002.lev2.nas"
def main():
with pyaro.open_timeseries(
'nilupmfebas', TEST_URL, filters=[]
) as ts:
variables = ts.variables()
for var in variables:

data = ts.data(var)
print(f"var:{var} ; unit:{data.units}")
# stations
print(set(data.stations))
# start_times
print(data.start_times)
for idx, time in enumerate(data.start_times):
print(f"{time}_{data.values[idx]}")

print(f"{time}: {data.values[idx]}")
# stop_times
data.end_times
# latitudes
Expand All @@ -180,5 +172,7 @@ def main():
# values
data.values

if __name__ == "__main__":
main()
```

10 changes: 8 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = pyaro_readers
version = 0.0.6
version = 0.0.7
author = MET Norway
description = implementations of pyaerocom reading plugins using pyaro as interface
long_description = file: README.md
Expand Down Expand Up @@ -32,10 +32,16 @@ install_requires =

package_dir =
=src
packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf, pyaro_readers.harpreader, pyaro_readers.nilupmfebas, pyaro_readers
packages =
find:

test_require = tox:tox

[options.packages.find]
where=src

[options.package_data]
* = *.csv

[options.entry_points]
pyaro.timeseries =
Expand Down
120 changes: 76 additions & 44 deletions src/pyaro_readers/nilupmfebas/EbasPmfReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@
Station,
)
from tqdm import tqdm
from pyaro_readers.units_helpers import UALIASES

from pathlib import Path
import cf_units
import re

logger = logging.getLogger(__name__)

FILL_COUNTRY_FLAG = False
FILE_MASK = "*.nas"
FIELDS_TO_SKIP = ["start_time of measurement", "end_time of measurement"]


# Error type of this reader module. In the code shown here it is raised when
# the given path is neither a directory nor a file, and when a file carries
# no usable station location metadata.
class EBASPMFReaderException(Exception):
pass


class EbasPmfTimeseriesReader(AutoFilterReaderEngine.AutoFilterReader):
Expand All @@ -26,17 +31,26 @@ def __init__(
filename: [Path, str],
filters=[],
tqdm_desc: [str, None] = None,
ts_type: str = "daily",
filemask: str = FILE_MASK,
# files: list = None,
vars_to_read: list[str] = None,
):
self._filters = filters
self._stations = {}
self._data = {} # var -> {data-array}
self._set_filters(filters)
self._header = []
self._opts = {"default": ReadEbasOptions()}
self._variables = {}
self._metadata = {}

# variable include filter comes like this
# {'variables': {'include': ['PM10_density']}}
# test for variable filter
if "variables" in filters:
if "include" in filters["variables"]:
vars_to_read = filters["variables"]["include"]
self._vars_to_read = vars_to_read
logger.info(f"applying variable include filter {vars_to_read}...")

realpath = Path(filename).resolve()

Expand All @@ -55,16 +69,13 @@ def __init__(
bar.close()
elif Path(realpath).is_file():
self.read_file(realpath)
# print(realpath)

else:
# filename is something else
# Error
pass
raise EBASPMFReaderException(f"No such file or directory: {filename}")

def read_file_basic(
self,
filename,
filename: [Path, str],
):
"""Read EBAS NASA Ames file
Expand All @@ -82,66 +93,60 @@ def read_file_basic(

return data_out

def read_file(self, filename, vars_to_read=None):
def read_file(self, filename: [Path, str], vars_to_read: list[str] = None):
"""Read EBAS NASA Ames file and put the data in the object"""

_file_dummy = self.read_file_basic(filename)
matrix = _file_dummy.meta["matrix"]
vars_read_in_file = []
# multicolumn file: ebas var names come from _file_dummy.col_names_vars
for var_idx in range(len(_file_dummy.var_defs)):
for var_idx, var_def in enumerate(_file_dummy.var_defs):
# continue if the variable is not an actual data variable (but e.g. time)
if not _file_dummy.var_defs[var_idx].is_var:
if not var_def.is_var:
continue
# skip additional fields...
if var_def.name in FIELDS_TO_SKIP:
continue
# continue if the statistic is to be ignored
try:
if (
_file_dummy.var_defs[var_idx].statistics
in self._opts["default"].ignore_statistics
):
if var_def.statistics in self._opts["default"].ignore_statistics:
continue
except KeyError:
# sometimes there's no statistics: pass
pass

# var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}"
# var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}#{_file_dummy.meta['unit']}"
var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}#{_file_dummy.var_defs[var_idx]['unit']}"
# adjust unit string
unit = var_def.unit
if unit in UALIASES:
unit = UALIASES[unit]
var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}#{unit}"
if vars_to_read is not None:
if var_name not in vars_to_read:
continue
if var_name not in self._variables:
self._variables[var_name] = (
var_name,
_file_dummy.var_defs[var_idx]['unit'],
unit,
)

var_unit = _file_dummy.var_defs[var_idx].unit
var_unit = unit
stat_name = _file_dummy.meta["station_code"]
if stat_name not in self._stations:
country = _file_dummy.meta["station_code"][0:2]
# the location naming is not consistent
# try the two we have seen so far
try:
lat = float(_file_dummy.meta["station_latitude"])
lon = float(_file_dummy.meta["station_longitude"])
alt_str = _file_dummy.meta["station_altitude"]
except KeyError:
# might not always work either
try:
lat = float(_file_dummy.meta["measurement_latitude"])
lon = float(_file_dummy.meta["measurement_longitude"])
alt_str = _file_dummy.meta["measurement_altitude"]
except KeyError:
print(f"no lat / lon found in file {filename}. Skipping...")
return None
try:
# usually there's a blank between the value and the unit
alt = float(alt_str.split(" ")[0])
except ValueError:
# but unfortunately not always
# remove all non numbers
alt = float(re.sub(r"[^\d.-]+", "", alt_str))
lat, lon, alt = self._get_station_loc_data(filename, _file_dummy)
except EBASPMFReaderException:
return
# prepare some station based metadata
_meta_dummy = {}
_meta_dummy["file_metadata"] = {
filename: {
"meta": _file_dummy.meta,
"var_defs": _file_dummy.var_defs,
}
}

self._stations[stat_name] = Station(
{
Expand All @@ -152,7 +157,8 @@ def read_file(self, filename, vars_to_read=None):
"country": country,
"url": "",
"long_name": stat_name,
}
},
metadata=_meta_dummy,
)
else:
lat = self._stations[stat_name].latitude
Expand All @@ -162,7 +168,9 @@ def read_file(self, filename, vars_to_read=None):
# put only the 1st match in the data...
# because that is the one we should be interested in
if var_name in vars_read_in_file:
print(f"Warning! Variable {var_name} is already used in current file! Skipping...")
logger.info(
f"Warning! Variable {var_name} is already used in current file! Only important if the data looks wrong. Skipping..."
)
continue
else:
vars_read_in_file.append(var_name)
Expand All @@ -184,9 +192,6 @@ def read_file(self, filename, vars_to_read=None):
Flag.VALID,
np.nan,
)
# print(_file_dummy.stop_meas[t_idx])
# pass
assert True

# Return the entry stored in self._data under ``varname`` as is
# (plain dictionary lookup; an unknown name raises KeyError).
def _unfiltered_data(self, varname) -> Data:
return self._data[varname]
Expand All @@ -200,6 +205,33 @@ def _unfiltered_variables(self) -> list[str]:
# Intentionally a no-op: this method performs no work.
def close(self):
pass

def _get_station_loc_data(
    self, filename: str, _file_dummy: EbasNasaAmesFile
) -> tuple[float, float, float]:
    """Return the station position ``(lat, lon, alt)`` from the file metadata.

    The location keys are not named consistently across files, so the
    ``station_*`` keys are tried first and the ``measurement_*`` keys second.
    The altitude is stored as text, usually as ``"<value> <unit>"``; if the
    blank is missing, the first number found in the string is used.

    Parameters
    ----------
    filename:
        Name of the file being read; only used in log and error messages.
    _file_dummy:
        Parsed file object whose ``meta`` mapping holds the location entries.

    Returns
    -------
    tuple[float, float, float]
        Latitude, longitude and altitude as floats.

    Raises
    ------
    EBASPMFReaderException
        If neither set of location keys is complete, or if the altitude
        string contains no number at all.
    """
    meta = _file_dummy.meta

    # the location naming is not consistent: try the two prefixes seen so far
    for prefix in ("station", "measurement"):
        try:
            lat = float(meta[f"{prefix}_latitude"])
            lon = float(meta[f"{prefix}_longitude"])
            alt_str = meta[f"{prefix}_altitude"]
        except KeyError:
            # this naming variant is incomplete; try the next one
            continue
        else:
            break
    else:
        msg = f"no lat / lon found in file {filename}. Skipping the file..."
        logger.info(msg)
        raise EBASPMFReaderException(msg)

    try:
        # usually there's a blank between the value and the unit
        alt = float(alt_str.split(" ")[0])
    except ValueError:
        # but unfortunately not always: pick the first number in the string.
        # Merely deleting the non-numeric characters would keep stray "." or
        # "-" from the unit text (e.g. "525m a.s.l." -> "525...") and fail.
        number = re.search(r"[-+]?(?:\d+\.?\d*|\.\d+)", alt_str)
        if number is None:
            msg = (
                f"no numeric altitude found in '{alt_str}' in file {filename}. "
                "Skipping the file..."
            )
            logger.info(msg)
            raise EBASPMFReaderException(msg) from None
        alt = float(number.group())
    return lat, lon, alt


class EbasPmfTimeseriesEngine(AutoFilterReaderEngine.AutoFilterEngine):
def reader_class(self):
Expand Down
Loading

0 comments on commit 937cb98

Please sign in to comment.