diff --git a/README.md b/README.md
index c90c5fe..85c18fb 100644
--- a/README.md
+++ b/README.md
@@ -40,8 +40,8 @@ conventions.
 ### nilupmfebas: EBAS format (Nasa-Ames)
 Reader for random EBAS data in NASA-AMES format. This reader is tested only with PMF data provided by NILU, but
 should in principle be able to read any random text file in EBAS NASA-AMES.
-The variables provided contain in EBAS terms a combination of matrix and component with a number sign (#)
-as seperator (e.g. `pm10_pm25#total_carbon` or `pm10#organic_carbon` or `pm10#galactosan`)
+The variables provided contain in EBAS terms a combination of matrix, component and unit with a number sign (#)
+as separator (e.g. `pm10_pm25#total_carbon#ug C m-3` or `pm10#organic_carbon#ug C m-3` or `pm10#galactosan#ng m-3`)
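+Such a variable name can be split back into its parts with plain string
+handling, e.g. `matrix, component, unit = var_name.split("#")` (a minimal
+sketch; the number sign being the only separator is the sole convention assumed).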
 ## Usage
 ### aeronetsunreader
@@ -143,23 +143,16 @@ with pyaro.open_timeseries(
 ```
 
-### geocoder_reverse_natural_earth
-geocoder_reverse_natural_earth is small helper to identify country codes for obs networks that don't mention the
-countrycode of a station in their location data
-
 ### nilupmfebas
 ```python
 import pyaro
-TEST_URL = "/home/jang/data/Python3/pyaro-readers/tests/testdata/PMF_EBAS/NO0042G.20171109070000.20220406124026.high_vol_sampler..pm10.4mo.1w.NO01L_hvs_week_no42_pm10.NO01L_NILU_sunset_002.lev2.nas"
-TEST_URL = "/home/jang/data/Python3/pyaro-readers/tests/testdata/PMF_EBAS/NO0042G.20171109070000.20220406124026.high_vol_sampler..pm10.4mo.1w.NO01L_hvs_week_no42_pm10.NO01L_NILU_sunset_002.lev2.nas"
-
+TEST_URL = "testdata/PMF_EBAS/NO0042G.20171109070000.20220406124026.high_vol_sampler..pm10.4mo.1w.NO01L_hvs_week_no42_pm10.NO01L_NILU_sunset_002.lev2.nas"
 def main():
     with pyaro.open_timeseries(
         'nilupmfebas', TEST_URL, filters=[]
     ) as ts:
         variables = ts.variables()
         for var in variables:
             data = ts.data(var)
             print(f"var:{var} ; unit:{data.units}")
 # stations
@@ -167,8 +160,7 @@ def main():
 # start_times
         print(data.start_times)
         for idx, time in enumerate(data.start_times):
-            print(f"{time}_{data.values[idx]}")
-
+            print(f"{time}: {data.values[idx]}")
 # stop_times
         data.end_times
 # latitudes
@@ -180,5 +172,7 @@ def main():
 # values
         data.values
 
+if __name__ == "__main__":
+    main()
 ```
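+
+The variable include filter understood by this reader (see the filter handling
+in EbasPmfReader.py) can be used to read only selected variables. A minimal
+sketch; the variable name has to follow the `matrix#component#unit` convention
+described above:
+
+```python
+with pyaro.open_timeseries(
+    "nilupmfebas",
+    TEST_URL,
+    filters={"variables": {"include": ["pm10#organic_carbon#ug C m-3"]}},
+) as ts:
+    print(ts.variables())
+```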
diff --git a/setup.cfg b/setup.cfg
index 1bc6a6f..9a16f9c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = pyaro_readers
-version = 0.0.6
+version = 0.0.7
 author = MET Norway
 description = implementations of pyaerocom reading plugins using pyaro as interface
 long_description = file: README.md
@@ -32,10 +32,16 @@ install_requires =
 package_dir =
     =src
-packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf, pyaro_readers.harpreader, pyaro_readers.nilupmfebas, pyaro_readers
+packages =
+    find:
+
 test_require =
     tox:tox
+[options.packages.find]
+where=src
+
 [options.package_data]
+* = *.csv
 
 [options.entry_points]
 pyaro.timeseries =
diff --git a/src/pyaro_readers/nilupmfebas/EbasPmfReader.py b/src/pyaro_readers/nilupmfebas/EbasPmfReader.py
index 7506d4c..f1234e0 100644
--- a/src/pyaro_readers/nilupmfebas/EbasPmfReader.py
+++ b/src/pyaro_readers/nilupmfebas/EbasPmfReader.py
@@ -9,15 +9,20 @@
     Station,
 )
 from tqdm import tqdm
+from pyaro_readers.units_helpers import UALIASES
 from pathlib import Path
-import cf_units
 import re
 
 logger = logging.getLogger(__name__)
 
 FILL_COUNTRY_FLAG = False
 FILE_MASK = "*.nas"
+FIELDS_TO_SKIP = ["start_time of measurement", "end_time of measurement"]
+
+
+class EBASPMFReaderException(Exception):
+    pass
 
 
 class EbasPmfTimeseriesReader(AutoFilterReaderEngine.AutoFilterReader):
@@ -26,17 +31,26 @@ def __init__(
         filename: [Path, str],
         filters=[],
         tqdm_desc: [str, None] = None,
-        ts_type: str = "daily",
         filemask: str = FILE_MASK,
-        # files: list = None,
         vars_to_read: list[str] = None,
     ):
+        self._filters = filters
         self._stations = {}
         self._data = {}  # var -> {data-array}
         self._set_filters(filters)
         self._header = []
         self._opts = {"default": ReadEbasOptions()}
         self._variables = {}
+        self._metadata = {}
+
+        # variable include filter comes like this
+        # {'variables': {'include': ['PM10_density']}}
+        # test for variable filter
+        if "variables" in filters:
+            if "include" in filters["variables"]:
+                vars_to_read = filters["variables"]["include"]
+                self._vars_to_read = vars_to_read
+                logger.info(f"applying variable include filter {vars_to_read}...")
 
         realpath = Path(filename).resolve()
 
@@ -55,16 +69,13 @@ def __init__(
             bar.close()
         elif Path(realpath).is_file():
             self.read_file(realpath)
-            # print(realpath)
-
         else:
             # filename is something else
-            # Error
-            pass
+            raise EBASPMFReaderException(f"No such file or directory: {filename}")
 
     def read_file_basic(
         self,
-        filename,
+        filename: [Path, str],
     ):
         """Read EBAS NASA Ames file
 
@@ -82,66 +93,60 @@
 
         return data_out
 
-    def read_file(self, filename, vars_to_read=None):
+    def read_file(self, filename: [Path, str], vars_to_read: list[str] = None):
         """Read EBAS NASA Ames file and put the data in the object"""
         _file_dummy = self.read_file_basic(filename)
         matrix = _file_dummy.meta["matrix"]
         vars_read_in_file = []
         # multicolumn file: ebas var names come from _file_dummy.col_names_vars
-        for var_idx in range(len(_file_dummy.var_defs)):
+        for var_idx, var_def in enumerate(_file_dummy.var_defs):
             # continue if the variable is not an actual data variable (but e.g. time)
-            if not _file_dummy.var_defs[var_idx].is_var:
+            if not var_def.is_var:
+                continue
+            # skip additional fields...
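+            # (FIELDS_TO_SKIP lists the EBAS time bookkeeping columns
+            # "start_time of measurement" and "end_time of measurement",
+            # which are not measurement variables)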
+            if var_def.name in FIELDS_TO_SKIP:
                 continue
             # continue if the statistics is to be ignored
             try:
-                if (
-                    _file_dummy.var_defs[var_idx].statistics
-                    in self._opts["default"].ignore_statistics
-                ):
+                if var_def.statistics in self._opts["default"].ignore_statistics:
                     continue
             except KeyError:
                 # sometimes there's no statistics: pass
                 pass
-            # var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}"
-            # var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}#{_file_dummy.meta['unit']}"
-            var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}#{_file_dummy.var_defs[var_idx]['unit']}"
+            # adjust unit string
+            unit = var_def.unit
+            if unit in UALIASES:
+                unit = UALIASES[unit]
+            var_name = f"{matrix}#{_file_dummy.var_defs[var_idx].name}#{unit}"
             if vars_to_read is not None:
                 if var_name not in vars_to_read:
                     continue
             if var_name not in self._variables:
                 self._variables[var_name] = (
                     var_name,
-                    _file_dummy.var_defs[var_idx]['unit'],
+                    unit,
                 )
 
-            var_unit = _file_dummy.var_defs[var_idx].unit
+            var_unit = unit
             stat_name = _file_dummy.meta["station_code"]
             if stat_name not in self._stations:
                 country = _file_dummy.meta["station_code"][0:2]
                 # the location naming is not consistent
                 # try the two we have seen so far
                 try:
-                    lat = float(_file_dummy.meta["station_latitude"])
-                    lon = float(_file_dummy.meta["station_longitude"])
-                    alt_str = _file_dummy.meta["station_altitude"]
-                except KeyError:
-                    # might not always work either
-                    try:
-                        lat = float(_file_dummy.meta["measurement_latitude"])
-                        lon = float(_file_dummy.meta["measurement_longitude"])
-                        alt_str = _file_dummy.meta["measurement_altitude"]
-                    except KeyError:
-                        print(f"no lat / lon found in file {filename}. Skipping...")
-                        return None
-                try:
-                    # usually there's a blank between the value and the unit
-                    alt = float(alt_str.split(" ")[0])
-                except ValueError:
-                    # but unfortunately not always
-                    # remove all non numbers
-                    alt = float(re.sub(r"[^\d.-]+", "", alt_str))
+                    lat, lon, alt = self._get_station_loc_data(filename, _file_dummy)
+                except EBASPMFReaderException:
+                    return
+                # prepare some station based metadata
+                _meta_dummy = {}
+                _meta_dummy["file_metadata"] = {
+                    filename: {
+                        "meta": _file_dummy.meta,
+                        "var_defs": _file_dummy.var_defs,
+                    }
+                }
 
                 self._stations[stat_name] = Station(
                     {
@@ -152,7 +157,8 @@ def read_file(self, filename, vars_to_read=None):
                         "country": country,
                         "url": "",
                         "long_name": stat_name,
-                    }
+                    },
+                    metadata=_meta_dummy,
                 )
             else:
                 lat = self._stations[stat_name].latitude
@@ -162,7 +168,9 @@ def read_file(self, filename, vars_to_read=None):
             # put only the 1st match in the data...
             # because that is the one we should be interested in
             if var_name in vars_read_in_file:
-                print(f"Warning! Variable {var_name} is already used in current file! Skipping...")
+                logger.info(
+                    f"Variable {var_name} appears more than once in the current file; only the first occurrence is read. Skipping..."
+                )
                 continue
             else:
                 vars_read_in_file.append(var_name)
@@ -184,9 +192,6 @@ def read_file(self, filename, vars_to_read=None):
                         Flag.VALID,
                         np.nan,
                     )
-                    # print(_file_dummy.stop_meas[t_idx])
-                    # pass
-        assert True
 
     def _unfiltered_data(self, varname) -> Data:
         return self._data[varname]
@@ -200,6 +205,33 @@ def _unfiltered_variables(self) -> list[str]:
     def close(self):
         pass
 
+    def _get_station_loc_data(
+        self, filename: str, _file_dummy: EbasNasaAmesFile
+    ) -> tuple[float, float, float]:
+        try:
+            lat = float(_file_dummy.meta["station_latitude"])
+            lon = float(_file_dummy.meta["station_longitude"])
+            alt_str = _file_dummy.meta["station_altitude"]
+        except KeyError:
+            # might not always work either
+            try:
+                lat = float(_file_dummy.meta["measurement_latitude"])
+                lon = float(_file_dummy.meta["measurement_longitude"])
+                alt_str = _file_dummy.meta["measurement_altitude"]
+            except KeyError:
+                logger.info(
+                    f"no lat / lon found in file {filename}. Skipping the file..."
+                )
+                raise EBASPMFReaderException
+        try:
+            # usually there's a blank between the value and the unit
+            alt = float(alt_str.split(" ")[0])
+        except ValueError:
+            # but unfortunately not always
+            # remove all non numbers
+            alt = float(re.sub(r"[^\d.-]+", "", alt_str))
+        return lat, lon, alt
+
 
 class EbasPmfTimeseriesEngine(AutoFilterReaderEngine.AutoFilterEngine):
     def reader_class(self):
diff --git a/src/pyaro_readers/nilupmfebas/_lowlevel_helpers.py b/src/pyaro_readers/nilupmfebas/_lowlevel_helpers.py
index e51fd4a..a8a72ac 100644
--- a/src/pyaro_readers/nilupmfebas/_lowlevel_helpers.py
+++ b/src/pyaro_readers/nilupmfebas/_lowlevel_helpers.py
@@ -15,62 +15,6 @@
 logger = logging.getLogger(__name__)
 
 
-def invalid_input_err_str(argname, argval, argopts):
-    """Just a small helper to format an input error string for functions
-
-    Parameters
-    ----------
-    argname : str
-        name of input argument
-    argval
-        (invalid) value of input argument
-    argopts
-        possible input args for arg
-
-    Returns
-    -------
-    str
-        formatted string that can be parsed to an Exception
-    """
-
-    return f"Invalid input for {argname} ({argval}), choose from {argopts}"
-
-
-def check_dir_access(path):
-    """Uses multiprocessing approach to check if location can be accessed
-
-    Parameters
-    ----------
-    loc : str
-        path that is supposed to be checked
-
-    Returns
-    -------
-    bool
-        True, if location is accessible, else False
-    """
-    if not isinstance(path, str):
-        return False
-
-    return os.access(path, os.R_OK)
-
-
-def check_write_access(path):
-    """Check if input location provides write access
-
-    Parameters
-    ----------
-    path : str
-        directory to be tested
-
-    """
-    if not isinstance(path, str):
-        # not a path
-        return False
-
-    return os.access(path, os.W_OK)
-
-
 def _class_name(obj):
     """Returns class name of an object"""
     return type(obj).__name__
@@ -95,43 +39,6 @@ def validate(self, val):
         pass
 
 
-class TypeValidator(Validator):
-    def __init__(self, type):
-        self._type = type
-
-    def validate(self, val):
-        if not isinstance(val, self._type):
-            raise ValueError(f"need instance of {self._type}")
-        return val
-
-
-class StrType(Validator):
-    def validate(self, val):
-        if not isinstance(val, str):
-            raise ValueError(f"need str, got {val}")
-        return val
-
-
-class StrWithDefault(Validator):
-    def __init__(self, default: str):
-        self.default = default
-
-    def validate(self, val):
-        if not isinstance(val, str):
-            if val is None:
-                val = self.default
-            else:
-                raise ValueError(f"need str or None, got {val}")
-        return val
-
-
-class DictType(Validator):
-    def validate(self, val):
-        if not isinstance(val, dict):
-            raise ValueError(f"need dict, got {val}")
-        return val
-
-
 class FlexList(Validator):
     """list that can be instantiated via input str, tuple or list or None"""
 
@@ -147,41 +54,6 @@ def validate(self, val):
         return val
 
 
-class EitherOf(Validator):
-    _allowed = FlexList()
-
-    def __init__(self, allowed: list):
-        self._allowed = allowed
-
-    def validate(self, val):
-        if not any([x == val for x in self._allowed]):
-            raise ValueError(
-                f"invalid value {val}, needs to be either of {self._allowed}."
-            )
-        return val
-
-
-class ListOfStrings(FlexList):
-    def validate(self, val):
-        # make sure to have a list
-        val = super().validate(val)
-        # make sure all entries are strings
-        if not all([isinstance(x, str) for x in val]):
-            raise ValueError(f"not all items are str type in input list {val}")
-        return val
-
-
-class DictStrKeysListVals(Validator):
-    def validate(self, val: dict):
-        if not isinstance(val, dict):
-            raise ValueError(f"need dict, got {val}")
-        if any(not isinstance(x, str) for x in val):
-            raise ValueError(f"all keys need to be str type in {val}")
-        if any(not isinstance(x, list) for x in val.values()):
-            raise ValueError(f"all values need to be list type in {val}")
-        return val
-
-
 class Loc(abc.ABC):
     """Abstract descriptor representing a path location
 
@@ -246,18 +118,6 @@ def create(self, value):
         pass
 
 
-class DirLoc(Loc):
-    def create(self, value):
-        os.makedirs(value, exist_ok=True)
-        self.logger.info(f"created directory {value}")
-
-
-class AsciiFileLoc(Loc):
-    def create(self, value):
-        self.logger.info(f"create ascii file {value}")
-        open(value, "w").close()
-
-
 class BrowseDict(MutableMapping):
     """Dictionary-like object with getattr and setattr options
 
@@ -585,25 +445,6 @@ def merge_dicts(dict1, dict2, discard_failing=True):
     return new
 
 
-def chk_make_subdir(base, name):
-    """Check if sub-directory exists in parent directory"""
-    d = os.path.join(base, name)
-    if not os.path.exists(d):
-        os.mkdir(d)
-    return d
-
-
-def check_dirs_exist(*dirs, **add_dirs):
-    for d in dirs:
-        if not os.path.exists(d):
-            print(f"Creating dir: {d}")
-            os.mkdir(d)
-    for k, d in add_dirs.items():
-        if not os.path.exists(d):
-            os.mkdir(d)
-            print(f"Creating dir: {d} ({k})")
-
-
 def list_to_shortstr(lst, indent=0):
     """Custom function to convert a list into a short string representation"""
 
@@ -638,34 +479,6 @@ def _short_lst_fmt(lin):
     return s
 
 
-def sort_dict_by_name(d, pref_list: list = None) -> dict:
-    """Sort entries of input dictionary by their names and return ordered
-
-    Parameters
-    ----------
-    d : dict
-        input dictionary
-    pref_list : list, optional
-        preferred order of items (may be subset of keys in input dict)
-
-    Returns
-    -------
-    dict
-        sorted and ordered dictionary
-    """
-    if pref_list is None:
-        pref_list = []
-    s = {}
-    sorted_keys = sorted(d)
-    for k in pref_list:
-        if k in d:
-            s[k] = d[k]
-    for k in sorted_keys:
-        if not k in pref_list:
-            s[k] = d[k]
-    return s
-
-
 def dict_to_str(dictionary, indent=0, ignore_null=False):
     """Custom function to convert dictionary into string (e.g. for print)
diff --git a/src/pyaro_readers/nilupmfebas/_warnings.py b/src/pyaro_readers/nilupmfebas/_warnings.py
index be99cec..e3046b5 100644
--- a/src/pyaro_readers/nilupmfebas/_warnings.py
+++ b/src/pyaro_readers/nilupmfebas/_warnings.py
@@ -44,19 +44,3 @@ def warn_randomly_and_add_numbers(num1, num2):
     with warnings.catch_warnings():
         warnings.filterwarnings("ignore", category=category, message=message)
         yield
-
-
-def ignore_basemap_warning():  # pragma: no cover
-    warnings.filterwarnings(
-        "ignore", r".*install Basemap$", UserWarning, "geonum", append=True
-    )
-
-
-def ignore_earth_radius_warning():  # pragma: no cover
-    warnings.filterwarnings(
-        "ignore",
-        "Using DEFAULT_SPHERICAL_EARTH_RADIUS",
-        UserWarning,
-        "iris.*",
-        append=True,
-    )
diff --git a/src/pyaro_readers/nilupmfebas/ebas_flags.csv b/src/pyaro_readers/nilupmfebas/ebas_flags.csv
new file mode 100644
index 0000000..f0edeb5
--- /dev/null
+++ b/src/pyaro_readers/nilupmfebas/ebas_flags.csv
@@ -0,0 +1,153 @@
+100,'Checked by data originator. Valid measurement, overrides any invalid flags','V'
+101,'Denuder capture efficiency < 75%. Valid measurement','V'
+102,'CV of replicate diffusion tubes > 30 %. Valid measurement','V'
+103,'CV of replicate ALPHA samplers > 15 %. Valid measurement','V'
+110,'Episode data checked and accepted by data originator. Valid measurement','V'
+111,'Irregular data checked and accepted by data originator. Valid measurement','V'
+120,'Sample reanalysed with similar results. Valid measurement','V'
+147,'Below theoretical detection limit or formal Q/A limit, but a value has been measured and reported and is considered valid','V'
+185,'Possible local contamination indicated by wind direction or velocity','V'
+186,'Possible local contamination indicated by single scattering albedo (auto)','V'
+187,'Possible local contamination indicated by occurrence of new particles (auto)','V'
+188,'Possible local contamination indicated by low wind speed (auto)','V'
+189,'Possible local contamination indicated by wind from contaminated sector (auto)','V'
+190,'Not corrected for cross-sensitivity to particle scattering','V'
+191,'Data not truncation corrected - Valid measurement','V'
+210,'Episode data checked and accepted by database co-ordinator. Valid measurement','V'
+211,'Irregular data checked and accepted by database co-ordinator. Valid measurement','V'
+220,'Preliminary data','V'
+247,'Overlapping sample interval was corrected by the database co-ordinator. Possible wrong sample time (used for historic data only).','V'
+248,'Illegal flag was removed by the database co-ordinator. Lost flag information. (used for historic data only)','V'
+249,'Apparent typing error corrected. Valid measurement','V'
+250,'Considerable sea salt contribution, but considered valid','V'
+251,'Invalid due to large sea salt contribution','I'
+256,'Invalidated by database co-ordinator','I'
+257,'Extremely low value, outside four times standard deviation in a log-normal distribution','V'
+258,'Extremely high value, outside four times standard deviation in a log-normal distribution','V'
+259,'Unspecified error expected','I'
+260,'Contamination suspected','I'
+275,'Inconsistency between measured and estimated conductivity, but considered valid','V'
+276,'Inconsistency discovered through ion balance calculations, but considered valid','V'
+277,'Invalid due to inconsistency between measured and estimated conductivity','I'
+278,'Invalid due to inconsistency discovered through ion balance calculations','I'
+298,'Gold trap inconsistency in mercury monitor','V'
+299,'Inconsistent with another unspecified measurement','V'
+370,'For monthly values using samples partly in two month, the number of days are used for weighing the sample','V'
+380,'More than 50% of the measurements are below detection limit','V'
+382,'More than 75% of the measurements are below detection limit','V'
+388,'Data completeness less than 66%','V'
+389,'Data completeness less than 66%','I'
+390,'Data completeness less than 50%','V'
+391,'Data completeness less than 50%, data considered invalid','I'
+392,'Data completeness less than 75%','V'
+393,'Data completeness less than 75%, data considered invalid','I'
+394,'Data completeness less than 90%','V'
+395,'Data completeness less than 90%, data considered invalid','I'
+410,'Sahara dust event','V'
+411,'Aeolian dust event','V'
+420,'Preliminary data','V'
+440,'Reconstructed or recalculated data','V'
+450,'Considerable sea salt contribution, but considered valid','V'
+451,'Invalid due to large sea salt contribution','I'
+452,'Invalid due to large uncertainty','I'
+456,'Invalidated by data originator','I'
+457,'Extremely low value, outside four times standard deviation in a lognormal distribution','V'
+458,'Extremely high value, outside four times standard deviation in a lognormal distribution','V'
+459,'Extreme value, unspecified error','I'
+460,'Contamination suspected','I'
+470,'Particulate mass concentration higher than parallell mass concentration measurement with higher cut off i.e PM1_mass > PM25_mass and PM25_mass > PM10_mass','V'
+471,'Particulate mass concentration higher than parallell mass concentration measurement with higher cut off i.e PM1_mass > PM25_mass and PM25_mass > PM10_mass. Considered invalid','I'
+472,'Less accurate than normal due to high concentration(s)','V'
+475,'Inconsistency between measured and estimated conductivity, but considerd valid','V'
+476,'Inconsistency discovered through ion balance calculations, but considerd valid','V'
+477,'Invalid due to inconsistency between measured and estimated conductivity','I'
+478,'Invalid due to inconsistency discovered through ion balance calculations','I'
+498,'Gold trap inconsistency in mercury monitor','V'
+499,'Inconsistent with another unspecified measurement','V'
+521,'Bactericide was added to sample for storage under warm climate. Considered valid','V'
+530,'Invalid due to too low or too high recovery','I'
+531,'Low recovery, analysis inaccurate','V'
+532,'Data less accurate than normal due to high field blank value','V'
+533,'Filters mixed up; incorrect analysis','I'
+534,'Wrong coated denuder used','I'
+540,'Spectral interference in laboratory analysis','I'
+541,'Gold trap passiviated by unknown compound','I'
+549,'Impure chemicals','I'
+555,'Pollen and/or leaf contamination, but considered valid','V'
+556,'Bird droppings, but considered valid','V'
+557,'Insect contamination, but considered valid','V'
+558,'Dust contamination, but considered valid','V'
+559,'Unspecified contamination or local influence, but considered valid','V'
+565,'Pollen and/or leaf contamination, considered invalid','I'
+566,'Bird droppings, considered invalid','I'
+567,'Insect contamination, considered invalid','I'
+568,'Dust contamination, considered invalid','I'
+578,'Large sea salt contribution (ratio between marine and excess sulphate is larger than 2.0). Used for old data only. For newer data use 451/450.','I'
+591,'Agricultural contamination, considered invalid','I'
+593,'Industrial contamination, considered invalid','I'
+599,'Unspecified contamination or local influence','I'
+620,'Too high filter breakthrough, considered invalid','I'
+630,'POP concentration from the polyurethane foam (PUF) only','V'
+632,'Lid of polyurethane foam (PUF) sampler not closed','V'
+635,' Internal temperatures too far off target value, considered invalid','I'
+640,'Instrument internal relative humidity above 40%','V'
+641,'Aerosol filters installed incorrectly','I'
+644,'Low instrument precision and/or calibration issues','V'
+645,'Exceptional traffic nearby','V'
+646,'Exceptional traffic nearby','I'
+647,'Fire/wood burning nearby','V'
+648,'Snow sampler','V'
+649,'Temporary power fail has affected sampler operation','V'
+650,'Precipitation collector failure','V'
+651,'Agricultural activity nearby','V'
+652,'Construction/acitivity nearby','V'
+653,'Sampling period shorter than normal, considered representative. Observed values reported','V'
+654,'Sampling period longer than normal, considered representative. Observed values reported','V'
+655,'Estimated value created by averaging or spliting samples','V'
+656,'Wet-only collector failure, operated as bulk collector','V'
+657,'Precipitation collector overflow. Heavy snowfall/rain shower (squall)','V'
+658,'Too small air volume','I'
+659,'Unspecified sampling anomaly','I'
+660,'Unspecified sampling anomaly, considered valid','V'
+662,'Too high sampling flow, data considered valid','V'
+663,'Too high sampling flow, data considered invalid','I'
+664,'Instrument flow(s) too far off target value, considered invalid','I'
+665,'Filter damaged, valid','V'
+666,'Filter damaged, invalid','I'
+668,'Moist or wet filter, valid','V'
+669,'Moist or wet filter, invalid','I'
+670,'Incomplete data acquisition for multi-component data sets','I'
+674,'Icing or hoar frost in the intake, considered valid','V'
+675,'no visibility data available','V'
+676,'station inside cloud (visibility < 1000 m)','V'
+677,'Icing or hoar frost in the intake','I'
+678,'Hurricane','V'
+679,'Unspecified meteorological condition','V'
+680,'Undefined wind direction','V'
+681,'Low data capture','I'
+682,'Invalid due to calibration or zero/span check. Used for Level 0.','I'
+683,'Invalid due to calibration. Used for Level 0.','I'
+684,'Invalid due to zero/span check. Used for Level 0.','I'
+685,'Invalid due to secondary standard gas measurement. Used for Level 0.','I'
+699,'Mechanical problem, unspecified reason','I'
+701,'Less accurate than usual, unspecified reason. (Used only with old data, for new data see groups 6 and 5)','I'
+740,'Probably biased gas/particle ratio','V'
+741,'Non refractory AMS concentrations. Dont include compounds that volatalises above 600 deg C','V'
+750,'H+ not measured in alkaline sample','M'
+760,'Value estimated by summing up the constituents measured','V'
+770,'Value above range, data element contains estimated value','V'
+771,'Value above range, data element contains upper range limit','V'
+780,'Value below detection or quantification limit, data element contains estimated or measured value. Use of flag 147 is encouraged.','V'
+781,'Value below detection limit, data element contains detection limit','V'
+782,'Low precipitation, concentration estimated','V'
+783,'Low precipitation, concentration unknown','M'
+784,'Low precipitation, concentration estimated','I'
+797,'Data element taken from co-located instrument','V'
+798,'Measurement missing (unspecified reason), data element contains estimated value. Considered valid.','V'
+799,'Measurement missing (unspecified reason), data element contains estimated value','I'
+890,'Concentration in precipitation undefined, no precipitation','M'
+899,'Measurement undefined, unspecified reason','M'
+900,'Hidden and invalidated by data originator','H'
+980,'Missing due to calibration or zero/span check','M'
+990,'Precipitation not measured due to snow-fall. Needed for historic data, should not be needed for new data','M'
+999,'Missing measurement, unspecified reason','M'
diff --git a/src/pyaro_readers/nilupmfebas/ebas_nasa_ames.py b/src/pyaro_readers/nilupmfebas/ebas_nasa_ames.py
index 86b68a8..c0dd328 100644
--- a/src/pyaro_readers/nilupmfebas/ebas_nasa_ames.py
+++ b/src/pyaro_readers/nilupmfebas/ebas_nasa_ames.py
@@ -269,7 +269,7 @@ def __init__(self, raw_data, interpret_on_init=True):
     @property
     def FLAG_INFO(self):
         """Detailed information about EBAS flag definitions"""
-        return const.ebas_flag_info
+        return read_ebas_flags_file()
 
     @property
     def decoded(self):
@@ -632,8 +632,8 @@ def read_file(
         lc = 0  # line counter
         dc = 0  # data block line counter
         mc = 0  # meta block counter
-        END_VAR_DEF = np.nan  # will be set (info stored in header)
-        IN_DATA = False
+        end_var_def = np.nan  # will be set (info stored in header)
+        in_data = False
         data = []
         self.file = nasa_ames_file
         try:
@@ -647,10 +647,9 @@ def read_file(
             return
 
         for line in lines:
-            if IN_DATA:  # in data block (end of file)
+            if in_data:  # in data block (end of file)
                 try:
                     data.append(tuple(float(x.strip()) for x in line.strip().split()))
-                    # data.append([float(x.strip()) for x in line.strip().split()])
                 except Exception as e:
                     logger.warning(
                         f"EbasNasaAmesFile: Failed to read data row {dc}. Reason: {e}"
@@ -677,18 +676,18 @@ def read_file(
                 logger.warning(msg)
             else:  # behind header section and before data definition (contains column defs and meta info)
                 if mc == 0:  # still in column definition
-                    END_VAR_DEF = self._NUM_FIXLINES + self.num_cols_dependent - 1
+                    end_var_def = self._NUM_FIXLINES + self.num_cols_dependent - 1
                     NUM_HEAD_LINES = self.num_head_lines
                     try:
                         self.var_defs.append(self._read_vardef_line(line))
                     except Exception as e:
                         logger.warning(repr(e))
 
-                elif lc < END_VAR_DEF:
+                elif lc < end_var_def:
                     self.var_defs.append(self._read_vardef_line(line))
 
                 elif lc == NUM_HEAD_LINES - 1:
-                    IN_DATA = True
+                    in_data = True
                     self._data_header = h = [x.strip() for x in line.split()]
                     # append information of first two columns to variable
                     # definition array.
@@ -707,7 +706,7 @@ def read_file(
                     if only_head:
                         return
                     logger.debug("REACHED DATA BLOCK")
-                elif lc >= END_VAR_DEF + 2:
+                elif lc >= end_var_def + 2:
                     try:
                         name, val = line.split(
                             ":", 1
@@ -804,3 +803,54 @@ def __str__(self):
         s += f"\n\n{str_underline('Data', indent=3)}"
         s += f"\n{self._data_short_str()}"
         return s
+
+
+def read_ebas_flags_file(ebas_flags_csv: str = None):
+    """Reads file ebas_flags.csv
+
+    Parameters
+    ----------
+    ebas_flags_csv : str
+        file containing flag info
+
+    Returns
+    -------
+    dict
+        dict with loaded flag info
+    """
+    valid = {}
+    values = {}
+    info = {}
+    if ebas_flags_csv is None:
+        ebas_flags_csv = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)),
+            "ebas_flags.csv",
+        )
+    with open(ebas_flags_csv) as fio:
+        for line in fio:
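+            # each row looks like: 100,'Checked by data originator. ...','V'
+            # i.e. flag number, quoted description (which may itself contain
+            # commas, hence the split/join handling below), and a quoted
+            # validity class (V/I/M/H)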
+            spl = line.strip().split(",")
+            num = int(spl[0].strip())
+            try:
+                val_str = spl[-1][1:-1]
+            except Exception:
+                raise OSError(
+                    f"Failed to read flag information in row {line} "
+                    f"(Check if entries in ebas_flags.csv are quoted)"
+                )
+            info_str = ",".join(spl[1:-1])
+            try:
+                info_str = info_str[1:-1]
+            except Exception:
+                raise OSError(
+                    f"Failed to read flag information in row {line} "
+                    f"(Check if entries in ebas_flags.csv are quoted)"
+                )
+            isvalid = val_str == "V"
+            valid[num] = isvalid
+            values[num] = val_str
+            info[num] = info_str
+    result = {}
+    result["valid"] = valid
+    result["info"] = info
+    result["vals"] = values
+    return result
diff --git a/src/pyaro_readers/nilupmfebas/exceptions.py b/src/pyaro_readers/nilupmfebas/exceptions.py
index c95b33f..724c2da 100644
--- a/src/pyaro_readers/nilupmfebas/exceptions.py
+++ b/src/pyaro_readers/nilupmfebas/exceptions.py
@@ -3,184 +3,9 @@
 """
 
 
-class AeronetReadError(IOError):
-    # Aeronet reading failed somehow
-    pass
-
-
-class CachingError(IOError):
-    pass
-
-
-class CacheWriteError(CachingError):
-    pass
-
-
-class CacheReadError(CachingError):
-    pass
-
-
-class ColocationError(ValueError):
-    pass
-
-
-class ColocationSetupError(ValueError):
-    pass
-
-
-class CoordinateError(ValueError):
-    pass
-
-
-class CoordinateNameError(CoordinateError):
-    pass
-
-
-class DataRetrievalError(IOError):
-    pass
-
-
-class DataCoverageError(ValueError):
-    pass
-
-
-class DataDimensionError(ValueError):
-    pass
-
-
-class DataIdError(ValueError):
-    pass
-
-
-class DataQueryError(ValueError):
-    pass
-
-
-class DataSourceError(ValueError):
-    pass
-
-
-class DataUnitError(ValueError):
-    pass
-
-
-class DeprecationError(AttributeError, ValueError):
-    pass
-
-
-class DimensionOrderError(DataDimensionError):
-    pass
-
-
-class DataExtractionError(ValueError):
-    pass
-
-
-class DataSearchError(IOError):
-    pass
-
-
-class EvalEntryNameError(KeyError):
-    pass
-
-
 class NasaAmesReadError(IOError):
     pass
 
 
-class EbasFileError(ValueError):
-    pass
-
-
-class EEAv2FileError(ValueError):
-    pass
-
-
-class EntryNotAvailable(KeyError):
-    pass
-
-
-class InitialisationError(ValueError):
-    pass
-
-
-class FileConventionError(IOError):
-    pass
-
-
-class LongitudeConstraintError(ValueError):
-    pass
-
-
-class MetaDataError(AttributeError):
-    pass
-
-
-class NetworkNotSupported(NotImplementedError):
-    pass
-
-
-class NetworkNotImplemented(NotImplementedError):
-    pass
-
-
-class NetcdfError(IOError):
-    pass
-
-
-class NotInFileError(IOError):
-    pass
-
-
-class ResamplingError(ValueError):
-    pass
-
-
-class StationCoordinateError(CoordinateError):
-    pass
-
-
-class StationNotFoundError(AttributeError):
-    pass
-
-
 class TimeZoneError(AttributeError):
     pass
-
-
-class TimeMatchError(AttributeError):
-    pass
-
-
-class TemporalResolutionError(ValueError):
-    pass
-
-
-class TemporalSamplingError(ValueError):
-    pass
-
-
-class UnitConversionError(ValueError):
-    pass
-
-
-class UnknownRegion(ValueError):
-    pass
-
-
-class UnresolvableTimeDefinitionError(DataDimensionError, NetcdfError):
-    """Is raised if time definition in NetCDF file is wrong and cannot be corrected"""
-
-    pass
-
-
-class VarNotAvailableError(DataCoverageError):
-    pass
-
-
-class VariableDefinitionError(IOError):
-    pass
-
-
-class VariableNotFoundError(IOError):
-    pass
diff --git a/src/pyaro_readers/units_helpers.py b/src/pyaro_readers/units_helpers.py
index 56a772a..fe1ef75 100644
--- a/src/pyaro_readers/units_helpers.py
+++ b/src/pyaro_readers/units_helpers.py
@@ -72,6 +72,7 @@ class UnitConversionError(ValueError):
     # "ug C m-3": "ug C/m3",
     # "ug N m-3": "ug N/m3",
     "ugC/m3": "ug C m-3",
+    "ug C/m3": "ug C m-3",
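+    # (both spellings of the EBAS carbon unit map to the space-separated form
+    # that EbasPmfReader embeds in its matrix#component#unit variable names)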
"testdata", "PMF_EBAS" ) @@ -27,17 +55,22 @@ def test_0engine(self): def test_1open_single_file(self): with pyaro.open_timeseries(self.engine, self.file, filters=[]) as ts: self.assertGreaterEqual(len(ts.variables()), 1) + for var in ts.variables(): + assert var in self.test_vars self.assertEqual(len(ts.stations()), 1) def test_2open_directory(self): with pyaro.open_timeseries(self.engine, self.testdata_dir, filters=[]) as ts: - self.assertGreaterEqual(len(ts.variables()), 3) - self.assertEqual(len(ts.stations()), 7) + self.assertGreaterEqual(len(ts.variables()), 1) + for var in ts.variables(): + assert var in self.test_vars - # def test_3open_single_file(self): - # with pyaro.open_timeseries(self.engine, self.file, filters=[]) as ts: - # self.assertGreaterEqual(len(ts.variables()), 1) - # self.assertEqual(len(ts.stations()), 1) + def test_3open_ebascsvfile(self): + dummy = read_ebas_flags_file(None) + assert isinstance(dummy, dict) + assert isinstance(dummy["valid"], dict) + assert isinstance(dummy["info"], dict) + assert isinstance(dummy["vals"], dict) if __name__ == "__main__":