Skip to content

Commit

Permalink
Merge pull request #38 from metno/37-harp-reader-is-missing-mandatory…
Browse files Browse the repository at this point in the history
…-parameter-filters

add filters argument
  • Loading branch information
jgriesfeller authored May 6, 2024
2 parents 1e9dc62 + 458edb7 commit 5a476bc
Show file tree
Hide file tree
Showing 6 changed files with 273 additions and 65 deletions.
6 changes: 3 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = pyaro_readers
version = 0.0.4dev
version = 0.0.5
author = MET Norway
description = implementations of pyaerocom reading plugings using pyaro as interface
long_description = file: README.md
Expand All @@ -22,7 +22,7 @@ url = https://github.com/metno/pyaro-readers
python_version = >=3.9
install_requires =
pyaro >= 0.0.6
geocoder_reverse_natural_earth >= 0.0.1
geocoder_reverse_natural_earth >= 0.0.2
netCDF4
requests
tqdm
Expand All @@ -31,7 +31,7 @@ install_requires =

package_dir =
=src
packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf, pyaro_readers.harpreader
packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf, pyaro_readers.harpreader, pyaro_readers
test_require = tox:tox

[options.package_data]
Expand Down
5 changes: 5 additions & 0 deletions src/pyaro_readers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from importlib import metadata

__version__ = metadata.version(__package__)

from .units_helpers import *
179 changes: 121 additions & 58 deletions src/pyaro_readers/harpreader/harpreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
import os
import xarray as xr
import numpy as np
from collections import namedtuple
import re
from pathlib import Path
from tqdm import tqdm
import cfunits
import pyaro
from pyaro_readers.units_helpers import UALIASES

logger = logging.getLogger(__name__)

Expand All @@ -28,26 +28,76 @@ class AeronetHARPReader(AutoFilterReaderEngine.AutoFilterReader):
Reader for netCDF files which follow the HARP convention.
"""

def __init__(self, file: str):
self._filters = []
if os.path.isfile(file):
self._file = file
FILE_MASK = "*.nc"
COORD_NAMES = [
"latitude",
"longitude",
"altitude",
"datetime_start",
"datetime_stop",
]

def __init__(
self,
file: [Path, str],
filters=[],
vars_to_read: list[str] = None,
):
self._filters = filters
realpath = Path(file).resolve()

self._data = {}
self._files = []
self._stations = {}
self._vars_to_read = vars_to_read
self._set_filters(filters)

# variable include filter comes like this
# {'variables': {'include': ['PM10_density']}}
# test for variable filter
if "variables" in filters:
if "include" in filters["variables"]:
vars_to_read = filters["variables"]["include"]
self._vars_to_read = vars_to_read
logger.info(f"applying variable include filter {vars_to_read}...")

if os.path.isfile(realpath) or os.path.isdir(realpath):
pass
else:
raise HARPReaderException(f"No such file: {file}")
raise HARPReaderException(f"No such file or directory: {file}")

with xr.open_dataset(self._file) as harp:
if harp.attrs.get("Conventions", None) != "HARP-1.0":
raise ValueError(f"File is not a HARP file.")

self._variables = self._read_file_variables()

def _unfiltered_stations(self) -> dict[str, Station]:
pass

def close(self):
pass

def _read_file_variables(self) -> dict[str, str]:
if os.path.isdir(file):
pattern = os.path.join(file, self.FILE_MASK)
self._files = glob.glob(pattern)
else:
self._files.append(file)

bar = tqdm(total=len(self._files))

for f_idx, _file in enumerate(self._files):
logger.info(f"Reading {_file}")
bar.update(1)
self._variables = self._read_file_variables(_file)
# initialise all variables if not done yet
for _var in self._variables:
# skip coordinate names
if _var in self.COORD_NAMES:
continue
if vars_to_read is not None and _var not in vars_to_read:
logger.info(f"Skipping {_var}")
continue
if _var not in self._data:
units = self._variables[_var]
data = NpStructuredData(_var, units)
self._data[_var] = data

self._get_data_from_single_file(
_file,
_var,
)
bar.close()

def _read_file_variables(self, filename) -> dict[str, str]:
"""Returns a mapping of variable name to unit for the dataset.
Returns:
Expand All @@ -57,44 +107,26 @@ def _read_file_variables(self) -> dict[str, str]:
"""
variables = {}
with xr.open_dataset(self._file, decode_cf=False) as d:
with xr.open_dataset(
filename,
decode_cf=False,
) as d:
for vname, var in d.data_vars.items():
variables[vname] = cfunits.Units(var.attrs["units"])
if vname in self._vars_to_read:
# Units in pyaro arte by definition strings, but this way
# we can make sure that cfunits understands them
# otherwise variables[vname] = var.attrs["units"] should work as well
variables[vname] = str(cfunits.Units(var.attrs["units"]))
if variables[vname] in UALIASES:
variables[vname] = UALIASES[variables[vname]]

return variables

def _unfiltered_data(self, varname: str) -> NpStructuredData:
"""Returns unfiltered data for a variable.
Parameters:
-----------
varname : str
The variable name for which to return the data.
Returns:
--------
NpStructuredArray
The data.
"""

units = self._variables[varname]
data = NpStructuredData(varname, units)

pattern = ""
if os.path.isdir(self._file):
pattern = os.path.join(self._file, "*.nc")
else:
pattern = self._file

for f in glob.glob(pattern):
self._get_data_from_single_file(f, varname, data)

return data

def _get_data_from_single_file(
self, file: str, varname: str, data: NpStructuredData
) -> None:
self,
file: str,
varname: str,
) -> bool:
"""Loads data for a variable from a single file.
Parameters:
Expand All @@ -107,20 +139,30 @@ def _get_data_from_single_file(
Data instance to which the data will be appended to in-place.
"""
dt = xr.open_dataset(file)
dt = xr.load_dataset(file)

if dt.attrs.get("Conventions", None) != "HARP-1.0":
raise ValueError(f"File {file} is not a HARP file.")

values = dt[varname].to_numpy()
# take station name from filename since there is no name in the data...
stat_name = os.path.basename(file).split("-")[2]

values_length = len(values)
start_time = np.asarray(dt["datetime_start"])
stop_time = np.asarray(dt["datetime_stop"])
# start and stop time have been the same in the 1st data revision
# check that and assume hourly data if it's still the case
t_diff = stop_time - start_time
if t_diff.sum() == 0:
stop_time = stop_time + np.timedelta64(1, "h")
lat = np.asarray([dt["latitude"]] * values_length)
long = np.asarray([dt["longitude"]] * values_length)
station = np.asarray([np.nan] * values_length)
station = np.asarray([stat_name] * values_length)
altitude = np.asarray([dt["altitude"]] * values_length)

flags = np.asarray([Flag.VALID] * values_length)
data.append(
self._data[varname].append(
value=values,
station=station,
latitude=lat,
Expand All @@ -133,14 +175,35 @@ def _get_data_from_single_file(
standard_deviation=np.asarray([np.nan] * values_length),
)

# fill self._stations

if not stat_name in self._stations:
self._stations[stat_name] = Station(
{
"station": stat_name,
"longitude": long[0],
"latitude": lat[0],
"altitude": altitude[0],
"country": "NN",
"url": "",
"long_name": stat_name,
}
)

def _unfiltered_variables(self) -> list[str]:
"""Returns a list of the variable names.
Returns:
list[str]
The list of variable names.
"""
return list(self._variables.keys())
return self._data.keys()

def _unfiltered_data(self, varname) -> Data:
return self._data[varname]

def _unfiltered_stations(self) -> dict[str, Station]:
return self._stations

def close(self):
pass
Expand Down
101 changes: 101 additions & 0 deletions src/pyaro_readers/units_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import pandas as pd


class UnitConversionError(ValueError):
pass


#: default frequency for rates variables (e.g. deposition, precip)
RATES_FREQ_DEFAULT = "d"

# 1. DEFINITION OF ATOM and MOLECULAR MASSES

# Atoms
M_O = 15.999 # u
M_S = 32.065 # u
M_N = 14.0067 # u
M_H = 1.00784 # u

# Molecules
M_SO2 = M_S + 2 * M_O
M_SO4 = M_S + 4 * M_O

M_NO2 = M_N + 2 * M_O
M_NO3 = M_N + 3 * M_O

M_NH3 = M_N + 3 * M_H
M_NH4 = M_N + 4 * M_H

# Unit conversion and custom units definitions

# 2.1 Other conversion factors
HA_TO_SQM = 10000 # hectar to square metre.

# 3. LOOKUP TABLE FOR CONVERSION FACTORS

#: Custom unit conversion factors for certain variables
#: columns: variable -> from unit -> to_unit -> conversion
#: factor
UCONV_MUL_FACS = pd.DataFrame(
[
# ["dryso4", "mg/m2/d", "mgS m-2 d-1", M_S / M_SO4],
# ["drynh4", "mg/m2/d", "mgN m-2 d-1", M_N/ M_NH4],
# ["concso4", "ug S/m3", "ug m-3", M_SO4 / M_S],
# ["SO4ugSm3", "ug/m3", "ug S m-3", M_S / M_SO4],
# ["concso4pm25", "ug S/m3", "ug m-3", M_SO4 / M_S],
# ["concso4pm10", "ug S/m3", "ug m-3", M_SO4 / M_S],
["concso2", "ug S/m3", "ug m-3", M_SO2 / M_S],
["concbc", "ug C/m3", "ug m-3", 1.0],
["concoa", "ug C/m3", "ug m-3", 1.0],
["concoc", "ug C/m3", "ug m-3", 1.0],
["conctc", "ug C/m3", "ug m-3", 1.0],
# a little hacky for ratpm10pm25...
# ["ratpm10pm25", "ug m-3", "1", 1.0],
["concpm25", "ug m-3", "1", 1.0],
["concpm10", "ug m-3", "1", 1.0],
["concno2", "ug N/m3", "ug m-3", M_NO2 / M_N],
# ["concno3", "ug N/m3", "ug m-3", M_NO3 / M_N],
["concnh3", "ug N/m3", "ug m-3", M_NH3 / M_N],
# ["concnh4", "ug N/m3", "ug m-3", M_NH4 / M_N],
["wetso4", "kg S/ha", "kg m-2", M_SO4 / M_S / HA_TO_SQM],
["concso4pr", "mg S/L", "g m-3", M_SO4 / M_S],
],
columns=["var_name", "from", "to", "fac"],
).set_index(["var_name", "from"])

# may be used to specify alternative names for custom units defined
# in UCONV_MUL_FACS

UALIASES = {
# mass concentrations
# "ug S m-3": "ug S/m3",
# "ug C m-3": "ug C/m3",
# "ug N m-3": "ug N/m3",
"ugC/m3": "ug C m-3",
"ug/m3": "ug m-3",
# deposition rates (implicit)
## sulphur species
"mgS/m2": "mg S m-2",
"mgSm-2": "mg S m-2",
## nitrogen species
"mgN/m2": "mg N m-2",
"mgNm-2": "mg N m-2",
# deposition rates (explicit)
## sulphur species
"mgS/m2/h": "mg S m-2 h-1",
"mg/m2/h": "mg m-2 h-1",
"mgS/m**2/h": "mg S m-2 h-1",
"mgSm-2h-1": "mg S m-2 h-1",
"mgSm**-2h-1": "mg S m-2 h-1",
"mgS/m2/d": "mg S m-2 d-1",
## nitrogen species
"mgN/m2/h": "mg N m-2 h-1",
"mgN/m**2/h": "mg N m-2 h-1",
"mgNm-2h-1": "mg N m-2 h-1",
"mgNm**-2h-1": "mg N m-2 h-1",
"mgN/m2/d": "mg N m-2 d-1",
## others
"MM/H": "mm h-1",
# others
"/m": "m-1",
}
Loading

0 comments on commit 5a476bc

Please sign in to comment.