Skip to content

Commit

Permalink
HARP reader (#36)
Browse files Browse the repository at this point in the history
Implement reader for (some) harp files.

---------

Co-authored-by: Thorbjørn Lundin <[email protected]>
  • Loading branch information
thorbjoernl and thorbjoernl authored Apr 25, 2024
1 parent a43296d commit 1e9dc62
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 5 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ jobs:
experimental: [ false ]
os: [ ubuntu-22.04 ]
steps:
- name: Install system packages
run: |
sudo apt update
sudo apt install libudunits2-dev
- uses: actions/checkout@v3
with:
lfs: 'true'
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/

.vscode/
33 changes: 31 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ The MSC-W database contains the EBAS database for 1990-2021 and the EEA_Airquip
contain already hourly data if enough hours have been measured. Therefore, `resolution` is a
required parameter.

### harp
Reader for NetCDF files that follow the [HARP](http://stcorp.github.io/harp/doc/html/conventions/)
conventions.


## Usage
### aeronetsunreader
Expand Down Expand Up @@ -86,7 +90,7 @@ with pyaro.open_timeseries("aeronetsdareader", TEST_URL, filters=[], fill_countr
import pyaro
TEST_URL = "/lustre/storeB/project/fou/kl/emep/Auxiliary/NILU/"
with pyaro.open_timeseries(
'ascii2netcdf', EBAS_URL, resolution="daily", filters=[]
'ascii2netcdf', TEST_URL, resolution="daily", filters=[]
) as ts:
data = ts.data("sulphur_dioxide_in_air")
data.units # ug
Expand All @@ -104,9 +108,34 @@ with pyaro.open_timeseries(
data.altitudes
# values
data.values

```

### harpreader
```python
import pyaro

TEST_URL = "/lustre/storeB/project/aerocom/aerocom1/AEROCOM_OBSDATA/CNEMC/aggregated/sinca-surface-157-999999-001.nc"
with pyaro.open_timeseries(
'harp', TEST_URL
) as ts:
data = ts.data("CO_volume_mixing_ratio")
data.units # ppm
# stations
data.stations
# start_times
data.start_times
# stop_times
data.end_times
# latitudes
data.latitudes
# longitudes
data.longitudes
# altitudes
data.altitudes
# values
data.values

```

### geocoder_reverse_natural_earth
geocoder_reverse_natural_earth is small helper to identify country codes for obs networks that don't mention the
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml_future
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ dependencies = [
"shapely",
"rtree",
"tqdm",
"xarray",
"cfunits"
]

[tool.setuptools]
Expand Down
8 changes: 5 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = pyaro_readers
version = 0.0.3dev
version = 0.0.4dev
author = MET Norway
description = implementations of pyaerocom reading plugings using pyaro as interface
long_description = file: README.md
Expand All @@ -26,10 +26,12 @@ install_requires =
netCDF4
requests
tqdm
xarray
cfunits

package_dir =
=src
packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf
packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf, pyaro_readers.harpreader
test_require = tox:tox

[options.package_data]
Expand All @@ -39,7 +41,7 @@ pyaro.timeseries =
aeronetsunreader = pyaro_readers.aeronetsunreader:AeronetSunTimeseriesEngine
aeronetsdareader = pyaro_readers.aeronetsdareader:AeronetSdaTimeseriesEngine
ascii2netcdf = pyaro_readers.ascii2netcdf:Ascii2NetcdfTimeseriesEngine

harp = pyaro_readers.harpreader:AeronetHARPEngine

[tox:tox]
min_version = 4.0
Expand Down
4 changes: 4 additions & 0 deletions src/pyaro_readers/harpreader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .harpreader import (
AeronetHARPEngine,
AeronetHARPReader,
)
160 changes: 160 additions & 0 deletions src/pyaro_readers/harpreader/harpreader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import glob
import inspect
from pyaro.timeseries import (
AutoFilterReaderEngine,
Station,
Data,
NpStructuredData,
Flag,
)
import logging
import os
import xarray as xr
import numpy as np
from collections import namedtuple
import re
import cfunits
import pyaro

logger = logging.getLogger(__name__)


class HARPReaderException(Exception):
pass


class AeronetHARPReader(AutoFilterReaderEngine.AutoFilterReader):
"""
Reader for netCDF files which follow the HARP convention.
"""

def __init__(self, file: str):
self._filters = []
if os.path.isfile(file):
self._file = file
else:
raise HARPReaderException(f"No such file: {file}")

with xr.open_dataset(self._file) as harp:
if harp.attrs.get("Conventions", None) != "HARP-1.0":
raise ValueError(f"File is not a HARP file.")

self._variables = self._read_file_variables()

def _unfiltered_stations(self) -> dict[str, Station]:
pass

def close(self):
pass

def _read_file_variables(self) -> dict[str, str]:
"""Returns a mapping of variable name to unit for the dataset.
Returns:
--------
dict[str, str] :
A dictionary mapping variable name to its corresponding unit.
"""
variables = {}
with xr.open_dataset(self._file, decode_cf=False) as d:
for vname, var in d.data_vars.items():
variables[vname] = cfunits.Units(var.attrs["units"])

return variables

def _unfiltered_data(self, varname: str) -> NpStructuredData:
"""Returns unfiltered data for a variable.
Parameters:
-----------
varname : str
The variable name for which to return the data.
Returns:
--------
NpStructuredArray
The data.
"""

units = self._variables[varname]
data = NpStructuredData(varname, units)

pattern = ""
if os.path.isdir(self._file):
pattern = os.path.join(self._file, "*.nc")
else:
pattern = self._file

for f in glob.glob(pattern):
self._get_data_from_single_file(f, varname, data)

return data

def _get_data_from_single_file(
self, file: str, varname: str, data: NpStructuredData
) -> None:
"""Loads data for a variable from a single file.
Parameters:
-----------
file : str
The file path.
varname : str
The variable name.
data : NpStructuredData
Data instance to which the data will be appended to in-place.
"""
dt = xr.open_dataset(file)

values = dt[varname].to_numpy()

values_length = len(values)
start_time = np.asarray(dt["datetime_start"])
stop_time = np.asarray(dt["datetime_stop"])
lat = np.asarray([dt["latitude"]] * values_length)
long = np.asarray([dt["longitude"]] * values_length)
station = np.asarray([np.nan] * values_length)
altitude = np.asarray([dt["altitude"]] * values_length)

flags = np.asarray([Flag.VALID] * values_length)
data.append(
value=values,
station=station,
latitude=lat,
longitude=long,
altitude=altitude,
start_time=start_time,
end_time=stop_time,
# TODO: Currently assuming that all observations are valid.
flag=flags,
standard_deviation=np.asarray([np.nan] * values_length),
)

def _unfiltered_variables(self) -> list[str]:
"""Returns a list of the variable names.
Returns:
list[str]
The list of variable names.
"""
return list(self._variables.keys())

def close(self):
pass


class AeronetHARPEngine(AutoFilterReaderEngine.AutoFilterEngine):
def reader_class(self):
return AeronetHARPReader

def open(self, filename: str, *args, **kwargs) -> AeronetHARPReader:
return self.reader_class()(filename, *args, **kwargs)

def description(self):
return inspect.doc(self)

def url(self):
return "https://github.com/metno/pyaro-readers"
18 changes: 18 additions & 0 deletions tests/test_HARPReader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import unittest
import pyaro
import pyaro.timeseries
import cfunits


class TestHARPReader(unittest.TestCase):
engine = "harp"

def test_1read(self):
with pyaro.open_timeseries(
self.engine,
"tests/testdata/sinca-surface-157-999999-001.nc",
) as ts:
data = ts.data("CO_volume_mixing_ratio")

self.assertGreater(len(data), 10000)
self.assertEqual(data.units, cfunits.Units("ppm"))
3 changes: 3 additions & 0 deletions tests/testdata/sinca-surface-157-999999-001.nc
Git LFS file not shown

0 comments on commit 1e9dc62

Please sign in to comment.