Skip to content

Commit

Permalink
[WIP] Revision strings
Browse files Browse the repository at this point in the history
  • Loading branch information
thorbjoernl committed Jul 31, 2024
1 parent 6924bd1 commit dc84b8a
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ def __init__(
)
bar.close()

def metadata(self):
    """Return the metadata mapping of this reader.

    This reader does not publish any metadata entries, so a new, empty
    dictionary is handed back on every call.
    """
    return {}

def _unfiltered_data(self, varname) -> Data:
return self._data[varname]

Expand Down
11 changes: 10 additions & 1 deletion src/pyaro_readers/aeronetsunreader/AeronetSunTimeseriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
Station,
)
from tqdm import tqdm
import hashlib

# default URL
BASE_URL = "https://aeronet.gsfc.nasa.gov/data_push/V3/All_Sites_Times_Daily_Averages_AOD20.zip"
Expand Down Expand Up @@ -109,6 +110,7 @@ def __init__(
else:
with open(self._filename, newline="") as csvfile:
lines = csvfile.readlines()
self._revisionstr = hashlib.md5("".join(lines).encode()).hexdigest()

for _hidx in range(HEADER_LINE_NO - 1):
self._header.append(lines.pop(0))
Expand Down Expand Up @@ -191,7 +193,14 @@ def __init__(
value, station, lat, lon, alt, start, end, Flag.VALID, np.nan
)
bar.close()


def metadata(self):
    """Return the metadata mapping of this reader.

    The mapping contains a ``"revision"`` entry holding
    ``self._revisionstr`` when that attribute exists and is not ``None``;
    otherwise the mapping is empty.

    Returns:
        dict: a new dictionary, either empty or with the single key
        ``"revision"``.
    """
    # The attribute is read defensively: not every path through the
    # constructor is guaranteed to have assigned it, and a missing
    # attribute should mean "no revision available", not AttributeError.
    revision = getattr(self, "_revisionstr", None)
    if revision is None:
        return {}
    return {"revision": revision}

def _unfiltered_data(self, varname) -> Data:
return self._data[varname]

Expand Down
3 changes: 3 additions & 0 deletions src/pyaro_readers/ascii2netcdf/Ascii2NetcdfTimeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def __init__(
)
return

def metadata(self):
    """Return the metadata mapping of this reader.

    No metadata entries are provided here; the result is always a new,
    empty dictionary.
    """
    return {}

def _is_year_in_filters(self, year):
start_year = np.datetime64(f"{year}-01-01 00:00:00")
end_year = np.datetime64(f"{year}-12-31 23:59:59")
Expand Down
17 changes: 16 additions & 1 deletion src/pyaro_readers/harpreader/harpreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from tqdm import tqdm
import cfunits
from pyaro_readers.units_helpers import UALIASES
import hashlib

logger = logging.getLogger(__name__)

Expand All @@ -39,7 +40,7 @@ class AeronetHARPReader(AutoFilterReaderEngine.AutoFilterReader):

def __init__(
self,
file: [Path, str],
file: Path | str,
filters=[],
vars_to_read: list[str] = None,
):
Expand Down Expand Up @@ -97,6 +98,20 @@ def __init__(
)
bar.close()

def metadata(self):
    """Return the metadata mapping of this reader.

    The single ``"revision"`` entry is derived from the ``history`` global
    attribute of every file in ``self._files``, visited in list order. For
    each file the previous digest and that file's history are concatenated
    and reduced to an MD5 hex digest. A file without a ``history``
    attribute contributes an empty string. When ``self._files`` is empty
    the revision is the empty string.
    """
    digest = ""
    for path in self._files:
        with xr.open_dataset(path) as dataset:
            history: str = dataset.attrs.get("history", "")

            # Chain the running digest into the next one so that every
            # file, and the order of the files, influences the result.
            digest = hashlib.md5((digest + history).encode()).hexdigest()

    return {"revision": digest}

def _read_file_variables(self, filename) -> dict[str, str]:
"""Returns a mapping of variable name to unit for the dataset.
Expand Down
3 changes: 3 additions & 0 deletions src/pyaro_readers/netcdf_rw/Netcdf_RWTimeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def __init__(
raise Netcdf_RWTimeseriesException(f"unable to read definition-file: {ex}")
return

def metadata(self):
    """Return the metadata mapping of this reader.

    This reader has no metadata entries to report, so each call yields a
    new, empty dictionary.
    """
    return {}

def _read_json(self, file, empty):
filepath = os.path.join(self._directory, file)
res = empty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ def _process_open_file(
np.nan,
)

def metadata(self):
    """Return the metadata mapping of this reader.

    Nothing is recorded as metadata here; the caller receives a new, empty
    dictionary.
    """
    return {}

def _unfiltered_data(self, varname) -> Data:
return self._data[varname]

Expand Down
3 changes: 3 additions & 0 deletions src/pyaro_readers/nilupmfebas/EbasPmfReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ def __init__(
# filename is something else
raise EBASPMFReaderException(f"No such file or directory: {filename}")

def metadata(self):
    """Return the metadata mapping of this reader.

    The reader exposes no metadata entries, so the returned dictionary is
    always new and empty.
    """
    return {}

def read_file_basic(
self,
filename: [Path, str],
Expand Down
5 changes: 5 additions & 0 deletions tests/test_AERONETTimeSeriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import pyaro.timeseries
from pyaro.timeseries.Wrappers import VariableNameChangingReader

revision_md5 = "035e8b31b2ffdf34703206061aa13ebf"

TEST_URL = "https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsun_testdata.csv"
TEST_ZIP_URL = (
"https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsun_testdata.csv.zip"
Expand Down Expand Up @@ -44,6 +46,7 @@ def test_dl_data_unzipped(self):
count += len(ts.data(var))
self.assertEqual(count, 49965)
self.assertEqual(len(ts.stations()), 4)
self.assertEqual(ts.metadata()["revision"], revision_md5)

def test_dl_data_zipped(self):
if not self.external_resource_available(TEST_ZIP_URL):
Expand All @@ -60,6 +63,7 @@ def test_dl_data_zipped(self):
count += len(ts.data(var))
self.assertEqual(count, 49965)
self.assertEqual(len(ts.stations()), 4)
self.assertEqual(ts.metadata()["revision"], revision_md5)

def test_aeronet_data_zipped(self):
if not os.path.exists("/lustre"):
Expand All @@ -79,6 +83,7 @@ def test_aeronet_data_zipped(self):
count += len(ts.data(var))
self.assertGreaterEqual(count, 49965)
self.assertGreaterEqual(len(ts.stations()), 4)
self.assertEqual(ts.metadata()["revision"], revision_md5)

def test_init(self):
engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
Expand Down
4 changes: 4 additions & 0 deletions tests/test_HARPReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def test_1read(self):
self.assertGreaterEqual(len(ts.variables()), 2)
self.assertGreaterEqual(len(ts.stations()), 1)

assert ts.metadata()["revision"] == 'c45239c7df6eb158211aea32c6ba6da6'

def test_2open_directory(self):
if os.path.exists(self.testdata_dir):
with pyaro.open_timeseries(
Expand All @@ -46,6 +48,8 @@ def test_2open_directory(self):
assert isinstance(data.units, str)
self.assertGreaterEqual(len(ts.variables()), 2)
self.assertGreaterEqual(len(ts.stations()), 7)

assert ts.metadata()["revision"] == ""
else:
pass

Expand Down

0 comments on commit dc84b8a

Please sign in to comment.