diff --git a/src/pyaro_readers/aeronetsdareader/AeronetSdaTimeseriesReader.py b/src/pyaro_readers/aeronetsdareader/AeronetSdaTimeseriesReader.py index 7b0e1f4..c0dd50a 100644 --- a/src/pyaro_readers/aeronetsdareader/AeronetSdaTimeseriesReader.py +++ b/src/pyaro_readers/aeronetsdareader/AeronetSdaTimeseriesReader.py @@ -258,6 +258,9 @@ def __init__( ) bar.close() + def metadata(self): + return dict() + def _unfiltered_data(self, varname) -> Data: return self._data[varname] diff --git a/src/pyaro_readers/aeronetsunreader/AeronetSunTimeseriesReader.py b/src/pyaro_readers/aeronetsunreader/AeronetSunTimeseriesReader.py index bbb1965..8dd2d2c 100644 --- a/src/pyaro_readers/aeronetsunreader/AeronetSunTimeseriesReader.py +++ b/src/pyaro_readers/aeronetsunreader/AeronetSunTimeseriesReader.py @@ -18,6 +18,7 @@ Station, ) from tqdm import tqdm +import hashlib # default URL BASE_URL = "https://aeronet.gsfc.nasa.gov/data_push/V3/All_Sites_Times_Daily_Averages_AOD20.zip" @@ -109,6 +110,7 @@ def __init__( else: with open(self._filename, newline="") as csvfile: lines = csvfile.readlines() + self._revisionstr = hashlib.md5("".join(lines).encode()).hexdigest() for _hidx in range(HEADER_LINE_NO - 1): self._header.append(lines.pop(0)) @@ -191,7 +193,14 @@ def __init__( value, station, lat, lon, alt, start, end, Flag.VALID, np.nan ) bar.close() - + + def metadata(self): + metadata = dict() + if self._revisionstr is not None: + metadata["revision"] = self._revisionstr + + return metadata + def _unfiltered_data(self, varname) -> Data: return self._data[varname] diff --git a/src/pyaro_readers/ascii2netcdf/Ascii2NetcdfTimeseries.py b/src/pyaro_readers/ascii2netcdf/Ascii2NetcdfTimeseries.py index 674166f..6c2df3f 100644 --- a/src/pyaro_readers/ascii2netcdf/Ascii2NetcdfTimeseries.py +++ b/src/pyaro_readers/ascii2netcdf/Ascii2NetcdfTimeseries.py @@ -85,6 +85,9 @@ def __init__( ) return + def metadata(self): + return dict() + def _is_year_in_filters(self, year): start_year = np.datetime64(f"{year}-01-01 00:00:00") end_year = np.datetime64(f"{year}-12-31 23:59:59") diff --git a/src/pyaro_readers/harpreader/harpreader.py b/src/pyaro_readers/harpreader/harpreader.py index e9cab7d..8ba07ca 100644 --- a/src/pyaro_readers/harpreader/harpreader.py +++ b/src/pyaro_readers/harpreader/harpreader.py @@ -15,6 +15,7 @@ from tqdm import tqdm import cfunits from pyaro_readers.units_helpers import UALIASES +import hashlib logger = logging.getLogger(__name__) @@ -39,7 +40,7 @@ class AeronetHARPReader(AutoFilterReaderEngine.AutoFilterReader): def __init__( self, - file: [Path, str], + file: Path | str, filters=[], vars_to_read: list[str] = None, ): @@ -97,6 +98,20 @@ def __init__( ) bar.close() + def metadata(self): + metadata = dict() + + hash = "" + for f in self._files: + with xr.open_dataset(f) as d: + hist: str = d.attrs.get("history", "") + + hash = hashlib.md5((hash+hist).encode()).hexdigest() + + metadata["revision"] = hash + + return metadata + def _read_file_variables(self, filename) -> dict[str, str]: """Returns a mapping of variable name to unit for the dataset. diff --git a/src/pyaro_readers/netcdf_rw/Netcdf_RWTimeseries.py b/src/pyaro_readers/netcdf_rw/Netcdf_RWTimeseries.py index efa8dd0..2f9569d 100644 --- a/src/pyaro_readers/netcdf_rw/Netcdf_RWTimeseries.py +++ b/src/pyaro_readers/netcdf_rw/Netcdf_RWTimeseries.py @@ -67,6 +67,9 @@ def __init__( raise Netcdf_RWTimeseriesException(f"unable to read definition-file: {ex}") return + def metadata(self): + return dict() + def _read_json(self, file, empty): filepath = os.path.join(self._directory, file) res = empty diff --git a/src/pyaro_readers/nilupmfabsorptionreader/NILUPMFAbsorptionReader.py b/src/pyaro_readers/nilupmfabsorptionreader/NILUPMFAbsorptionReader.py index cd75c91..671322f 100644 --- a/src/pyaro_readers/nilupmfabsorptionreader/NILUPMFAbsorptionReader.py +++ b/src/pyaro_readers/nilupmfabsorptionreader/NILUPMFAbsorptionReader.py @@ -176,6 +176,9 @@ def _process_open_file( np.nan, ) + def metadata(self): + return dict() + def _unfiltered_data(self, varname) -> Data: return self._data[varname] diff --git a/src/pyaro_readers/nilupmfebas/EbasPmfReader.py b/src/pyaro_readers/nilupmfebas/EbasPmfReader.py index f1234e0..4c11752 100644 --- a/src/pyaro_readers/nilupmfebas/EbasPmfReader.py +++ b/src/pyaro_readers/nilupmfebas/EbasPmfReader.py @@ -73,6 +73,9 @@ def __init__( # filename is something else raise EBASPMFReaderException(f"No such file or directory: {filename}") + def metadata(self): + return dict() + def read_file_basic( self, filename: [Path, str], diff --git a/tests/test_AERONETTimeSeriesReader.py b/tests/test_AERONETTimeSeriesReader.py index 44c8445..1536d66 100644 --- a/tests/test_AERONETTimeSeriesReader.py +++ b/tests/test_AERONETTimeSeriesReader.py @@ -6,6 +6,8 @@ import pyaro.timeseries from pyaro.timeseries.Wrappers import VariableNameChangingReader +revision_md5 = "035e8b31b2ffdf34703206061aa13ebf" + TEST_URL = "https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsun_testdata.csv" TEST_ZIP_URL = ( "https://pyaerocom.met.no/pyaro-suppl/testdata/aeronetsun_testdata.csv.zip" @@ -44,6 +46,7 @@ def test_dl_data_unzipped(self): count += len(ts.data(var)) self.assertEqual(count, 49965) self.assertEqual(len(ts.stations()), 4) + self.assertEqual(ts.metadata()["revision"], revision_md5) def test_dl_data_zipped(self): if not self.external_resource_available(TEST_ZIP_URL): @@ -60,6 +63,7 @@ def test_dl_data_zipped(self): count += len(ts.data(var)) self.assertEqual(count, 49965) self.assertEqual(len(ts.stations()), 4) + self.assertEqual(ts.metadata()["revision"], revision_md5) def test_aeronet_data_zipped(self): if not os.path.exists("/lustre"): @@ -79,6 +83,7 @@ def test_aeronet_data_zipped(self): count += len(ts.data(var)) self.assertGreaterEqual(count, 49965) self.assertGreaterEqual(len(ts.stations()), 4) + self.assertEqual(ts.metadata()["revision"], revision_md5) def test_init(self): engine = pyaro.list_timeseries_engines()["aeronetsunreader"] diff --git a/tests/test_HARPReader.py b/tests/test_HARPReader.py index 18510c1..25a7c13 100644 --- a/tests/test_HARPReader.py +++ b/tests/test_HARPReader.py @@ -36,6 +36,8 @@ def test_1read(self): self.assertGreaterEqual(len(ts.variables()), 2) self.assertGreaterEqual(len(ts.stations()), 1) + assert ts.metadata()["revision"] == 'c45239c7df6eb158211aea32c6ba6da6' + def test_2open_directory(self): if os.path.exists(self.testdata_dir): with pyaro.open_timeseries( @@ -46,6 +48,8 @@ def test_2open_directory(self): assert isinstance(data.units, str) self.assertGreaterEqual(len(ts.variables()), 2) self.assertGreaterEqual(len(ts.stations()), 7) + + assert ts.metadata()["revision"] == "" else: pass