diff --git a/src/pyaro_readers/eeareader/EEATimeseriesReader.py b/src/pyaro_readers/eeareader/EEATimeseriesReader.py index 3fd5d61..2a4e738 100644 --- a/src/pyaro_readers/eeareader/EEATimeseriesReader.py +++ b/src/pyaro_readers/eeareader/EEATimeseriesReader.py @@ -108,7 +108,7 @@ def _read(filepath: Path, pyarrow_filters) -> polars.DataFrame: @dataclasses.dataclass class _Filters: - pyarrow: list[list[Any]] + pyarrow: list[tuple[str, str, datetime]] country: pyaro.timeseries.Filter.CountryFilter | None time: pyaro.timeseries.Filter.TimeBoundsFilter | None @@ -229,17 +229,21 @@ def data(self, varname: str) -> Data: def _read( self, - variable: str, + variable: str | int, ) -> _DataFrame: # https://dd.eionet.europa.eu/vocabulary/aq/pollutant - pollutant_candidates = self._metadata_pollutant.filter( - polars.col("Notation").eq(variable) - ) - if len(pollutant_candidates) == 0: - raise Exception(f"No variable ID found for {variable}") + if isinstance(variable, int): + variable_id = variable + else: + # Might be more than one, but we choose the first one + pollutant_candidates = self._metadata_pollutant.filter( + polars.col("Notation").eq(variable) + ) + if len(pollutant_candidates) == 0: + raise Exception(f"No variable ID found for {variable}") + variable_id = pollutant_candidates["Id"][0] - # Might be more than one, but we choose the first one - variable_id = pollutant_candidates["Id"][0] + filters = _transform_filters(self._filters, variable_id) # historical_path = self._data_directory.joinpath("historical") # verified_path = self._data_directory.joinpath("verified") @@ -248,8 +252,6 @@ def _read( # TODO: Enable depending on data wanted from e.g. time requested searchpaths = [unverified_path] - filters = _transform_filters(self._filters, variable_id) - dataset = polars.DataFrame( schema={ "Samplingpoint": str, @@ -268,10 +270,6 @@ def _read( ) countries = _country_code_mappings_eea.values() - assert set(i.name for i in unverified_path.iterdir()).issubset( - countries - ), "Some directories has an unknown country code" - paths = [] for countrycode in countries: if filters.country is not None: @@ -281,6 +279,9 @@ def _read( continue for searchpath in searchpaths: + assert set(i.name for i in searchpath.iterdir()).issubset( + countries + ), "Some directories has an unknown country code" countrypath = searchpath.joinpath(countrycode) if not countrypath.exists(): continue