diff --git a/src/openeo_gfmap/features/feature_extractor.py b/src/openeo_gfmap/features/feature_extractor.py index abf2323..b0a46e3 100644 --- a/src/openeo_gfmap/features/feature_extractor.py +++ b/src/openeo_gfmap/features/feature_extractor.py @@ -1,6 +1,7 @@ """Feature extractor functionalities. Such as a base class to assist the implementation of feature extractors of a UDF. """ + import functools import inspect import logging @@ -32,6 +33,8 @@ class FeatureExtractor(ABC): """ def __init__(self) -> None: + self._epsg = None + logging.basicConfig(level=logging.INFO) self.logger = logging.getLogger(self.__class__.__name__) @@ -88,6 +91,10 @@ def epsg(self) -> int: """Returns the EPSG code of the datacube.""" return self._epsg + @epsg.setter + def epsg(self, value: int): + self._epsg = value + def dependencies(self) -> list: """Returns the additional dependencies such as wheels or zip files. Dependencies should be returned as a list of string, which will set-up at the top of the @@ -204,6 +211,7 @@ def _rescale_s1_backscatter(self, arr: xr.DataArray) -> xr.DataArray: arr.loc[dict(bands=s1_bands_to_select)] = data_to_rescale return arr + # TODO to remove the fixed transpose as it contributes to unclear code. 
def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube: arr = cube.get_array().transpose("bands", "t", "y", "x") arr = self._common_preparations(arr, parameters) diff --git a/src/openeo_gfmap/fetching/generic.py b/src/openeo_gfmap/fetching/generic.py index 203c4df..21dc209 100644 --- a/src/openeo_gfmap/fetching/generic.py +++ b/src/openeo_gfmap/fetching/generic.py @@ -5,7 +5,6 @@ from typing import Callable import openeo -from geojson import GeoJSON from openeo_gfmap.backend import Backend, BackendContext from openeo_gfmap.fetching import CollectionFetcher, FetchType, _log diff --git a/src/openeo_gfmap/inference/model_inference.py b/src/openeo_gfmap/inference/model_inference.py index 05c1bea..ca7ff41 100644 --- a/src/openeo_gfmap/inference/model_inference.py +++ b/src/openeo_gfmap/inference/model_inference.py @@ -1,6 +1,7 @@ """Inference functionalities. Such as a base class to assist the implementation of inference models on an UDF. """ + import functools import inspect import logging diff --git a/src/openeo_gfmap/utils/catalogue.py b/src/openeo_gfmap/utils/catalogue.py index d20648b..d6fd60e 100644 --- a/src/openeo_gfmap/utils/catalogue.py +++ b/src/openeo_gfmap/utils/catalogue.py @@ -1,4 +1,5 @@ """Functionalities to interract with product catalogues.""" + import geojson import requests from pyproj.crs import CRS diff --git a/tests/test_openeo_gfmap/test_cloud_masking.py b/tests/test_openeo_gfmap/test_cloud_masking.py index 95a811d..feae6ff 100644 --- a/tests/test_openeo_gfmap/test_cloud_masking.py +++ b/tests/test_openeo_gfmap/test_cloud_masking.py @@ -29,6 +29,11 @@ temporal_extent = TemporalContext(start_date="2022-11-01", end_date="2023-02-28") +# TODO; integration test; to consider if we want to move this to the python client itself. +# where do we want the official BAP implementation to end up? +# we should include an assert functionality to measure regression. +# unit test with dummy cube? 
+@pytest.mark.skip @pytest.mark.parametrize("backend", backends) def test_bap_score(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() @@ -71,6 +76,11 @@ def test_bap_score(backend: Backend): ) +# TODO; integration test; to consider if we want to move this to the python client itself. +# where do we want the official BAP implementation to end up? +# we should include an assert functionality to measure regression. +# unit test with dummy cube? +@pytest.mark.skip @pytest.mark.parametrize("backend", backends) def test_bap_masking(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() @@ -118,6 +128,10 @@ def test_bap_masking(backend: Backend): ) +# TODO; A convoluted test which contains a unit test for the intervals, +# followed by an integration test on BAP masking. +# unclear why the post-processing is included? +@pytest.mark.skip @pytest.mark.parametrize("backend", backends) def test_bap_quintad(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() diff --git a/tests/test_openeo_gfmap/test_feature_extractors.py b/tests/test_openeo_gfmap/test_feature_extractors.py index b283dcf..630d1bb 100644 --- a/tests/test_openeo_gfmap/test_feature_extractors.py +++ b/tests/test_openeo_gfmap/test_feature_extractors.py @@ -88,6 +88,10 @@ def execute(self, inarr: xr.DataArray) -> xr.DataArray: return inarr.transpose("bands", "y", "x") +# TODO; A convoluted test. I would write unit test functions for the functionalities defined within the Feature extractor class. +# Then we can adapt this test to an integration/regression test +# Is the idea to test the extractor? We want to catch data unavailability? 
+@pytest.mark.skip @pytest.mark.parametrize("backend", backends) def test_patch_feature_udf(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() @@ -131,6 +135,8 @@ def test_patch_feature_udf(backend: Backend): assert set(output_cube.keys()) == set(["red", "green", "blue", "crs"]) +# TODO Similar as above, but for S1 +@pytest.mark.skip @pytest.mark.parametrize("backend", backends) def test_s1_rescale(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() @@ -174,6 +180,8 @@ def test_s1_rescale(backend: Backend): assert output_path.exists() +# TODO Replace by unit test on the functionalities defined in PatchFeatureExtractor/PointFeatureExtractor +@pytest.mark.skip @pytest.mark.parametrize("backend", backends) def test_latlon_extractor(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() @@ -218,6 +226,8 @@ def test_latlon_extractor(backend: Backend): assert set(output_cube.keys()) == set(["red", "lat", "lon", "crs"]) +# TODO; will local processing be part of the API? +@pytest.mark.skip def test_patch_feature_local(): input_path = Path(__file__).parent / "resources/test_optical_cube.nc" diff --git a/tests/test_openeo_gfmap/test_managers.py b/tests/test_openeo_gfmap/test_managers.py index 71a65aa..cabb24c 100644 --- a/tests/test_openeo_gfmap/test_managers.py +++ b/tests/test_openeo_gfmap/test_managers.py @@ -7,6 +7,8 @@ from openeo_gfmap.manager.job_splitters import split_job_hex +# TODO can we instead assert on exact numbers ? 
+# would remove the print statement def test_split_jobs(): dataset_path = Path(__file__).parent / "resources/wc_extraction_dataset.gpkg" diff --git a/tests/test_openeo_gfmap/test_model_inference.py b/tests/test_openeo_gfmap/test_model_inference.py index 937bba4..f1619e0 100644 --- a/tests/test_openeo_gfmap/test_model_inference.py +++ b/tests/test_openeo_gfmap/test_model_inference.py @@ -3,6 +3,7 @@ from pathlib import Path import numpy as np +import pytest import rasterio from openeo.udf import XarrayDataCube @@ -36,6 +37,8 @@ dependency_url = "https://artifactory.vgt.vito.be/artifactory/auxdata-public/openeo/onnx_dependencies_1.16.3.zip" +# TODO; as an addition we could include an assert on the output values, however this edges towards MLOPS +@pytest.mark.skip def test_onnx_inference_local(): """Test the ONNX Model inference locally""" inds = load_dataarray_url(resources_file) @@ -64,6 +67,8 @@ def test_onnx_inference_local(): output.to_netcdf(output_path) +# TODO; integration test of +- full pipeline +@pytest.mark.skip def test_onnx_inference(): """Simple test on the ONNX Model Inference class""" connection = cdse_connection() diff --git a/tests/test_openeo_gfmap/test_s1_fetchers.py b/tests/test_openeo_gfmap/test_s1_fetchers.py index c36c099..8f1e644 100644 --- a/tests/test_openeo_gfmap/test_s1_fetchers.py +++ b/tests/test_openeo_gfmap/test_s1_fetchers.py @@ -29,6 +29,7 @@ from .test_s2_fetchers import POINT_EXTRACTION_DF, test_backends, test_configurations +# integration test checks if the output S1 cube has the correct band names; class TestS1Extractors: """Build collection extractor for different S1 collections on different backends. 
@@ -89,6 +90,8 @@ def sentinel1_grd( for harmonierd_name in expected_harmonized_bands: assert harmonierd_name in results.keys() + # TODO; convoluted comparison; we can use a utility function which calculates a + # statistic for every band, better to make use of pytest.approx def compare_sentinel1_tiles(): """Compare the different tiles gathered from different backends, they should be similar, if they are computed with the same @@ -132,6 +135,7 @@ def compare_sentinel1_tiles(): similarity_score = arrays_cosine_similarity(first_tile, tile_to_compare) assert similarity_score >= 0.95 + # TODO integration test def sentinel1_grd_point_based( spatial_context: SpatialContext, temporal_context: TemporalContext, @@ -189,6 +193,7 @@ def sentinel1_grd_point_based( df.to_parquet(str(output_file).replace(".json", ".parquet")) + # TODO integration test def sentinel1_grd_polygon_based( spatial_context: SpatialContext, temporal_context: TemporalContext, @@ -240,6 +245,7 @@ def sentinel1_grd_polygon_based( assert len(extracted_files) == len(spatial_context["features"]) +@pytest.mark.skip @pytest.mark.parametrize( "spatial_context, temporal_context, backend", 
test_configurations ) @@ -265,11 +266,13 @@ def test_sentinel2_l2a( ) +@pytest.mark.skip @pytest.mark.depends(on=["test_sentinel2_l2a"]) def test_compare_sentinel2_tiles(): TestS2Extractors.compare_sentinel2_tiles() +@pytest.mark.skip @pytest.mark.parametrize("backend", test_backends) def test_sentinel2_l2a_point_based(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() @@ -293,6 +296,7 @@ def test_sentinel2_l2a_point_based(backend: Backend): ) +@pytest.mark.skip @pytest.mark.parametrize("backend", test_backends) def test_sentinel2_l2a_polygon_based(backend: Backend): connection = BACKEND_CONNECTIONS[backend]() diff --git a/tests/test_openeo_gfmap/test_unit_patch_extractors.py b/tests/test_openeo_gfmap/test_unit_patch_extractors.py new file mode 100644 index 0000000..4660675 --- /dev/null +++ b/tests/test_openeo_gfmap/test_unit_patch_extractors.py @@ -0,0 +1,103 @@ +from unittest.mock import MagicMock + +import numpy as np +import pytest +import xarray as xr + +from openeo_gfmap.features import PatchFeatureExtractor + +LAT_HARMONIZED_NAME = "GEO-LAT" +LON_HARMONIZED_NAME = "GEO-LON" +EPSG_HARMONIZED_NAME = "GEO-EPSG" + + +# Mock class for the patch feature extractor +class DummyPatchFeatureExtractor(PatchFeatureExtractor): + def output_labels(self): + return ["label1", "label2"] + + def execute(self, inarr: xr.DataArray) -> xr.DataArray: + return inarr # Simplified for testing purposes + + +@pytest.fixture +def mock_feature_extractor(): + return DummyPatchFeatureExtractor() + + +@pytest.fixture +def mock_data_array(): + return xr.DataArray(np.array([[1, 2], [3, 4]]), dims=["y", "x"]) + + +def test_get_latlons_epsg_none(mock_feature_extractor, mock_data_array): + mock_feature_extractor._epsg = None + with pytest.raises(Exception): + mock_feature_extractor.get_latlons(mock_data_array) + + +def test_get_latlons_epsg_4326(mock_feature_extractor, mock_data_array): + mock_feature_extractor._epsg = 4326 + result = 
mock_feature_extractor.get_latlons(mock_data_array) + assert LAT_HARMONIZED_NAME in result.coords["bands"].values + assert LON_HARMONIZED_NAME in result.coords["bands"].values + + +def test_get_latlons_reproject(mock_feature_extractor, mock_data_array): + mock_feature_extractor._epsg = ( + 3857 # Set the EPSG code to the desired projection (e.g., Web Mercator) + ) + + # Create mock coordinates matching the 'x' and 'y' dimensions + x_coords = mock_data_array.coords["x"].values + y_coords = mock_data_array.coords["y"].values + + xx, yy = np.meshgrid(x_coords, y_coords) + result = mock_feature_extractor.get_latlons(mock_data_array) + + # Assert the expected behavior (add your specific assertions here) + assert result is not None + assert result[0].shape == xx.shape + assert result[1].shape == yy.shape + + +# test rescaling +def test_rescale_s1_backscatter_valid(mock_feature_extractor, mock_data_array): + s1_bands = ["S1-SIGMA0-VV", "S1-SIGMA0-VH"] + data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.uint16) + mock_data_array = xr.DataArray( + data, dims=["bands", "y", "x"], coords={"bands": s1_bands} + ) + + result = mock_feature_extractor._rescale_s1_backscatter(mock_data_array) + assert result.dtype == np.uint16 + + +# TODO +@pytest.mark.skip( + reason="Skipping test for since underlying excecutor needs to be changed" +) +def test_execute(): + # Create an instance of the extractor + extractor = DummyPatchFeatureExtractor() + extractor._parameters = {"rescale_s1": True} + + # Mock the cube + data = np.ones((1, 2, 2, 2)) + mock_cube = MagicMock() + mock_cube.get_array.return_value = xr.DataArray(data, dims=["bands", "t", "y", "x"]) + + # Mock the methods + extractor._common_preparations = MagicMock(return_value=mock_cube.get_array()) + extractor._rescale_s1_backscatter = MagicMock(return_value=mock_cube.get_array()) + + # Execute the method + result = extractor._execute(mock_cube, {}) + + # Ensure the result is correctly transposed to have dimensions 
["bands", "y", "x"] + expected_dims = ["bands", "t", "y", "x"] + assert result.get_array().dims == expected_dims + + # Check that the mock methods were called + extractor._common_preparations.assert_called() + extractor._rescale_s1_backscatter.assert_called()