Open-EO · HansVRP · Jul 15, 2024 · Jun 5, 2024 · Jun 6, 2024 · Jun 6, 2024
diff --git a/src/openeo_gfmap/features/feature_extractor.py b/src/openeo_gfmap/features/feature_extractor.py
@@ -1,6 +1,7 @@
 """Feature extractor functionalities. Such as a base class to assist the
 implementation of feature extractors of a UDF.
 """
+
 import functools
 import inspect
 import logging
@@ -32,6 +33,8 @@ class FeatureExtractor(ABC):
     """
 
     def __init__(self) -> None:
+        self._epsg = None
+
         logging.basicConfig(level=logging.INFO)
         self.logger = logging.getLogger(self.__class__.__name__)
 
@@ -88,6 +91,10 @@ def epsg(self) -> int:
         """Returns the EPSG code of the datacube."""
         return self._epsg
 
+    @epsg.setter
+    def epsg(self, value: int) -> None:
+        self._epsg = value  # value later returned by the `epsg` property getter
+
     def dependencies(self) -> list:
         """Returns the additional dependencies such as wheels or zip files.
         Dependencies should be returned as a list of string, which will set-up at the top of the
@@ -204,6 +211,7 @@ def _rescale_s1_backscatter(self, arr: xr.DataArray) -> xr.DataArray:
         arr.loc[dict(bands=s1_bands_to_select)] = data_to_rescale
         return arr
 
+    # TODO: remove the fixed transpose, as it makes the code harder to follow.
     def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:
         arr = cube.get_array().transpose("bands", "t", "y", "x")
         arr = self._common_preparations(arr, parameters)

diff --git a/src/openeo_gfmap/fetching/generic.py b/src/openeo_gfmap/fetching/generic.py
@@ -5,7 +5,6 @@
 from typing import Callable
 
 import openeo
-from geojson import GeoJSON
 
 from openeo_gfmap.backend import Backend, BackendContext
 from openeo_gfmap.fetching import CollectionFetcher, FetchType, _log

diff --git a/src/openeo_gfmap/inference/model_inference.py b/src/openeo_gfmap/inference/model_inference.py
@@ -1,6 +1,7 @@
 """Inference functionalities. Such as a base class to assist the implementation
 of inference models on an UDF.
 """
+
 import functools
 import inspect
 import logging

diff --git a/src/openeo_gfmap/utils/catalogue.py b/src/openeo_gfmap/utils/catalogue.py
@@ -1,4 +1,5 @@
 """Functionalities to interract with product catalogues."""
+
 import geojson
 import requests
 from pyproj.crs import CRS

diff --git a/tests/test_openeo_gfmap/test_cloud_masking.py b/tests/test_openeo_gfmap/test_cloud_masking.py
@@ -29,6 +29,11 @@
 temporal_extent = TemporalContext(start_date="2022-11-01", end_date="2023-02-28")
 
 
+# TODO: integration test; consider whether to move this to the Python client itself.
+# Where do we want the official BAP implementation to end up?
+# We should include assertions to measure regression.
+# Unit test with a dummy cube?
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", backends)
 def test_bap_score(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()
@@ -71,6 +76,11 @@ def test_bap_score(backend: Backend):
             )
 
 
+# TODO: integration test; consider whether to move this to the Python client itself.
+# Where do we want the official BAP implementation to end up?
+# We should include assertions to measure regression.
+# Unit test with a dummy cube?
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", backends)
 def test_bap_masking(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()
@@ -118,6 +128,10 @@ def test_bap_masking(backend: Backend):
             )
 
 
+# TODO: A convoluted test that contains a unit test for the intervals,
+# followed by an integration test on BAP masking.
+# Why is the post-processing included?
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", backends)
 def test_bap_quintad(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()

diff --git a/tests/test_openeo_gfmap/test_feature_extractors.py b/tests/test_openeo_gfmap/test_feature_extractors.py
@@ -88,6 +88,10 @@ def execute(self, inarr: xr.DataArray) -> xr.DataArray:
         return inarr.transpose("bands", "y", "x")
 
 
+# TODO: A convoluted test. I would write unit tests for the functionality defined in
+# the feature extractor class, then adapt this test into an integration/regression test.
+# Is the idea to test the extractor? Do we want to catch data unavailability?
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", backends)
 def test_patch_feature_udf(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()
@@ -131,6 +135,8 @@ def test_patch_feature_udf(backend: Backend):
     assert set(output_cube.keys()) == set(["red", "green", "blue", "crs"])
 
 
+# TODO Similar as above, but for S1
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", backends)
 def test_s1_rescale(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()
@@ -174,6 +180,8 @@ def test_s1_rescale(backend: Backend):
     assert output_path.exists()
 
 
+# TODO Replace by unit test on the functionalities defined in PatchFeatureExtractor/PointFeatureExtractor
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", backends)
 def test_latlon_extractor(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()
@@ -218,6 +226,8 @@ def test_latlon_extractor(backend: Backend):
     assert set(output_cube.keys()) == set(["red", "lat", "lon", "crs"])
 
 
+# TODO; will local processing be part of the API?
+@pytest.mark.skip
 def test_patch_feature_local():
     input_path = Path(__file__).parent / "resources/test_optical_cube.nc"
 

diff --git a/tests/test_openeo_gfmap/test_managers.py b/tests/test_openeo_gfmap/test_managers.py
@@ -7,6 +7,8 @@
 from openeo_gfmap.manager.job_splitters import split_job_hex
 
 
+# TODO: can we assert on exact numbers instead?
+# That would remove the need for the print statement.
 def test_split_jobs():
     dataset_path = Path(__file__).parent / "resources/wc_extraction_dataset.gpkg"
 

diff --git a/tests/test_openeo_gfmap/test_model_inference.py b/tests/test_openeo_gfmap/test_model_inference.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 
 import numpy as np
+import pytest
 import rasterio
 from openeo.udf import XarrayDataCube
 
@@ -36,6 +37,8 @@
 dependency_url = "https://artifactory.vgt.vito.be/artifactory/auxdata-public/openeo/onnx_dependencies_1.16.3.zip"
 
 
+# TODO: in addition, we could assert on the output values; however, this edges towards MLOps.
+@pytest.mark.skip
 def test_onnx_inference_local():
     """Test the ONNX Model inference locally"""
     inds = load_dataarray_url(resources_file)
@@ -64,6 +67,8 @@ def test_onnx_inference_local():
     output.to_netcdf(output_path)
 
 
+# TODO; integration test of +- full pipeline
+@pytest.mark.skip
 def test_onnx_inference():
     """Simple test on the ONNX Model Inference class"""
     connection = cdse_connection()

diff --git a/tests/test_openeo_gfmap/test_s1_fetchers.py b/tests/test_openeo_gfmap/test_s1_fetchers.py
@@ -29,6 +29,7 @@
 from .test_s2_fetchers import POINT_EXTRACTION_DF, test_backends, test_configurations
 
 
+# Integration test: checks that the output S1 cube has the correct band names.
 class TestS1Extractors:
     """Build collection extractor for different S1 collections on different
     backends.
@@ -89,6 +90,8 @@ def sentinel1_grd(
         for harmonierd_name in expected_harmonized_bands:
             assert harmonierd_name in results.keys()
 
+    # TODO: convoluted comparison; we could use a utility function that calculates a
+    # statistic for every band, and it would be better to make use of pytest.approx.
     def compare_sentinel1_tiles():
         """Compare the different tiles gathered from different backends,
         they should be similar, if they are computed with the same
@@ -132,6 +135,7 @@ def compare_sentinel1_tiles():
             similarity_score = arrays_cosine_similarity(first_tile, tile_to_compare)
             assert similarity_score >= 0.95
 
+    # TODO integration test
     def sentinel1_grd_point_based(
         spatial_context: SpatialContext,
         temporal_context: TemporalContext,
@@ -189,6 +193,7 @@ def sentinel1_grd_point_based(
 
         df.to_parquet(str(output_file).replace(".json", ".parquet"))
 
+    # TODO integration test
     def sentinel1_grd_polygon_based(
         spatial_context: SpatialContext,
         temporal_context: TemporalContext,
@@ -240,6 +245,7 @@ def sentinel1_grd_polygon_based(
         assert len(extracted_files) == len(spatial_context["features"])
 
 
+@pytest.mark.skip
 @pytest.mark.parametrize(
     "spatial_context, temporal_context, backend", test_configurations
 )
@@ -252,11 +258,13 @@ def test_sentinel1_grd(
     )
 
 
+@pytest.mark.skip
 @pytest.mark.depends(on=["test_sentinel1_grd"])
 def test_compare_sentinel1_tiles():
     TestS1Extractors.compare_sentinel1_tiles()
 
 
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", test_backends)
 def test_sentinel1_grd_point_based(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()

diff --git a/tests/test_openeo_gfmap/test_s2_fetchers.py b/tests/test_openeo_gfmap/test_s2_fetchers.py
@@ -253,6 +253,7 @@ def sentinel2_l2a_polygon_based(
         assert len(extracted_files) == len(spatial_context["features"])
 
 
+@pytest.mark.skip
 @pytest.mark.parametrize(
     "spatial_context, temporal_context, backend", test_configurations
 )
@@ -265,11 +266,13 @@ def test_sentinel2_l2a(
     )
 
 
+@pytest.mark.skip
 @pytest.mark.depends(on=["test_sentinel2_l2a"])
 def test_compare_sentinel2_tiles():
     TestS2Extractors.compare_sentinel2_tiles()
 
 
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", test_backends)
 def test_sentinel2_l2a_point_based(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()
@@ -293,6 +296,7 @@ def test_sentinel2_l2a_point_based(backend: Backend):
     )
 
 
+@pytest.mark.skip
 @pytest.mark.parametrize("backend", test_backends)
 def test_sentinel2_l2a_polygon_based(backend: Backend):
     connection = BACKEND_CONNECTIONS[backend]()

diff --git a/tests/test_openeo_gfmap/test_unit_patch_extractors.py b/tests/test_openeo_gfmap/test_unit_patch_extractors.py
@@ -0,0 +1,103 @@
+from unittest.mock import MagicMock
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from openeo_gfmap.features import PatchFeatureExtractor
+
+LAT_HARMONIZED_NAME = "GEO-LAT"
+LON_HARMONIZED_NAME = "GEO-LON"
+EPSG_HARMONIZED_NAME = "GEO-EPSG"
+
+
+# Minimal PatchFeatureExtractor subclass for tests: fixed labels, pass-through execute
+class DummyPatchFeatureExtractor(PatchFeatureExtractor):
+    def output_labels(self) -> list:
+        return ["label1", "label2"]  # two placeholder output labels
+
+    def execute(self, inarr: xr.DataArray) -> xr.DataArray:
+        return inarr  # identity: the input array is returned unchanged
+
+
+@pytest.fixture
+def mock_feature_extractor() -> DummyPatchFeatureExtractor:
+    return DummyPatchFeatureExtractor()  # fresh instance for every requesting test
+
+
+@pytest.fixture
+def mock_data_array() -> xr.DataArray:  # 2x2 integer array with dims (y, x)
+    return xr.DataArray(np.array([[1, 2], [3, 4]]), dims=["y", "x"])
+
+
+def test_get_latlons_epsg_none(mock_feature_extractor, mock_data_array):
+    mock_feature_extractor._epsg = None  # no EPSG code set on the extractor
+    with pytest.raises(Exception):  # any exception type is accepted here
+        mock_feature_extractor.get_latlons(mock_data_array)
+
+
+def test_get_latlons_epsg_4326(mock_feature_extractor, mock_data_array):
+    mock_feature_extractor._epsg = 4326
+    result = mock_feature_extractor.get_latlons(mock_data_array)
+    assert LAT_HARMONIZED_NAME in result.coords["bands"].values  # "GEO-LAT" band
+    assert LON_HARMONIZED_NAME in result.coords["bands"].values  # "GEO-LON" band
+
+
+def test_get_latlons_reproject(mock_feature_extractor, mock_data_array):
+    mock_feature_extractor._epsg = (
+        3857  # EPSG code other than 4326 (3857 is Web Mercator)
+    )
+
+    # Build the 2-D grid spanned by the input's x and y coordinate values
+    x_coords = mock_data_array.coords["x"].values
+    y_coords = mock_data_array.coords["y"].values
+
+    xx, yy = np.meshgrid(x_coords, y_coords)
+    result = mock_feature_extractor.get_latlons(mock_data_array)
+
+    # The result must exist and its first two entries must match the grid shape
+    assert result is not None
+    assert result[0].shape == xx.shape
+    assert result[1].shape == yy.shape
+
+
+# _rescale_s1_backscatter on a uint16 array with VV/VH bands must return uint16 data
+def test_rescale_s1_backscatter_valid(mock_feature_extractor, mock_data_array):
+    s1_bands = ["S1-SIGMA0-VV", "S1-SIGMA0-VH"]
+    data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.uint16)
+    mock_data_array = xr.DataArray(  # shadows the fixture argument of the same name
+        data, dims=["bands", "y", "x"], coords={"bands": s1_bands}
+    )
+
+    result = mock_feature_extractor._rescale_s1_backscatter(mock_data_array)
+    assert result.dtype == np.uint16
+
+
+# TODO: re-enable once the underlying executor has been reworked.
+@pytest.mark.skip(
+    reason="Skipping test since the underlying executor needs to be changed"
+)
+def test_execute():
+    # Extractor under test, with the rescale_s1 parameter set
+    extractor = DummyPatchFeatureExtractor()
+    extractor._parameters = {"rescale_s1": True}
+
+    # Stand-in cube whose array has dims (bands, t, y, x)
+    data = np.ones((1, 2, 2, 2))
+    mock_cube = MagicMock()
+    mock_cube.get_array.return_value = xr.DataArray(data, dims=["bands", "t", "y", "x"])
+
+    # Replace the preparation/rescaling steps so they just hand back that array
+    extractor._common_preparations = MagicMock(return_value=mock_cube.get_array())
+    extractor._rescale_s1_backscatter = MagicMock(return_value=mock_cube.get_array())
+
+    # Execute the method
+    result = extractor._execute(mock_cube, {})
+
+    # DataArray.dims is a tuple, so compare against a tuple (a list never compares equal)
+    expected_dims = ("bands", "t", "y", "x")
+    assert result.get_array().dims == expected_dims
+
+    # Check that the mock methods were called
+    extractor._common_preparations.assert_called()
+    extractor._rescale_s1_backscatter.assert_called()