Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hv check test suite #120

Merged
merged 41 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from 40 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
3a7d87a
fix: added comments in the tests
HansVRP Jun 5, 2024
cfebc1f
test feature extraction unit test
HansVRP Jun 6, 2024
d8d45c8
test feature extraction unit test
HansVRP Jun 6, 2024
dd5e7da
fix: add abstract methods to dummyclass
HansVRP Jun 6, 2024
7b62eae
fix: add espg setter to feature extractor
HansVRP Jun 6, 2024
8762aa1
fix: lat lon assert
HansVRP Jun 6, 2024
d75f11c
fix: mockarray for excecute test
HansVRP Jun 6, 2024
2237b92
fix: work on mocks
HansVRP Jun 6, 2024
43b6559
fix: work on mocks
HansVRP Jun 6, 2024
ecf11d3
fix: work on mocks
HansVRP Jun 6, 2024
8ffdabe
fix: work on mocks
HansVRP Jun 6, 2024
21ab48c
fix: work on mocks
HansVRP Jun 6, 2024
8f60edf
fix: work on mocks
HansVRP Jun 6, 2024
3eb1635
fix: work on mocks
HansVRP Jun 6, 2024
a5605f2
fix: work on mocks
HansVRP Jun 6, 2024
34be739
fix: work on mocks
HansVRP Jun 6, 2024
7246d9e
fix: work on mocks
HansVRP Jun 6, 2024
428f1a3
fix: work on mocks
HansVRP Jun 6, 2024
f23230c
fix: work on mocks
HansVRP Jun 7, 2024
2fadb34
fix: work on mocks
HansVRP Jun 7, 2024
478a692
fix: work on mocks
HansVRP Jun 7, 2024
66d21c4
fix: work on mocks
HansVRP Jun 10, 2024
7ccfddb
fix: work on mocks
HansVRP Jun 10, 2024
9d76a18
fix: include test_unit_patch
HansVRP Jun 10, 2024
65cd4f7
fix: improve readibility and avoid random
HansVRP Jun 10, 2024
b621528
fix: improve readibility and avoid random
HansVRP Jun 10, 2024
ee42242
pass through precommit
HansVRP Jun 11, 2024
e59d8fb
disable integration tests
HansVRP Jun 11, 2024
8276b11
Merge branch 'main' into hv_check_test_suite
HansVRP Jun 11, 2024
4cbffc6
fix: resolve merge conflict
HansVRP Jun 11, 2024
441dec5
fix: resolve merge conflict
HansVRP Jun 11, 2024
973d959
fix: avoid mocking pyproj
HansVRP Jun 11, 2024
647ed52
fix: add assert on pyproj
HansVRP Jun 11, 2024
60cc32a
fix: remove random from test and remove mock pyproj
HansVRP Jun 12, 2024
ee46130
clean up darker
HansVRP Jun 17, 2024
76fab68
fix: skip excecute test
HansVRP Jul 4, 2024
6cb75c2
fix: skip excecute test
HansVRP Jul 4, 2024
41d408b
Merge branch 'main' into hv_check_test_suite
HansVRP Jul 5, 2024
c4da77f
remove comment
HansVRP Jul 5, 2024
7b13472
resolve merge conflict
HansVRP Jul 5, 2024
c5d1394
fix; remove .nc
HansVRP Jul 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/openeo_gfmap/features/feature_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Feature extractor functionalities. Such as a base class to assist the
implementation of feature extractors of a UDF.
"""

import functools
import inspect
import logging
Expand Down Expand Up @@ -32,6 +33,8 @@ class FeatureExtractor(ABC):
"""

def __init__(self) -> None:
self._epsg = None

logging.basicConfig(level=logging.INFO)
self.logger = logging.getLogger(self.__class__.__name__)

Expand Down Expand Up @@ -88,6 +91,10 @@ def epsg(self) -> int:
"""Returns the EPSG code of the datacube."""
return self._epsg

@epsg.setter
def epsg(self, value: int):
self._epsg = value

def dependencies(self) -> list:
"""Returns the additional dependencies such as wheels or zip files.
Dependencies should be returned as a list of string, which will set-up at the top of the
Expand Down Expand Up @@ -204,6 +211,7 @@ def _rescale_s1_backscatter(self, arr: xr.DataArray) -> xr.DataArray:
arr.loc[dict(bands=s1_bands_to_select)] = data_to_rescale
return arr

# TODO to remove the fixed transpose as it contributes to unclear code.
def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:
arr = cube.get_array().transpose("bands", "t", "y", "x")
arr = self._common_preparations(arr, parameters)
Expand Down
1 change: 0 additions & 1 deletion src/openeo_gfmap/fetching/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import Callable

import openeo
from geojson import GeoJSON

from openeo_gfmap.backend import Backend, BackendContext
from openeo_gfmap.fetching import CollectionFetcher, FetchType, _log
Expand Down
1 change: 1 addition & 0 deletions src/openeo_gfmap/inference/model_inference.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Inference functionalities. Such as a base class to assist the implementation
of inference models on an UDF.
"""

import functools
import inspect
import logging
Expand Down
1 change: 1 addition & 0 deletions src/openeo_gfmap/utils/catalogue.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Functionalities to interract with product catalogues."""

import geojson
import requests
from pyproj.crs import CRS
Expand Down
Binary file added temp.nc
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this file be committed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably not

Binary file not shown.
14 changes: 14 additions & 0 deletions tests/test_openeo_gfmap/test_cloud_masking.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@
temporal_extent = TemporalContext(start_date="2022-11-01", end_date="2023-02-28")


# TODO; integration test; to consider if we want to move this to the python client itself.
# where do we want the official BAP implementation to end up?
# we should include an assert functionality to measure regression.
# unit test with dummy cube?
@pytest.mark.skip
@pytest.mark.parametrize("backend", backends)
def test_bap_score(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down Expand Up @@ -71,6 +76,11 @@ def test_bap_score(backend: Backend):
)


# TODO; integration test; to consider if we want to move this to the python client itself.
# where do we want the official BAP implementation to end up?
# we should include an assert functionality to measure regression.
# unit test with dummy cube?
@pytest.mark.skip
@pytest.mark.parametrize("backend", backends)
def test_bap_masking(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down Expand Up @@ -118,6 +128,10 @@ def test_bap_masking(backend: Backend):
)


# TODO; A convoluted test which contains a unit test for the intervals,
# followed with a integration test on BAP masking.
# unclear why the post-processing is included?
@pytest.mark.skip
@pytest.mark.parametrize("backend", backends)
def test_bap_quintad(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down
10 changes: 10 additions & 0 deletions tests/test_openeo_gfmap/test_feature_extractors.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ def execute(self, inarr: xr.DataArray) -> xr.DataArray:
return inarr.transpose("bands", "y", "x")


# TODO; A convoluted test. I would write unit test functions for the functionalities defined within the Feature extractor class.
# Then we can adapt this test to an integration/regression test
# Is the idea to test the extractor? We want to catch data unavailibility?
@pytest.mark.skip
@pytest.mark.parametrize("backend", backends)
def test_patch_feature_udf(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down Expand Up @@ -131,6 +135,8 @@ def test_patch_feature_udf(backend: Backend):
assert set(output_cube.keys()) == set(["red", "green", "blue", "crs"])


# TODO Similar as above, but for S1
@pytest.mark.skip
@pytest.mark.parametrize("backend", backends)
def test_s1_rescale(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down Expand Up @@ -174,6 +180,8 @@ def test_s1_rescale(backend: Backend):
assert output_path.exists()


# TODO Replace by unit test on the functionalities defined in PatchFeatureExtractor/PointFeatureExtractor
@pytest.mark.skip
@pytest.mark.parametrize("backend", backends)
def test_latlon_extractor(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down Expand Up @@ -218,6 +226,8 @@ def test_latlon_extractor(backend: Backend):
assert set(output_cube.keys()) == set(["red", "lat", "lon", "crs"])


# TODO; will local processing be part of the API?
@pytest.mark.skip
def test_patch_feature_local():
input_path = Path(__file__).parent / "resources/test_optical_cube.nc"

Expand Down
2 changes: 2 additions & 0 deletions tests/test_openeo_gfmap/test_managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from openeo_gfmap.manager.job_splitters import split_job_hex


# TODO can we instead assert on exact numbers ?
# would remove the print statement
def test_split_jobs():
dataset_path = Path(__file__).parent / "resources/wc_extraction_dataset.gpkg"

Expand Down
5 changes: 5 additions & 0 deletions tests/test_openeo_gfmap/test_model_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pathlib import Path

import numpy as np
import pytest
import rasterio
from openeo.udf import XarrayDataCube

Expand Down Expand Up @@ -36,6 +37,8 @@
dependency_url = "https://artifactory.vgt.vito.be/artifactory/auxdata-public/openeo/onnx_dependencies_1.16.3.zip"


# TODO; as an addition we could include an assert on the output values, however this edges towards MLOPS
@pytest.mark.skip
def test_onnx_inference_local():
"""Test the ONNX Model inference locally"""
inds = load_dataarray_url(resources_file)
Expand Down Expand Up @@ -64,6 +67,8 @@ def test_onnx_inference_local():
output.to_netcdf(output_path)


# TODO; integration test of +- full pipeline
@pytest.mark.skip
def test_onnx_inference():
"""Simple test on the ONNX Model Inference class"""
connection = cdse_connection()
Expand Down
8 changes: 8 additions & 0 deletions tests/test_openeo_gfmap/test_s1_fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .test_s2_fetchers import POINT_EXTRACTION_DF, test_backends, test_configurations


# integration test checks if the output S1 cube has the correct band names;
class TestS1Extractors:
"""Build collection extractor for different S1 collections on different
backends.
Expand Down Expand Up @@ -89,6 +90,8 @@ def sentinel1_grd(
for harmonierd_name in expected_harmonized_bands:
assert harmonierd_name in results.keys()

# TODO; convoluted comparisson; we can use a utility function which calculates a
# statistic for every band, better to make use of pytest.approx
def compare_sentinel1_tiles():
"""Compare the different tiles gathered from different backends,
they should be similar, if they are computed with the same
Expand Down Expand Up @@ -132,6 +135,7 @@ def compare_sentinel1_tiles():
similarity_score = arrays_cosine_similarity(first_tile, tile_to_compare)
assert similarity_score >= 0.95

# TODO integration test
def sentinel1_grd_point_based(
spatial_context: SpatialContext,
temporal_context: TemporalContext,
Expand Down Expand Up @@ -189,6 +193,7 @@ def sentinel1_grd_point_based(

df.to_parquet(str(output_file).replace(".json", ".parquet"))

# TODO integration test
def sentinel1_grd_polygon_based(
spatial_context: SpatialContext,
temporal_context: TemporalContext,
Expand Down Expand Up @@ -240,6 +245,7 @@ def sentinel1_grd_polygon_based(
assert len(extracted_files) == len(spatial_context["features"])


@pytest.mark.skip
@pytest.mark.parametrize(
"spatial_context, temporal_context, backend", test_configurations
)
Expand All @@ -252,11 +258,13 @@ def test_sentinel1_grd(
)


@pytest.mark.skip
@pytest.mark.depends(on=["test_sentinel1_grd"])
def test_compare_sentinel1_tiles():
TestS1Extractors.compare_sentinel1_tiles()


@pytest.mark.skip
@pytest.mark.parametrize("backend", test_backends)
def test_sentinel1_grd_point_based(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down
4 changes: 4 additions & 0 deletions tests/test_openeo_gfmap/test_s2_fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ def sentinel2_l2a_polygon_based(
assert len(extracted_files) == len(spatial_context["features"])


@pytest.mark.skip
@pytest.mark.parametrize(
"spatial_context, temporal_context, backend", test_configurations
)
Expand All @@ -265,11 +266,13 @@ def test_sentinel2_l2a(
)


@pytest.mark.skip
@pytest.mark.depends(on=["test_sentinel2_l2a"])
def test_compare_sentinel2_tiles():
TestS2Extractors.compare_sentinel2_tiles()


@pytest.mark.skip
@pytest.mark.parametrize("backend", test_backends)
def test_sentinel2_l2a_point_based(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand All @@ -293,6 +296,7 @@ def test_sentinel2_l2a_point_based(backend: Backend):
)


@pytest.mark.skip
@pytest.mark.parametrize("backend", test_backends)
def test_sentinel2_l2a_polygon_based(backend: Backend):
connection = BACKEND_CONNECTIONS[backend]()
Expand Down
103 changes: 103 additions & 0 deletions tests/test_openeo_gfmap/test_unit_patch_extractors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
from unittest.mock import MagicMock

import numpy as np
import pytest
import xarray as xr

from openeo_gfmap.features import PatchFeatureExtractor

LAT_HARMONIZED_NAME = "GEO-LAT"
LON_HARMONIZED_NAME = "GEO-LON"
EPSG_HARMONIZED_NAME = "GEO-EPSG"


# Mock class for the patch feature extractor
class DummyPatchFeatureExtractor(PatchFeatureExtractor):
    """Minimal concrete PatchFeatureExtractor used to exercise base-class logic.

    Implements the two abstract hooks with trivial bodies so that unit tests
    can target the shared functionality (lat/lon generation, rescaling,
    execution plumbing) without any real feature computation.
    """

    def output_labels(self):
        """Return a fixed pair of output band labels."""
        return ["label1", "label2"]

    def execute(self, inarr: xr.DataArray) -> xr.DataArray:
        """Identity transform: return the input array unchanged."""
        return inarr


@pytest.fixture
def mock_feature_extractor():
    """Provide a fresh dummy extractor instance for each test."""
    extractor = DummyPatchFeatureExtractor()
    return extractor


@pytest.fixture
def mock_data_array():
    """Provide a 2x2 integer DataArray with dims (y, x) and no coordinates."""
    values = np.arange(1, 5).reshape(2, 2)
    return xr.DataArray(values, dims=["y", "x"])


def test_get_latlons_epsg_none(mock_feature_extractor, mock_data_array):
    """get_latlons must refuse to run when no EPSG code has been set."""
    extractor = mock_feature_extractor
    extractor._epsg = None
    # Without a CRS there is no way to compute lat/lon, so any call must raise.
    with pytest.raises(Exception):
        extractor.get_latlons(mock_data_array)


def test_get_latlons_epsg_4326(mock_feature_extractor, mock_data_array):
    """With EPSG:4326 set, get_latlons adds the harmonized lat/lon bands."""
    mock_feature_extractor._epsg = 4326
    latlons = mock_feature_extractor.get_latlons(mock_data_array)
    band_names = latlons.coords["bands"].values
    assert LAT_HARMONIZED_NAME in band_names
    assert LON_HARMONIZED_NAME in band_names


def test_get_latlons_reproject(mock_feature_extractor, mock_data_array):
    """get_latlons should return per-pixel lat/lon grids matching the input
    shape when the extractor uses a projected CRS (EPSG:3857, Web Mercator).
    """
    mock_feature_extractor._epsg = 3857

    # BUG FIX: the mock_data_array fixture carries no coordinate variables,
    # so the previous mock_data_array.coords["x"] lookup raised KeyError
    # before get_latlons was ever called. Use the dimension sizes to build
    # the expected grid shape instead.
    expected_shape = (
        mock_data_array.sizes["y"],
        mock_data_array.sizes["x"],
    )

    result = mock_feature_extractor.get_latlons(mock_data_array)

    # One lat grid and one lon grid, each covering every pixel of the input.
    assert result is not None
    assert result[0].shape == expected_shape
    assert result[1].shape == expected_shape


# test rescaling
def test_rescale_s1_backscatter_valid(mock_feature_extractor, mock_data_array):
s1_bands = ["S1-SIGMA0-VV", "S1-SIGMA0-VH"]
data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.uint16)
mock_data_array = xr.DataArray(
data, dims=["bands", "y", "x"], coords={"bands": s1_bands}
)

result = mock_feature_extractor._rescale_s1_backscatter(mock_data_array)
assert result.dtype == np.uint16


# TODO
@pytest.mark.skip(
    reason="Skipping test for since underlying excecutor needs to be changed"
)
def test_execute():
    """_execute should route the cube through _common_preparations and
    _rescale_s1_backscatter and return a cube whose array keeps the
    ("bands", "t", "y", "x") layout.
    """
    # Create an instance of the extractor
    extractor = DummyPatchFeatureExtractor()
    extractor._parameters = {"rescale_s1": True}

    # Mock the cube
    data = np.ones((1, 2, 2, 2))
    mock_cube = MagicMock()
    mock_cube.get_array.return_value = xr.DataArray(data, dims=["bands", "t", "y", "x"])

    # Mock the internal hooks so only the _execute plumbing is under test
    extractor._common_preparations = MagicMock(return_value=mock_cube.get_array())
    extractor._rescale_s1_backscatter = MagicMock(return_value=mock_cube.get_array())

    # Execute the method
    result = extractor._execute(mock_cube, {})

    # BUG FIX: xarray's ``dims`` attribute is a tuple, so it must be compared
    # against a tuple — the previous list comparison could never be equal.
    expected_dims = ("bands", "t", "y", "x")
    assert result.get_array().dims == expected_dims

    # Check that the mocked hooks were actually invoked by _execute
    extractor._common_preparations.assert_called()
    extractor._rescale_s1_backscatter.assert_called()
Loading