Implemented sentinel1 scale retriever on feature extraction; changed resample_spatial in s2 & s1 to not be by default; added pre_merge options before pre_mask
GriffinBabe committed Mar 21, 2024
1 parent 36e46ad commit ecfd6d1
Showing 5 changed files with 133 additions and 22 deletions.
38 changes: 38 additions & 0 deletions src/openeo_gfmap/features/feature_extractor.py
@@ -107,9 +107,47 @@ def get_latlons(self, inarr: xr.DataArray) -> xr.DataArray:
},
)

def _rescale_s1_backscatter(self, arr: xr.DataArray) -> xr.DataArray:
"""Rescales the input array from uint16 to float32 decibel values.
The input array should be in uint16 format, as this optimizes memory usage in Open-EO
processes. This function is applied automatically to the Sentinel-1 bands of the input
array, unless the `rescale_s1` parameter is set to False.
"""
s1_bands = ["S1-SIGMA0-VV", "S1-SIGMA0-VH", "S1-SIGMA0-HV", "S1-SIGMA0-HH"]
s1_bands_to_select = list(set(arr.bands.values) & set(s1_bands))

if len(s1_bands_to_select) == 0:
return arr

data_to_rescale = arr.sel(bands=s1_bands_to_select).astype(np.float32).data

# Check that the values lie within the uint16 range [1, 65535]
if data_to_rescale.min().item() < 1 or data_to_rescale.max().item() > 65535:
raise ValueError(
"The input array should be in uint16 format, with values between 1 and 65535. "
"This restriction ensures that the data was processed according to the S1 fetcher "
"preprocessor. The user can disable this scaling manually by setting the "
"`rescale_s1` parameter to False in the feature extractor."
)

# Convert the compressed uint16 values back to linear power values
data_to_rescale = 20.0 * np.log10(data_to_rescale) - 83.0
data_to_rescale = np.power(10, data_to_rescale / 10.0)
data_to_rescale[~np.isfinite(data_to_rescale)] = np.nan

# Converting power values to decibels
data_to_rescale = 10.0 * np.log10(data_to_rescale)

# Change the bands within the array
arr.loc[dict(bands=s1_bands_to_select)] = data_to_rescale
return arr

def _execute(self, cube: XarrayDataCube, parameters: dict) -> XarrayDataCube:
arr = cube.get_array().transpose("bands", "t", "y", "x")
arr = self._common_preparations(arr, parameters)
if self._parameters.get("rescale_s1", True):
arr = self._rescale_s1_backscatter(arr)

arr = self.execute(arr).transpose("bands", "y", "x")
return XarrayDataCube(arr)

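For reference, a minimal numpy sketch of the decoding performed by `_rescale_s1_backscatter` above (band selection and xarray handling are omitted; the input values are assumed to follow the uint16 encoding dB = 20*log10(x) - 83 enforced by the range check in the method):

import numpy as np

# Synthetic uint16 backscatter values, as the S1 fetcher is expected to deliver them.
compressed = np.array([1, 100, 20000, 65534], dtype=np.uint16)

# uint16 -> decibels, mirroring the first step of _rescale_s1_backscatter
decibels = 20.0 * np.log10(compressed.astype(np.float32)) - 83.0

# Round-trip through linear power so that non-finite values can be dropped,
# then back to decibels, as done in the method above.
power = np.power(10.0, decibels / 10.0)
power[~np.isfinite(power)] = np.nan
decibels = 10.0 * np.log10(power)

print(decibels)  # e.g. the encoded value 1 maps back to -83.0 dB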
20 changes: 10 additions & 10 deletions src/openeo_gfmap/fetching/commons.py
@@ -127,22 +127,22 @@ def load_collection(
properties=load_collection_parameters,
)

# Merge additional bands into the cube before continuing the operations.
pre_merge_cube = params.get("pre_merge", None)
if pre_merge_cube is not None:
assert isinstance(pre_merge_cube, openeo.DataCube), (
f"The 'pre_merge' parameter value must be an openeo datacube, "
f"got {pre_merge_cube}."
)
cube = cube.merge_cubes(pre_merge_cube)

# Performing the pre-mask optimization
pre_mask = params.get("pre_mask", None)
if pre_mask is not None:
assert isinstance(pre_mask, openeo.DataCube), (
f"The 'pre_mask' parameter must be an openeo datacube, " f"got {pre_mask}."
)
cube = cube.mask(pre_mask.resample_cube_spatial(cube))

# Include an additional band containing the dilated SCL mask
additional_mask = params.get("additional_mask", None)
if additional_mask is not None:
assert isinstance(additional_mask, openeo.DataCube), (
f"The 'additional_mask' parameter must be an openeo datacube, "
f"got {additional_mask}."
)
cube = cube.merge_cubes(additional_mask.resample_cube_spatial(cube))
cube = cube.mask(pre_mask)

if fetch_type == FetchType.POLYGON:
if isinstance(spatial_extent, str):
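A hypothetical sketch of how a caller could feed the new `pre_merge` option (together with the existing `pre_mask`) through the fetching parameters; the auxiliary cube, mask values and backend URL below are illustrative assumptions, not part of this commit:

import openeo

# Assumed connection; any openEO backend would do.
connection = openeo.connect("openeo.dataspace.copernicus.eu").authenticate_oidc()

# Auxiliary cube whose bands should be merged into the fetched collection,
# and a cloud mask derived from its SCL band (illustrative choice of values).
aux_cube = connection.load_collection(
    "SENTINEL2_L2A",
    spatial_extent={"west": 4.26, "south": 51.30, "east": 4.30, "north": 51.33},
    temporal_extent=["2023-06-01", "2023-06-30"],
    bands=["SCL"],
)
cloud_mask = aux_cube.band("SCL") == 9  # SCL value 9: cloud high probability

params = {
    "pre_merge": aux_cube,   # merged into the cube (merge_cubes) before masking
    "pre_mask": cloud_mask,  # applied afterwards via cube.mask(...)
}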
18 changes: 14 additions & 4 deletions src/openeo_gfmap/fetching/s1.py
@@ -113,9 +113,16 @@ def s1_grd_default_processor(cube: openeo.DataCube, **params):
local_incidence_angle=False,
)

cube = resample_reproject(
cube, params.get("target_resolution", 10.0), params.get("target_crs", None)
)
# Reproject the collection data to the target CRS and resolution, if specified.
# This step can be disabled by setting target_resolution=None in the parameters.
if params.get("target_resolution", True) is not None:
cube = resample_reproject(
cube, params.get("target_resolution", 10.0), params.get("target_crs", None)
)
elif params.get("target_crs") is not None:
raise ValueError(
"In fetching parameters: `target_crs` specified but not `target_resolution`, which is required to perform reprojection."
)

# Scaling the bands from float32 power values to uint16 for memory optimization

@@ -149,10 +156,13 @@ def s1_grd_default_processor(cube: openeo.DataCube, **params):
]
),
)
cube = cube.linear_scale_range(1, 65534, 1, 65534)

# Harmonizing the collection band names to the default GFMAP band names
cube = rename_bands(cube, BASE_SENTINEL1_GRD_MAPPING)

# Change the data type to uint16 for optimization purposes
cube = cube.linear_scale_range(1, 65534, 1, 65534)

return cube

return s1_grd_default_processor
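The band-math expression referenced by the "Scaling the bands from float32 power values to uint16" comment is collapsed in this diff; assuming it is the inverse of the decoding applied in `_rescale_s1_backscatter`, the encoding would look roughly like this numpy sketch:

import numpy as np

# Linear sigma0 power values as produced by sar_backscatter (illustrative values).
sigma0_power = np.array([0.005, 0.05, 0.5], dtype=np.float32)

# power -> decibels, then decibels -> compressed scale (inverse of dB = 20*log10(x) - 83).
sigma0_db = 10.0 * np.log10(sigma0_power)
encoded = np.power(10.0, (sigma0_db + 83.0) / 20.0)

# Clipping to [1, 65534] matches the linear_scale_range(1, 65534, 1, 65534) call above.
encoded_uint16 = np.clip(np.round(encoded), 1, 65534).astype(np.uint16)
print(encoded_uint16)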
19 changes: 13 additions & 6 deletions src/openeo_gfmap/fetching/s2.py
@@ -162,14 +162,21 @@ def s2_l2a_default_processor(cube: openeo.DataCube, **params):
at 10m resolution as well as band reprojection. Finally, it converts
the type of the cube values to uint16
"""
# Reproject collection data to target CRS, if specified so
cube = resample_reproject(
cube, params.get("target_resolution", 10.0), params.get("target_crs", None)
)

# Reproject the collection data to the target CRS and resolution, if specified.
# This step can be disabled by setting target_resolution=None in the parameters.
if params.get("target_resolution", True) is not None:
cube = resample_reproject(
cube, params.get("target_resolution", 10.0), params.get("target_crs", None)
)
elif params.get("target_crs") is not None:
raise ValueError(
"In fetching parameters: `target_crs` specified but not `target_resolution`, which is required to perform reprojection."
)

# Harmonizing the collection band names to the default GFMAP band names
cube = rename_bands(cube, BASE_SENTINEL2_L2A_MAPPING)

# Change the data type to uint16, giving optimization
# Change the data type to uint16 for optimization purposes
cube = cube.linear_scale_range(0, 65534, 0, 65534)

return cube
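The reprojection handling above is identical in the S1 and S2 default processors; a short sketch of the fetching parameters that drive it (the EPSG code is an illustrative assumption):

# Default: resample to 10 m, reprojecting when a target CRS is given.
params_default = {"target_crs": 32631}

# Explicitly disable the spatial resampling / reprojection step.
params_no_resample = {"target_resolution": None}

# Invalid: a target CRS without a target resolution raises the ValueError above.
params_invalid = {"target_resolution": None, "target_crs": 32631}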
60 changes: 58 additions & 2 deletions tests/test_openeo_gfmap/test_feature_extractors.py
@@ -6,13 +6,21 @@
import xarray as xr

from openeo_gfmap import BoundingBoxExtent, FetchType, TemporalContext
from openeo_gfmap.backend import BACKEND_CONNECTIONS, Backend, BackendContext
from openeo_gfmap.backend import (
BACKEND_CONNECTIONS,
Backend,
BackendContext,
cdse_connection,
)
from openeo_gfmap.features import (
PatchFeatureExtractor,
apply_feature_extractor,
apply_feature_extractor_local,
)
from openeo_gfmap.fetching import build_sentinel2_l2a_extractor
from openeo_gfmap.fetching import (
build_sentinel1_grd_extractor,
build_sentinel2_l2a_extractor,
)

SPATIAL_CONTEXT = BoundingBoxExtent(
west=4.261,
@@ -49,6 +57,14 @@ def execute(self, inarr: xr.DataArray):
return rgb_bands.transpose("bands", "y", "x")


class DummyS1PassthroughExtractor(PatchFeatureExtractor):
def output_labels(self) -> list:
return ["S1-SIGMA0-VH", "S1-SIGMA0-VV"]

def execute(self, inarr: xr.DataArray):
return inarr.mean(dim="t")


class LatLonExtractor(PatchFeatureExtractor):
"""Sample extractor that compute the latitude and longitude values
and concatenates them in a new array.
@@ -112,6 +128,46 @@ def test_patch_feature_udf(backend: Backend, connection_fn: Callable):
assert set(output_cube.keys()) == set(["red", "green", "blue", "crs"])


CUSTOM_BACKEND_CONNECTIONS = {
Backend.CDSE: cdse_connection,
}


@pytest.mark.parametrize("backend, connection_fn", CUSTOM_BACKEND_CONNECTIONS.items())
def test_s1_rescale(backend: Backend, connection_fn: Callable):
backend_context = BackendContext(backend=backend)
connection = connection_fn()
output_path = Path(__file__).parent / f"results/s1_rescaled_features_{backend.value}.nc"

REDUCED_TEMPORAL_CONTEXT = TemporalContext(start_date="2023-06-01", end_date="2023-06-30")

bands_to_extract = ["S1-SIGMA0-VH", "S1-SIGMA0-VV"]

extractor = build_sentinel1_grd_extractor(backend_context, bands_to_extract, FetchType.TILE)

cube = extractor.get_cube(connection, SPATIAL_CONTEXT, REDUCED_TEMPORAL_CONTEXT)

features = apply_feature_extractor(
DummyS1PassthroughExtractor,
cube,
parameters={},
size=[
{"dimension": "x", "unit": "px", "value": 128},
{"dimension": "y", "unit": "px", "value": 128},
],
)

job = features.create_job(title="s1_rescale_feature_extractor", out_format="NetCDF")
job.start_and_wait()

for asset in job.get_results().get_assets():
if asset.metadata["type"].startswith("application/x-netcdf"):
asset.download(output_path)
break

assert output_path.exists()


@pytest.mark.parametrize("backend, connection_fn", BACKEND_CONNECTIONS.items())
def test_latlon_extractor(backend: Backend, connection_fn: Callable):
backend_context = BackendContext(backend=backend)
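Building on the test above, a sketch of how the automatic backscatter rescaling can be opted out of via the `rescale_s1` parameter introduced in the feature extractor:

# Same call as in test_s1_rescale, but keeping the raw uint16 values:
features = apply_feature_extractor(
    DummyS1PassthroughExtractor,
    cube,
    parameters={"rescale_s1": False},  # skips _rescale_s1_backscatter in _execute()
    size=[
        {"dimension": "x", "unit": "px", "value": 128},
        {"dimension": "y", "unit": "px", "value": 128},
    ],
)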
