Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

154-s1orbitselection #158

Merged
merged 8 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions src/openeo_gfmap/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""This sub-module contains utilitary function and tools for OpenEO-GFMap"""

import logging

from openeo_gfmap.utils.build_df import load_json
from openeo_gfmap.utils.intervals import quintad_intervals
from openeo_gfmap.utils.netcdf import update_nc_attributes
Expand All @@ -12,6 +14,18 @@
select_sar_bands,
)

_log = logging.getLogger(__name__)
_log.setLevel(logging.INFO)

ch = logging.StreamHandler()
ch.setLevel(logging.INFO)

formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)

_log.addHandler(ch)


__all__ = [
"load_json",
"normalize_array",
Expand Down
132 changes: 113 additions & 19 deletions src/openeo_gfmap/utils/catalogue.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Optional

import geojson
import pandas as pd
import requests
from pyproj.crs import CRS
from rasterio.warp import transform_bounds
Expand All @@ -17,6 +18,7 @@
SpatialContext,
TemporalContext,
)
from openeo_gfmap.utils import _log

request_sessions: Optional[requests.Session] = None

Expand All @@ -41,13 +43,21 @@ class UncoveredS1Exception(Exception):


def _parse_cdse_products(response: dict):
"""Parses the geometry of products from the CDSE catalogue."""
geoemetries = []
"""Parses the geometry and timestamps of products from the CDSE catalogue."""
geometries = []
timestamps = []
products = response["features"]

for product in products:
geoemetries.append(shape(product["geometry"]))
return geoemetries
if "geometry" in product and "startDate" in product["properties"]:
geometries.append(shape(product["geometry"]))
timestamps.append(pd.to_datetime(product["properties"]["startDate"]))
else:
_log.warning(
"Cannot parse product %s does not have a geometry or timestamp.",
product["properties"]["id"],
)
return geometries, timestamps


def _query_cdse_catalogue(
Expand Down Expand Up @@ -133,14 +143,45 @@ def _check_cdse_catalogue(
return len(grd_tiles) > 0


def _compute_max_gap_days(
temporal_extent: TemporalContext, timestamps: list[pd.DatetimeIndex]
) -> int:
"""Computes the maximum temporal gap in days from the timestamps parsed from the catalogue.
Requires the start and end date to be included in the timestamps to compute the gap before
and after the first and last observation.

Parameters
----------
temporal_extent : TemporalContext
The temporal extent to be checked. Same as used to query the catalogue.
timestamps : list[pd.DatetimeIndex]
The list of timestamps parsed from the catalogue and to compute the gap from.

Returns
-------
days : int
The maximum temporal gap in days.
"""
# Computes max temporal gap. Include requested start and end date so we dont miss
# any start or end gap before first/last observation
timestamps = pd.DatetimeIndex(
sorted(
[pd.to_datetime(temporal_extent.start_date, utc=True)]
+ timestamps
+ [pd.to_datetime(temporal_extent.end_date, utc=True)]
)
)
return timestamps.to_series().diff().max().days


def s1_area_per_orbitstate_vvvh(
backend: BackendContext,
spatial_extent: SpatialContext,
temporal_extent: TemporalContext,
) -> dict:
"""
Evaluates for both the ascending and descending state orbits the area of interesection for the
available products with a VV&VH polarisation.
Evaluates for both the ascending and descending state orbits the area of interesection and
maximum temporal gap for the available products with a VV&VH polarisation.

Parameters
----------
Expand All @@ -155,8 +196,8 @@ def s1_area_per_orbitstate_vvvh(
Returns
------
dict
Keys containing the orbit state and values containing the total area of intersection in
km^2
Keys containing the orbit state and values containing the total area of intersection and
in km^2 and maximum temporal gap in days.
"""
if isinstance(spatial_extent, geojson.FeatureCollection):
# Transform geojson into shapely geometry and compute bounds
Expand Down Expand Up @@ -184,7 +225,7 @@ def s1_area_per_orbitstate_vvvh(

# Queries the products in the catalogues
if backend.backend in [Backend.CDSE, Backend.CDSE_STAGING, Backend.FED]:
ascending_products = _parse_cdse_products(
ascending_products, ascending_timestamps = _parse_cdse_products(
_query_cdse_catalogue(
"Sentinel1",
bounds,
Expand All @@ -193,7 +234,7 @@ def s1_area_per_orbitstate_vvvh(
polarisation="VV%26VH",
)
)
descending_products = _parse_cdse_products(
descending_products, descending_timestamps = _parse_cdse_products(
_query_cdse_catalogue(
"Sentinel1",
bounds,
Expand Down Expand Up @@ -221,13 +262,19 @@ def s1_area_per_orbitstate_vvvh(
return {
"ASCENDING": {
"full_overlap": ascending_covers,
"max_temporal_gap": _compute_max_gap_days(
temporal_extent, ascending_timestamps
),
"area": sum(
product.intersection(spatial_extent).area
for product in ascending_products
),
},
"DESCENDING": {
"full_overlap": descending_covers,
"max_temporal_gap": _compute_max_gap_days(
temporal_extent, descending_timestamps
),
"area": sum(
product.intersection(spatial_extent).area
for product in descending_products
Expand All @@ -240,9 +287,15 @@ def select_s1_orbitstate_vvvh(
backend: BackendContext,
spatial_extent: SpatialContext,
temporal_extent: TemporalContext,
max_temporal_gap: int = 60,
) -> str:
"""Selects the orbit state that covers the most area of intersection for the
available products with a VV&VH polarisation.
"""Selects the orbit state based on some predefined rules that
are checked in sequential order:
1. prefer an orbit with full coverage over the requested bounds
2. prefer an orbit with a maximum temporal gap under a
predefined threshold
3. prefer the orbit that covers the most area of intersection
for the available products

Parameters
----------
Expand All @@ -253,6 +306,8 @@ def select_s1_orbitstate_vvvh(
The spatial extent to be checked, it will check within its bounding box.
temporal_extent : TemporalContext
The temporal period to be checked.
max_temporal_gap: int, optional, default: 30
The maximum temporal gap in days to be considered for the orbit state.

Returns
------
Expand All @@ -265,21 +320,60 @@ def select_s1_orbitstate_vvvh(

ascending_overlap = areas["ASCENDING"]["full_overlap"]
descending_overlap = areas["DESCENDING"]["full_overlap"]
ascending_gap_too_large = areas["ASCENDING"]["max_temporal_gap"] > max_temporal_gap
descending_gap_too_large = (
areas["DESCENDING"]["max_temporal_gap"] > max_temporal_gap
)

orbit_choice = None

if not ascending_overlap and not descending_overlap:
raise UncoveredS1Exception(
"No product available to fully cover the requested area in both orbit states."
)

# Rule 1: Prefer an orbit with full coverage over the requested bounds
if ascending_overlap and not descending_overlap:
return "ASCENDING"
orbit_choice = "ASCENDING"
reason = "Only orbit fully covering the requested area."
elif descending_overlap and not ascending_overlap:
return "DESCENDING"
orbit_choice = "DESCENDING"
reason = "Only orbit fully covering the requested area."

# Rule 2: Prefer an orbit with a maximum temporal gap under a predefined threshold
elif ascending_gap_too_large and not descending_gap_too_large:
orbit_choice = "DESCENDING"
reason = (
"Only orbit with temporal gap under the threshold. "
f"{areas['DESCENDING']['max_temporal_gap']} days < {max_temporal_gap} days"
)
elif descending_gap_too_large and not ascending_gap_too_large:
orbit_choice = "ASCENDING"
reason = (
"Only orbit with temporal gap under the threshold. "
f"{areas['ASCENDING']['max_temporal_gap']} days < {max_temporal_gap} days"
)
# Rule 3: Prefer the orbit that covers the most area of intersection
# for the available products
elif ascending_overlap and descending_overlap:
ascending_cover_area = areas["ASCENDING"]["area"]
descending_cover_area = areas["DESCENDING"]["area"]

# Selects the orbit state that covers the most area
if ascending_cover_area > descending_cover_area:
return "ASCENDING"
orbit_choice = "ASCENDING"
reason = (
"Orbit has more cumulative intersected area. "
f"{ascending_cover_area} > {descending_cover_area}"
)
else:
return "DESCENDING"
reason = (
"Orbit has more cumulative intersected area. "
f"{descending_cover_area} > {ascending_cover_area}"
)
orbit_choice = "DESCENDING"

raise UncoveredS1Exception(
"No product available to fully cover the given spatio-temporal context."
)
if orbit_choice is not None:
GriffinBabe marked this conversation as resolved.
Show resolved Hide resolved
_log.info(f"Selected orbit state: {orbit_choice}. Reason: {reason}")
return orbit_choice
raise UncoveredS1Exception("Failed to select suitable Sentinel-1 orbit.")

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

86 changes: 86 additions & 0 deletions tests/test_openeo_gfmap/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import hashlib
import json
import os
from pathlib import Path
from unittest.mock import patch

import geojson
import pandas as pd
import pystac
import pytest
from netCDF4 import Dataset

from openeo_gfmap import Backend, BackendContext, BoundingBoxExtent, TemporalContext
from openeo_gfmap.utils import split_collection_by_epsg, update_nc_attributes
from openeo_gfmap.utils.catalogue import (
_compute_max_gap_days,
s1_area_per_orbitstate_vvvh,
select_s1_orbitstate_vvvh,
)
Expand All @@ -21,6 +27,34 @@
TEMPORAL_CONTEXT = TemporalContext(start_date="2023-06-21", end_date="2023-09-21")


def mock_query_cdse_catalogue(
collection: str,
bounds: list,
temporal_extent: TemporalContext,
**additional_parameters: dict,
):
"""Mocks the results of the CDSE catalogue query by computing the hash of the input parameters
and returning the results from a resource file if it exists.
"""
# Compute the hash of the input parameters
arguments = "".join([str(x) for x in [collection, bounds, temporal_extent]])
kw_arguments = "".join(
[f"{key}{value}" for key, value in additional_parameters.items()]
)
combined_arguments = arguments + kw_arguments
hash_value = hashlib.sha256(combined_arguments.encode()).hexdigest()

src_path = (
Path(__file__).parent / f"resources/{hash_value[:8]}_query_cdse_results.json"
)

if not src_path.exists():
raise ValueError("No cached results for the given parameters.")

return json.loads(src_path.read_text())


@patch("openeo_gfmap.utils.catalogue._query_cdse_catalogue", mock_query_cdse_catalogue)
def test_query_cdse_catalogue():
backend_context = BackendContext(Backend.CDSE)

Expand All @@ -44,6 +78,9 @@ def test_query_cdse_catalogue():
assert response["ASCENDING"]["full_overlap"] is True
assert response["DESCENDING"]["full_overlap"] is True

assert response["ASCENDING"]["max_temporal_gap"] > 0.0
assert response["DESCENDING"]["max_temporal_gap"] > 0.0

# Testing the decision maker, it should return DESCENDING
decision = select_s1_orbitstate_vvvh(
backend=backend_context,
Expand All @@ -54,6 +91,30 @@ def test_query_cdse_catalogue():
assert decision == "DESCENDING"


@patch("openeo_gfmap.utils.catalogue._query_cdse_catalogue", mock_query_cdse_catalogue)
def test_query_cdse_catalogue_with_s1_gap():
GriffinBabe marked this conversation as resolved.
Show resolved Hide resolved
"""This example has a large S1 gap in ASCENDING,
so the decision should be DESCENDING
"""
backend_context = BackendContext(Backend.CDSE)

spatial_extent = geojson.loads(
(
'{"features": [{"geometry": {"coordinates": [[[35.85799, 49.705688], [35.85799, 49.797363], [36.039682, 49.797363], '
'[36.039682, 49.705688], [35.85799, 49.705688]]], "type": "Polygon"}, "id": "0", "properties": '
'{"GT_available": true, "extract": 1, "index": 12, "sample_id": "ukraine_sunflower", "tile": '
'"36UYA", "valid_time": "2019-05-01", "year": 2019}, "type": "Feature"}], "type": "FeatureCollection"}'
)
)
temporal_extent = TemporalContext("2019-01-30", "2019-08-31")

decision = select_s1_orbitstate_vvvh(
backend_context, spatial_extent, temporal_extent
)

assert decision == "DESCENDING"


@pytest.fixture
def temp_nc_file():
temp_file = Path("temp_test.nc")
Expand Down Expand Up @@ -176,3 +237,28 @@ def test_split_collection_by_epsg(tmp_path):
collection.add_item(missing_epsg_item)
collection.normalize_and_save(input_dir)
split_collection_by_epsg(path=input_dir, output_dir=output_dir)


@patch("openeo_gfmap.utils.catalogue._query_cdse_catalogue", mock_query_cdse_catalogue)
def test_compute_max_gap():
start_date = "2020-01-01"
end_date = "2020-01-31"

temporal_context = TemporalContext(start_date, end_date)

resulting_dates = [
"2020-01-03",
"2020-01-05",
"2020-01-10",
"2020-01-25",
"2020-01-26",
"2020-01-27",
]

resulting_dates = [
pd.to_datetime(date, format="%Y-%m-%d", utc=True) for date in resulting_dates
]

max_gap = _compute_max_gap_days(temporal_context, resulting_dates)

assert max_gap == 15
Loading