diff --git a/.github/workflows/format-tests.yml b/.github/workflows/format-tests.yml index d036b2f7..0bff30ab 100644 --- a/.github/workflows/format-tests.yml +++ b/.github/workflows/format-tests.yml @@ -27,7 +27,7 @@ jobs: - name: Build containers run: | - docker build . --tag topo-imagery --label "github_run_id=${GITHUB_RUN_ID}" + docker build --build-arg=GIT_HASH=test --build-arg=GIT_VERSION=test --label "github_run_id=${GITHUB_RUN_ID}" --tag topo-imagery . - name: End to end test - Aerial Imagery run: | diff --git a/Dockerfile b/Dockerfile index d514ee4e..61addb44 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,6 +24,11 @@ RUN /root/.local/bin/poetry bundle venv --no-ansi --no-interaction --only=main - FROM ghcr.io/osgeo/gdal:ubuntu-small-3.9.0@sha256:d1a38af532e5d9e3991c4a6bddc2f2cb52644dc30a4eb8242101e8e23c3f83f6 +ARG GIT_HASH +ENV GIT_HASH=${GIT_HASH:?'GIT_HASH is mandatory'} +ARG GIT_VERSION +ENV GIT_VERSION=${GIT_VERSION:?'GIT_VERSION is mandatory'} + ENV TZ=Etc/UTC # Copy just the bundle from the first stage diff --git a/README.md b/README.md index ee676671..0a362831 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ The scripts have been implemented to be run inside the Docker container only. Th - Build the `Docker` image: ```bash -docker build . -t topo-imagery +docker build --build-arg=GIT_HASH=dev --build-arg=GIT_VERSION=dev --tag=topo-imagery . ``` - Running `standardising_validate.py` script diff --git a/scripts/stac/imagery/create_stac.py b/scripts/stac/imagery/create_stac.py index 1948d975..0851fc82 100644 --- a/scripts/stac/imagery/create_stac.py +++ b/scripts/stac/imagery/create_stac.py @@ -18,6 +18,7 @@ def create_item( start_datetime: str, end_datetime: str, collection_id: str, + gdal_version: str, gdalinfo_result: GdalInfo | None = None, derived_from: list[str] | None = None, ) -> ImageryItem: @@ -28,6 +29,7 @@ def create_item( start_datetime: start date of the survey end_datetime: end date of the survey collection_id: collection id to link to the Item + gdal_version: GDAL version gdalinfo_result: result of the gdalinfo command. Defaults to None. derived_from: list of STAC Items from where this Item is derived. Defaults to None. @@ -41,7 +43,7 @@ def create_item( geometry, bbox = get_extents(gdalinfo_result) - item = ImageryItem(id_, file, utc_now) + item = ImageryItem(id_, file, gdal_version, utc_now) if derived_from is not None: for derived in derived_from: diff --git a/scripts/stac/imagery/item.py b/scripts/stac/imagery/item.py index 0e3e78af..943dfdd1 100644 --- a/scripts/stac/imagery/item.py +++ b/scripts/stac/imagery/item.py @@ -1,6 +1,7 @@ import os from collections.abc import Callable from datetime import datetime +from os import environ from typing import Any from scripts.datetimes import format_rfc_3339_datetime_string @@ -16,7 +17,7 @@ class ImageryItem: stac: dict[str, Any] - def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None: + def __init__(self, id_: str, file: str, gdal_version: str, now: Callable[[], datetime]) -> None: file_content = fs.read(file) file_modified_datetime = format_rfc_3339_datetime_string(modified(file)) now_string = format_rfc_3339_datetime_string(now()) @@ -34,8 +35,17 @@ def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None: "updated": file_modified_datetime, } }, - "stac_extensions": [StacExtensions.file.value], - "properties": {"created": now_string, "updated": now_string}, + "stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value], + "properties": { + "created": now_string, + "updated": now_string, + "processing:datetime": now_string, + "processing:software": { + "gdal": gdal_version, + "linz/topo-imagery": f"https://github.com/linz/topo-imagery/commit/{environ['GIT_HASH']}", + }, + "processing:version": environ["GIT_VERSION"], + }, } def update_datetime(self, start_datetime: str, end_datetime: str) -> None: diff --git a/scripts/stac/imagery/tests/collection_test.py b/scripts/stac/imagery/tests/collection_test.py index de5634a2..0cf0bae8 100644 --- a/scripts/stac/imagery/tests/collection_test.py +++ b/scripts/stac/imagery/tests/collection_test.py @@ -5,6 +5,7 @@ from datetime import datetime, timezone from shutil import rmtree from tempfile import mkdtemp +from unittest.mock import patch import pytest import shapely.geometry @@ -133,7 +134,8 @@ def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None: item_file_path = "./scripts/tests/data/empty.tiff" modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc) os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp())) - item = ImageryItem("BR34_5000_0304", item_file_path, now_function) + with patch.dict(os.environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}): + item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", now_function) geometry = { "type": "Polygon", "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]], diff --git a/scripts/stac/imagery/tests/create_stac_test.py b/scripts/stac/imagery/tests/create_stac_test.py index a99c4b60..da74eaab 100644 --- a/scripts/stac/imagery/tests/create_stac_test.py +++ b/scripts/stac/imagery/tests/create_stac_test.py @@ -1,6 +1,8 @@ import json +from os import environ from pathlib import Path from typing import cast +from unittest.mock import patch from scripts.gdal.gdalinfo import GdalInfo from scripts.stac.imagery.create_stac import create_item @@ -18,7 +20,16 @@ def test_create_item_with_derived_from(tmp_path: Path) -> None: GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}} ) - item = create_item("./scripts/tests/data/empty.tiff", "", "", "abc123", fake_gdal_info, [derived_from_path.as_posix()]) + with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}): + item = create_item( + "./scripts/tests/data/empty.tiff", + "", + "", + "abc123", + "any GDAL version", + fake_gdal_info, + [derived_from_path.as_posix()], + ) assert { "href": derived_from_path.as_posix(), @@ -47,14 +58,16 @@ def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None: GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}} ) - item = create_item( - "./scripts/tests/data/empty.tiff", - "", - "", - "abc123", - fake_gdal_info, - [derived_from_path_a.as_posix(), derived_from_path_b.as_posix()], - ) + with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}): + item = create_item( + "./scripts/tests/data/empty.tiff", + "", + "", + "abc123", + "any GDAL version", + fake_gdal_info, + [derived_from_path_a.as_posix(), derived_from_path_b.as_posix()], + ) assert item.stac["properties"]["start_datetime"] == "1998-02-12T11:00:00Z" assert item.stac["properties"]["end_datetime"] == "2024-09-02T12:00:00Z" diff --git a/scripts/stac/imagery/tests/item_test.py b/scripts/stac/imagery/tests/item_test.py index 5ec7a65f..5ff24929 100644 --- a/scripts/stac/imagery/tests/item_test.py +++ b/scripts/stac/imagery/tests/item_test.py @@ -1,4 +1,6 @@ -from datetime import datetime +from datetime import datetime, timezone +from os import environ +from unittest.mock import patch from pytest_mock import MockerFixture from pytest_subtests import SubTests @@ -7,6 +9,7 @@ from scripts.stac.imagery.collection import ImageryCollection from scripts.stac.imagery.item import ImageryItem from scripts.stac.imagery.metadata_constants import CollectionMetadata +from scripts.stac.util.stac_extensions import StacExtensions from scripts.tests.datetimes_test import any_epoch_datetime @@ -24,7 +27,14 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None: start_datetime = "2021-01-27T00:00:00Z" end_datetime = "2021-01-27T00:00:00Z" - item = ImageryItem(id_, path, any_epoch_datetime) + def fake_now() -> datetime: + return datetime(1979, 1, 1, tzinfo=timezone.utc) + + git_hash = "any Git hash" + git_version = "any Git version string" + gdal_version_string = "any GDAL version string" + with patch.dict(environ, {"GIT_HASH": git_hash, "GIT_VERSION": git_version}): + item = ImageryItem(id_, path, gdal_version_string, fake_now) item.update_spatial(geometry, bbox) item.update_datetime(start_datetime, end_datetime) # checks @@ -40,6 +50,26 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None: with subtests.test(): assert item.stac["properties"]["datetime"] is None + with subtests.test(): + assert ( + item.stac["properties"]["created"] + == item.stac["properties"]["updated"] + == item.stac["properties"]["processing:datetime"] + == "1979-01-01T00:00:00Z" + ) + + with subtests.test(): + assert item.stac["properties"]["processing:version"] == git_version + + with subtests.test(): + assert item.stac["properties"]["processing:software"] == { + "gdal": gdal_version_string, + "linz/topo-imagery": f"https://github.com/linz/topo-imagery/commit/{git_hash}", + } + + with subtests.test(): + assert item.stac["stac_extensions"] == [StacExtensions.file.value, StacExtensions.processing.value] + with subtests.test(): assert item.stac["geometry"]["coordinates"] == geometry["coordinates"] @@ -78,7 +108,8 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No path = "./scripts/tests/data/empty.tiff" id_ = get_file_name_from_path(path) mocker.patch("scripts.files.fs.read", return_value=b"") - item = ImageryItem(id_, path, any_epoch_datetime) + with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}): + item = ImageryItem(id_, path, "any GDAL version", any_epoch_datetime) item.add_collection(collection.stac["id"]) diff --git a/scripts/stac/util/stac_extensions.py b/scripts/stac/util/stac_extensions.py index 4567f0c6..fa6ba459 100644 --- a/scripts/stac/util/stac_extensions.py +++ b/scripts/stac/util/stac_extensions.py @@ -3,3 +3,4 @@ class StacExtensions(str, Enum): file = "https://stac-extensions.github.io/file/v2.0.0/schema.json" + processing = "https://stac-extensions.github.io/processing/v1.2.0/schema.json" diff --git a/scripts/standardise_validate.py b/scripts/standardise_validate.py index 40acbc6d..d62f1891 100644 --- a/scripts/standardise_validate.py +++ b/scripts/standardise_validate.py @@ -8,7 +8,7 @@ from scripts.datetimes import format_rfc_3339_nz_midnight_datetime_string from scripts.files.files_helper import SUFFIX_JSON, ContentType from scripts.files.fs import exists, write -from scripts.gdal.gdal_helper import get_srs, get_vfs_path +from scripts.gdal.gdal_helper import get_gdal_version, get_srs, get_vfs_path from scripts.json_codec import dict_to_json_bytes from scripts.stac.imagery.create_stac import create_item from scripts.standardising import run_standardising @@ -84,6 +84,8 @@ def main() -> None: if is_argo(): concurrency = 4 + gdal_version = get_gdal_version() + tiff_files = run_standardising( tile_files, arguments.preset, @@ -93,6 +95,7 @@ def main() -> None: arguments.target_epsg, arguments.gsd, arguments.create_footprints, + gdal_version, arguments.target, ) @@ -137,6 +140,7 @@ def main() -> None: start_datetime, end_datetime, arguments.collection_id, + gdal_version, file.get_gdalinfo(), file.get_derived_from_paths(), ) diff --git a/scripts/standardising.py b/scripts/standardising.py index b009f21c..139a8270 100644 --- a/scripts/standardising.py +++ b/scripts/standardising.py @@ -12,7 +12,7 @@ from scripts.files.files_helper import SUFFIX_FOOTPRINT, ContentType, is_tiff from scripts.files.fs import exists, read, write, write_all, write_sidecars from scripts.gdal.gdal_bands import get_gdal_band_offset -from scripts.gdal.gdal_helper import EpsgNumber, gdal_info, get_gdal_version, run_gdal +from scripts.gdal.gdal_helper import EpsgNumber, gdal_info, run_gdal from scripts.gdal.gdal_preset import ( get_alpha_command, get_build_vrt_command, @@ -34,6 +34,7 @@ def run_standardising( target_epsg: str, gsd: str, create_footprints: bool, + gdal_version: str, target_output: str = "/tmp/", ) -> list[FileTiff]: """Run `standardising()` in parallel (`concurrency`). @@ -46,6 +47,7 @@ def run_standardising( source_epsg: EPSG code of the source file target_epsg: EPSG code of reprojection gsd: Ground Sample Distance in meters + gdal_version: version of GDAL used for standardising target_output: output directory path. Defaults to "/tmp/" Returns: @@ -54,7 +56,6 @@ def run_standardising( # pylint: disable-msg=too-many-arguments start_time = time_in_ms() - gdal_version = get_gdal_version() get_log().info("standardising_start", gdalVersion=gdal_version, fileCount=len(todo)) with Pool(concurrency) as p: diff --git a/scripts/tests/collection_from_items_test.py b/scripts/tests/collection_from_items_test.py index 38b33c68..6ba4bccd 100644 --- a/scripts/tests/collection_from_items_test.py +++ b/scripts/tests/collection_from_items_test.py @@ -1,5 +1,7 @@ from collections.abc import Generator from datetime import datetime +from os import environ +from unittest.mock import patch import pytest from boto3 import client, resource @@ -20,7 +22,8 @@ @pytest.fixture(name="item", autouse=True) def setup() -> Generator[ImageryItem, None, None]: # Create mocked STAC Item - item = ImageryItem("123", "./scripts/tests/data/empty.tiff", utc_now) + with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}): + item = ImageryItem("123", "./scripts/tests/data/empty.tiff", "any GDAL version", utc_now) geometry = { "type": "Polygon", "coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],