Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add topo-imagery version information to STAC TDE-1265 #1080

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/format-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:

- name: Build containers
run: |
docker build . --tag topo-imagery --label "github_run_id=${GITHUB_RUN_ID}"
docker build --build-arg=GIT_HASH=test --build-arg=GIT_VERSION=test --label "github_run_id=${GITHUB_RUN_ID}" --tag topo-imagery .

- name: End to end test - Aerial Imagery
run: |
Expand Down
5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ RUN /root/.local/bin/poetry bundle venv --no-ansi --no-interaction --only=main -

FROM ghcr.io/osgeo/gdal:ubuntu-small-3.9.0@sha256:d1a38af532e5d9e3991c4a6bddc2f2cb52644dc30a4eb8242101e8e23c3f83f6

ARG GIT_HASH
ENV GIT_HASH=${GIT_HASH:?'GIT_HASH is mandatory'}
ARG GIT_VERSION
ENV GIT_VERSION=${GIT_VERSION:?'GIT_VERSION is mandatory'}

ENV TZ=Etc/UTC

# Copy just the bundle from the first stage
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The scripts have been implemented to be run inside the Docker container only. Th
- Build the `Docker` image:

```bash
docker build . -t topo-imagery
docker build --build-arg=GIT_HASH=dev --build-arg=GIT_VERSION=dev --tag=topo-imagery .
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to make the local build commands more complicated? What value does passing "dev"/"dev" add here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was some discussion about the best way to check that these exist when running in CI; open to suggestions! I think you are right that we should change it, as I can foresee they will end up commented out in the Dockerfile, which would be a bad sign.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the first command in the README.md, so it is likely what people will run to first build this system. Doesn't making this command much more complicated just to add "dev" "dev" seem excessive?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for making these mandatory was to ensure that we don't forget to add them; otherwise the omission would only be detected at runtime.

```

- Running `standardising_validate.py` script
Expand Down
4 changes: 3 additions & 1 deletion scripts/stac/imagery/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def create_item(
start_datetime: str,
end_datetime: str,
collection_id: str,
gdal_version: str,
gdalinfo_result: GdalInfo | None = None,
derived_from: list[str] | None = None,
) -> ImageryItem:
Expand All @@ -28,6 +29,7 @@ def create_item(
start_datetime: start date of the survey
end_datetime: end date of the survey
collection_id: collection id to link to the Item
gdal_version: GDAL version
gdalinfo_result: result of the gdalinfo command. Defaults to None.
derived_from: list of STAC Items from where this Item is derived. Defaults to None.

Expand All @@ -41,7 +43,7 @@ def create_item(

geometry, bbox = get_extents(gdalinfo_result)

item = ImageryItem(id_, file, utc_now)
item = ImageryItem(id_, file, gdal_version, utc_now)

if derived_from is not None:
for derived in derived_from:
Expand Down
16 changes: 13 additions & 3 deletions scripts/stac/imagery/item.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from collections.abc import Callable
from datetime import datetime
from os import environ
from typing import Any

from scripts.datetimes import format_rfc_3339_datetime_string
Expand All @@ -16,7 +17,7 @@
class ImageryItem:
stac: dict[str, Any]

def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None:
def __init__(self, id_: str, file: str, gdal_version: str, now: Callable[[], datetime]) -> None:
file_content = fs.read(file)
file_modified_datetime = format_rfc_3339_datetime_string(modified(file))
now_string = format_rfc_3339_datetime_string(now())
Expand All @@ -34,8 +35,17 @@ def __init__(self, id_: str, file: str, now: Callable[[], datetime]) -> None:
"updated": file_modified_datetime,
}
},
"stac_extensions": [StacExtensions.file.value],
"properties": {"created": now_string, "updated": now_string},
"stac_extensions": [StacExtensions.file.value, StacExtensions.processing.value],
"properties": {
"created": now_string,
"updated": now_string,
"processing:datetime": now_string,
"processing:software": {
"gdal": gdal_version,
"linz/topo-imagery": f"https://github.com/linz/topo-imagery/commit/{environ['GIT_HASH']}",
},
"processing:version": environ["GIT_VERSION"],
},
}

def update_datetime(self, start_datetime: str, end_datetime: str) -> None:
Expand Down
4 changes: 3 additions & 1 deletion scripts/stac/imagery/tests/collection_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from datetime import datetime, timezone
from shutil import rmtree
from tempfile import mkdtemp
from unittest.mock import patch

import pytest
import shapely.geometry
Expand Down Expand Up @@ -133,7 +134,8 @@ def test_add_item(metadata: CollectionMetadata, subtests: SubTests) -> None:
item_file_path = "./scripts/tests/data/empty.tiff"
modified_datetime = datetime(2001, 2, 3, hour=4, minute=5, second=6, tzinfo=timezone.utc)
os.utime(item_file_path, times=(any_epoch_datetime().timestamp(), modified_datetime.timestamp()))
item = ImageryItem("BR34_5000_0304", item_file_path, now_function)
with patch.dict(os.environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
item = ImageryItem("BR34_5000_0304", item_file_path, "any GDAL version", now_function)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down
31 changes: 22 additions & 9 deletions scripts/stac/imagery/tests/create_stac_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
from os import environ
from pathlib import Path
from typing import cast
from unittest.mock import patch

from scripts.gdal.gdalinfo import GdalInfo
from scripts.stac.imagery.create_stac import create_item
Expand All @@ -18,7 +20,16 @@ def test_create_item_with_derived_from(tmp_path: Path) -> None:
GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}
)

item = create_item("./scripts/tests/data/empty.tiff", "", "", "abc123", fake_gdal_info, [derived_from_path.as_posix()])
with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
item = create_item(
"./scripts/tests/data/empty.tiff",
"",
"",
"abc123",
"any GDAL version",
fake_gdal_info,
[derived_from_path.as_posix()],
)

assert {
"href": derived_from_path.as_posix(),
Expand Down Expand Up @@ -47,14 +58,16 @@ def test_create_item_with_derived_from_datetimes(tmp_path: Path) -> None:
GdalInfo, {"wgs84Extent": {"type": "Polygon", "coordinates": [[[0, 1], [1, 1], [1, 0], [0, 0]]]}}
)

item = create_item(
"./scripts/tests/data/empty.tiff",
"",
"",
"abc123",
fake_gdal_info,
[derived_from_path_a.as_posix(), derived_from_path_b.as_posix()],
)
with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
item = create_item(
"./scripts/tests/data/empty.tiff",
"",
"",
"abc123",
"any GDAL version",
fake_gdal_info,
[derived_from_path_a.as_posix(), derived_from_path_b.as_posix()],
)

assert item.stac["properties"]["start_datetime"] == "1998-02-12T11:00:00Z"
assert item.stac["properties"]["end_datetime"] == "2024-09-02T12:00:00Z"
37 changes: 34 additions & 3 deletions scripts/stac/imagery/tests/item_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from datetime import datetime
from datetime import datetime, timezone
from os import environ
from unittest.mock import patch

from pytest_mock import MockerFixture
from pytest_subtests import SubTests
Expand All @@ -7,6 +9,7 @@
from scripts.stac.imagery.collection import ImageryCollection
from scripts.stac.imagery.item import ImageryItem
from scripts.stac.imagery.metadata_constants import CollectionMetadata
from scripts.stac.util.stac_extensions import StacExtensions
from scripts.tests.datetimes_test import any_epoch_datetime


Expand All @@ -24,7 +27,14 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
start_datetime = "2021-01-27T00:00:00Z"
end_datetime = "2021-01-27T00:00:00Z"

item = ImageryItem(id_, path, any_epoch_datetime)
def fake_now() -> datetime:
return datetime(1979, 1, 1, tzinfo=timezone.utc)

git_hash = "any Git hash"
git_version = "any Git version string"
gdal_version_string = "any GDAL version string"
with patch.dict(environ, {"GIT_HASH": git_hash, "GIT_VERSION": git_version}):
item = ImageryItem(id_, path, gdal_version_string, fake_now)
item.update_spatial(geometry, bbox)
item.update_datetime(start_datetime, end_datetime)
# checks
Expand All @@ -40,6 +50,26 @@ def test_imagery_stac_item(mocker: MockerFixture, subtests: SubTests) -> None:
with subtests.test():
assert item.stac["properties"]["datetime"] is None

with subtests.test():
assert (
item.stac["properties"]["created"]
== item.stac["properties"]["updated"]
== item.stac["properties"]["processing:datetime"]
== "1979-01-01T00:00:00Z"
)

with subtests.test():
assert item.stac["properties"]["processing:version"] == git_version

with subtests.test():
assert item.stac["properties"]["processing:software"] == {
"gdal": gdal_version_string,
"linz/topo-imagery": f"https://github.com/linz/topo-imagery/commit/{git_hash}",
}

with subtests.test():
assert item.stac["stac_extensions"] == [StacExtensions.file.value, StacExtensions.processing.value]

with subtests.test():
assert item.stac["geometry"]["coordinates"] == geometry["coordinates"]

Expand Down Expand Up @@ -78,7 +108,8 @@ def test_imagery_add_collection(mocker: MockerFixture, subtests: SubTests) -> No
path = "./scripts/tests/data/empty.tiff"
id_ = get_file_name_from_path(path)
mocker.patch("scripts.files.fs.read", return_value=b"")
item = ImageryItem(id_, path, any_epoch_datetime)
with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
item = ImageryItem(id_, path, "any GDAL version", any_epoch_datetime)

item.add_collection(collection.stac["id"])

Expand Down
1 change: 1 addition & 0 deletions scripts/stac/util/stac_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@

class StacExtensions(str, Enum):
    """Schema URLs of the STAC extensions referenced by this project.

    Members also subclass `str`, so each member's `.value` is the plain URL string.
    """

    # STAC "file" extension schema, pinned to v2.0.0.
    file = "https://stac-extensions.github.io/file/v2.0.0/schema.json"
    # STAC "processing" extension schema, pinned to v1.2.0.
    processing = "https://stac-extensions.github.io/processing/v1.2.0/schema.json"
6 changes: 5 additions & 1 deletion scripts/standardise_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from scripts.datetimes import format_rfc_3339_nz_midnight_datetime_string
from scripts.files.files_helper import SUFFIX_JSON, ContentType
from scripts.files.fs import exists, write
from scripts.gdal.gdal_helper import get_srs, get_vfs_path
from scripts.gdal.gdal_helper import get_gdal_version, get_srs, get_vfs_path
from scripts.json_codec import dict_to_json_bytes
from scripts.stac.imagery.create_stac import create_item
from scripts.standardising import run_standardising
Expand Down Expand Up @@ -84,6 +84,8 @@ def main() -> None:
if is_argo():
concurrency = 4

gdal_version = get_gdal_version()

tiff_files = run_standardising(
tile_files,
arguments.preset,
Expand All @@ -93,6 +95,7 @@ def main() -> None:
arguments.target_epsg,
arguments.gsd,
arguments.create_footprints,
gdal_version,
arguments.target,
)

Expand Down Expand Up @@ -137,6 +140,7 @@ def main() -> None:
start_datetime,
end_datetime,
arguments.collection_id,
gdal_version,
file.get_gdalinfo(),
file.get_derived_from_paths(),
)
Expand Down
5 changes: 3 additions & 2 deletions scripts/standardising.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from scripts.files.files_helper import SUFFIX_FOOTPRINT, ContentType, is_tiff
from scripts.files.fs import exists, read, write, write_all, write_sidecars
from scripts.gdal.gdal_bands import get_gdal_band_offset
from scripts.gdal.gdal_helper import EpsgNumber, gdal_info, get_gdal_version, run_gdal
from scripts.gdal.gdal_helper import EpsgNumber, gdal_info, run_gdal
from scripts.gdal.gdal_preset import (
get_alpha_command,
get_build_vrt_command,
Expand All @@ -34,6 +34,7 @@ def run_standardising(
target_epsg: str,
gsd: str,
create_footprints: bool,
gdal_version: str,
target_output: str = "/tmp/",
) -> list[FileTiff]:
"""Run `standardising()` in parallel (`concurrency`).
Expand All @@ -50,11 +51,11 @@ def run_standardising(

Returns:
a list of FileTiff wrapper
:param gdal_version:
paulfouquet marked this conversation as resolved.
Show resolved Hide resolved
"""
# pylint: disable-msg=too-many-arguments
start_time = time_in_ms()

gdal_version = get_gdal_version()
get_log().info("standardising_start", gdalVersion=gdal_version, fileCount=len(todo))

with Pool(concurrency) as p:
Expand Down
5 changes: 4 additions & 1 deletion scripts/tests/collection_from_items_test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from collections.abc import Generator
from datetime import datetime
from os import environ
from unittest.mock import patch

import pytest
from boto3 import client, resource
Expand All @@ -20,7 +22,8 @@
@pytest.fixture(name="item", autouse=True)
def setup() -> Generator[ImageryItem, None, None]:
# Create mocked STAC Item
item = ImageryItem("123", "./scripts/tests/data/empty.tiff", utc_now)
with patch.dict(environ, {"GIT_HASH": "any Git hash", "GIT_VERSION": "any Git version"}):
item = ImageryItem("123", "./scripts/tests/data/empty.tiff", "any GDAL version", utc_now)
geometry = {
"type": "Polygon",
"coordinates": [[1799667.5, 5815977.0], [1800422.5, 5815977.0], [1800422.5, 5814986.0], [1799667.5, 5814986.0]],
Expand Down
Loading